3 Process vp data for the bird migration flow visualization
In this chapter we’ll process the vp data (CSV file) and metadata for the bird migration flow visualization.
library(bioRad)
library(dplyr)
library(lubridate)
library(circular)
source("functions/load_settings.R")
3.1 Load settings
Load the same settings we defined and used before (see 1.3):
# Build the settings object with load_settings(), which is sourced from
# functions/load_settings.R. settings_file and radars_metadata_file are not
# defined in this chapter and must already exist in the session.
settings <- load_settings(settings_file, radars_metadata_file)
Found 3 radars from 1 countries in the settings.
3.2 Read data from CSV file
To allow this chapter to run without having to process all the vp data, we’ll load the data from the CSV file(s) rather than from the dataframe created in 2.
Search the processed data directory for CSV files with `processed_` in the filename:
# Strip any trailing slash(es) so dir() receives a clean directory path.
# Unlike cutting off the last character, this leaves the path intact when
# it has no trailing slash.
processed_data_dir_no_slash <- sub("/+$", "", processed_data_dir)
# Keep only file names that contain "processed_" and end in ".csv". The dot
# is escaped and the pattern is anchored at the end, so names such as
# "x_processed_1.csv.bak" are not picked up.
csv_files <- dir(
  processed_data_dir_no_slash,
  pattern = "processed_.*\\.csv$",
  full.names = TRUE,
  recursive = FALSE
)
csv_files
## [1] "../data/processed/example/example_processed_3_radars_20161003_20161004.csv"
Read and concatenate files:
# Read every CSV file into its own data frame ...
csv_data <- lapply(csv_files, function(csv_file) read.csv(csv_file))
# ... and stack the data frames row-wise into a single one
flowviz_data <- csv_data %>%
  bind_rows()
Preview of the 1740 rows:
head(flowviz_data)
## radar_id datetime HGHT u v dens dd ff DBZH mtr
## 1 seang 2016-10-03 20:00:00 0 NA NA NA NA NA NA NA
## 2 seang 2016-10-03 20:00:00 200 NA NA NA NA NA NA NA
## 3 seang 2016-10-03 20:00:00 400 NA NA NA NA NA 3.8646252 NA
## 4 seang 2016-10-03 20:00:00 600 NA NA NA NA NA -0.8646818 NA
## 5 seang 2016-10-03 20:00:00 800 NA NA NA NA NA -2.6370287 NA
## 6 seang 2016-10-03 20:00:00 1000 NA NA NA NA NA -4.5374875 NA
## day_night date_of_sunset exclusion_reason
## 1 night 20161003 datetime
## 2 night 20161003 datetime
## 3 night 20161003 datetime
## 4 night 20161003 datetime
## 5 night 20161003 datetime
## 6 night 20161003 datetime
3.3 Add time bins
Add time bins per hour:
# Parse datetime as UTC and round it down to the start of its hour
flowviz_data <- flowviz_data %>%
  mutate(
    datetime_bin = datetime %>%
      as.POSIXct(tz = "UTC") %>%
      floor_date(unit = "hour")
  )
3.4 Add height bins
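Add two height bins: `1` for heights from 200 up to (but not including) 2000, and `2` for heights of 2000 and above. Heights below 200 are not assigned a bin (`NA`):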
# Label each row with its height bin. Rows matching neither condition
# (HGHT below 200, or missing) get NA.
flowviz_data <- flowviz_data %>%
  mutate(
    height_bin = case_when(
      HGHT >= 200 & HGHT < 2000 ~ "1",
      HGHT >= 2000 ~ "2"
    )
  )
Preview:
head(flowviz_data, 20)
## radar_id datetime HGHT u v dens dd ff DBZH mtr
## 1 seang 2016-10-03 20:00:00 0 NA NA NA NA NA NA NA
## 2 seang 2016-10-03 20:00:00 200 NA NA NA NA NA NA NA
## 3 seang 2016-10-03 20:00:00 400 NA NA NA NA NA 3.8646252 NA
## 4 seang 2016-10-03 20:00:00 600 NA NA NA NA NA -0.8646818 NA
## 5 seang 2016-10-03 20:00:00 800 NA NA NA NA NA -2.6370287 NA
## 6 seang 2016-10-03 20:00:00 1000 NA NA NA NA NA -4.5374875 NA
## 7 seang 2016-10-03 20:00:00 1200 NA NA NA NA NA NA NA
## 8 seang 2016-10-03 20:00:00 1400 NA NA NA NA NA NA NA
## 9 seang 2016-10-03 20:00:00 1600 NA NA NA NA NA NA NA
## 10 seang 2016-10-03 20:00:00 1800 NA NA NA NA NA NA NA
## 11 seang 2016-10-03 20:00:00 2000 NA NA NA NA NA NA NA
## 12 seang 2016-10-03 20:00:00 2200 NA NA NA NA NA NA NA
## 13 seang 2016-10-03 20:00:00 2400 NA NA NA NA NA NA NA
## 14 seang 2016-10-03 20:00:00 2600 NA NA NA NA NA NA NA
## 15 seang 2016-10-03 20:00:00 2800 NA NA NA NA NA NA NA
## 16 seang 2016-10-03 20:00:00 3000 NA NA NA NA NA NA NA
## 17 seang 2016-10-03 20:00:00 3200 NA NA NA NA NA NA NA
## 18 seang 2016-10-03 20:00:00 3400 NA NA NA NA NA NA NA
## 19 seang 2016-10-03 20:00:00 3600 NA NA NA NA NA NA NA
## 20 seang 2016-10-03 20:00:00 3800 NA NA NA NA NA NA NA
## day_night date_of_sunset exclusion_reason datetime_bin
## 1 night 20161003 datetime 2016-10-03 20:00:00
## 2 night 20161003 datetime 2016-10-03 20:00:00
## 3 night 20161003 datetime 2016-10-03 20:00:00
## 4 night 20161003 datetime 2016-10-03 20:00:00
## 5 night 20161003 datetime 2016-10-03 20:00:00
## 6 night 20161003 datetime 2016-10-03 20:00:00
## 7 night 20161003 datetime 2016-10-03 20:00:00
## 8 night 20161003 datetime 2016-10-03 20:00:00
## 9 night 20161003 datetime 2016-10-03 20:00:00
## 10 night 20161003 datetime 2016-10-03 20:00:00
## 11 night 20161003 datetime 2016-10-03 20:00:00
## 12 night 20161003 datetime 2016-10-03 20:00:00
## 13 night 20161003 datetime 2016-10-03 20:00:00
## 14 night 20161003 datetime 2016-10-03 20:00:00
## 15 night 20161003 datetime 2016-10-03 20:00:00
## 16 night 20161003 datetime 2016-10-03 20:00:00
## 17 night 20161003 datetime 2016-10-03 20:00:00
## 18 night 20161003 datetime 2016-10-03 20:00:00
## 19 night 20161003 datetime 2016-10-03 20:00:00
## 20 night 20161003 datetime 2016-10-03 20:00:00
## height_bin
## 1 <NA>
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## 7 1
## 8 1
## 9 1
## 10 1
## 11 2
## 12 2
## 13 2
## 14 2
## 15 2
## 16 2
## 17 2
## 18 2
## 19 2
## 20 2
3.5 Filter out low density data
Only keep data with a density above or equal to 10 (birds) and not marked to be excluded in 2:
# Keep rows that have a density of at least 10 and an empty exclusion
# reason. filter() combines its conditions with AND and drops rows where a
# condition evaluates to NA.
flowviz_data <- flowviz_data %>%
  filter(
    dens >= 10,
    exclusion_reason == ""
  )
3.6 Aggregate data
Aggregate the data by radar, datetime bin and height bin, taking the mean of every value. For the direction `dd` we need the circular mean, which we calculate with a small helper function:
#' Circular mean of a set of directions in degrees
#'
#' @param directions Numeric vector of directions in degrees; may contain `NA`.
#' @return A single number in the range [0, 360), or `NA` when `directions`
#'   has no non-missing value or when `mean.circular()` itself yields `NA`.
circular_mean_degrees <- function(directions) {
  # mean.circular() will throw an error if all directions are NA, so handle
  # that case (which also covers empty input) up front. NA_real_ keeps the
  # return type numeric in every branch.
  if (all(is.na(directions))) {
    return(NA_real_)
  }

  circular_mean <- mean.circular(
    circular(directions, units = "degrees"),
    na.rm = TRUE
  )[[1]]

  # Map negative angles onto [0, 360), e.g. -45 becomes 315. The modulo
  # passes an NA mean through unchanged, whereas `if (circular_mean < 0)`
  # would stop with "missing value where TRUE/FALSE needed".
  # NOTE(review): mean.circular() appears to return NA when the directions
  # cancel each other out (e.g. 0 and 180) - confirm against the package docs.
  circular_mean %% 360
}
Aggregate data:
# Collapse the data to one row per radar, hourly bin and height bin. Linear
# quantities use the arithmetic mean (ignoring NA); the direction dd uses
# the circular mean helper.
flowviz_data <- flowviz_data %>%
  group_by(radar_id, datetime_bin, height_bin) %>%
  summarise(
    avg_u = mean(u, na.rm = TRUE),
    avg_v = mean(v, na.rm = TRUE),
    avg_dens = mean(dens, na.rm = TRUE),
    avg_dd = circular_mean_degrees(dd),
    avg_ff = mean(ff, na.rm = TRUE)
  ) %>%
  # Drop the remaining grouping so later steps work on plain rows
  ungroup()
Preview:
head(data.frame(flowviz_data))
## radar_id datetime_bin height_bin avg_u avg_v avg_dens
## 1 seang 2016-10-03 20:00:00 1 -6.028157 -8.363182 18.63652
## avg_dd avg_ff
## 1 215.778 10.33008
3.7 Filter out empty rows
Only keep rows where at least one of the calculated values is not `NA`:
# Drop rows in which every aggregated value is NA, i.e. keep a row as soon
# as at least one of the averages is present.
flowviz_data <- flowviz_data %>%
  filter(
    !(
      is.na(avg_u) &
        is.na(avg_v) &
        is.na(avg_dens) &
        is.na(avg_dd) &
        is.na(avg_ff)
    )
  )
Preview:
head(data.frame(flowviz_data))
## radar_id datetime_bin height_bin avg_u avg_v avg_dens
## 1 seang 2016-10-03 20:00:00 1 -6.028157 -8.363182 18.63652
## avg_dd avg_ff
## 1 215.778 10.33008
3.8 Rename and select columns
Rename and select the columns required by the flowviz:
# Rename the columns to the names used by the flowviz and keep only the
# columns selected below.
flowviz_data <- flowviz_data %>%
  rename(
    interval_start_time = datetime_bin,
    altitude_band = height_bin,
    avg_u_speed = avg_u,
    avg_v_speed = avg_v
  ) %>%
  # Add +00 for interval_start_time. The timestamp is formatted explicitly
  # (in UTC, matching the suffix) instead of relying on the implicit
  # as.character() conversion in paste0(), which drops the time part for
  # midnight values in R >= 4.3 and would yield "2016-10-04+00".
  mutate(
    interval_start_time = paste0(
      format(interval_start_time, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
      "+00"
    )
  ) %>%
  # Select columns
  select(
    radar_id,
    interval_start_time,
    altitude_band,
    avg_u_speed,
    avg_v_speed,
    avg_dens
  )
Preview:
head(flowviz_data)
## # A tibble: 1 x 6
## radar_id interval_start_… altitude_band avg_u_speed avg_v_speed avg_dens
## <fct> <chr> <chr> <dbl> <dbl> <dbl>
## 1 seang 2016-10-03 20:0… 1 -6.03 -8.36 18.6
3.9 Export to a CSV file
Export the data to a flowviz data file:
# Build the output path; processed_data_dir is concatenated as-is, so it is
# expected to end with a slash.
flowviz_file <- paste0(processed_data_dir, project_name, "_flowviz.csv")
# Write NA values as empty fields and leave out the row names column
write.csv(
  flowviz_data,
  file = flowviz_file,
  na = "",
  row.names = FALSE
)
3.10 Create metadata
Not yet implemented, currently created manually (see example).