3 Process vp data for the bird migration flow visualization

In this chapter we’ll process the vp data (CSV file) and metadata for the bird migration flow visualization.

library(bioRad)
library(dplyr)
library(lubridate)
library(circular)
source("functions/load_settings.R")

3.1 Load settings

Load the same settings we defined and used before (see 1.3):

# Load the project settings. load_settings() is presumably provided by the
# sourced functions/load_settings.R; settings_file and radars_metadata_file
# are not defined in this chapter and are expected to exist already.
settings <- load_settings(settings_file, radars_metadata_file)

Found 3 radars from 1 countries in the settings.

3.2 Read data from CSV file

To allow this chapter to run without having to process all the vp data, we’ll load the data from the CSV file(s) rather than from the dataframe created in 2.

Search in the defined processed data directory for csv files with processed_ in the filename:

# Strip any trailing path separator(s) from the processed data directory
# before listing it. sub() only removes separators that are really there, so
# a directory given without a trailing slash is left intact (chopping the
# last character unconditionally would corrupt such a path).
processed_data_dir_no_slash <- sub("[/\\\\]+$", "", processed_data_dir)

# List files whose name contains "processed_" and ends in a literal ".csv":
# the dot is escaped and the pattern is anchored at the end, so names such as
# "processed_xcsv" or "processed_a.csv.bak" are not picked up.
csv_files <- dir(
  processed_data_dir_no_slash,
  pattern = "processed_.*\\.csv$",
  full.names = TRUE,
  recursive = FALSE
)
csv_files
## [1] "../data/processed/example/example_processed_3_radars_20161003_20161004.csv"

Read and concatenate files:

# Read every CSV file into its own data frame ...
csv_data <- lapply(csv_files, function(csv_path) read.csv(csv_path))
# ... and stack the resulting data frames row-wise into a single one
flowviz_data <- csv_data %>% bind_rows()

Preview of the 1740 rows:

head(flowviz_data)
##   radar_id            datetime HGHT  u  v dens dd ff       DBZH mtr
## 1    seang 2016-10-03 20:00:00    0 NA NA   NA NA NA         NA  NA
## 2    seang 2016-10-03 20:00:00  200 NA NA   NA NA NA         NA  NA
## 3    seang 2016-10-03 20:00:00  400 NA NA   NA NA NA  3.8646252  NA
## 4    seang 2016-10-03 20:00:00  600 NA NA   NA NA NA -0.8646818  NA
## 5    seang 2016-10-03 20:00:00  800 NA NA   NA NA NA -2.6370287  NA
## 6    seang 2016-10-03 20:00:00 1000 NA NA   NA NA NA -4.5374875  NA
##   day_night date_of_sunset exclusion_reason
## 1     night       20161003         datetime
## 2     night       20161003         datetime
## 3     night       20161003         datetime
## 4     night       20161003         datetime
## 5     night       20161003         datetime
## 6     night       20161003         datetime

3.3 Add time bins

Add time bins per hour:

# Parse datetime as UTC and round it down to the start of its hour
flowviz_data <- flowviz_data %>%
  mutate(
    datetime_bin = floor_date(as.POSIXct(datetime, tz = "UTC"), unit = "hour")
  )

3.4 Add height bins

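Add two height bins: bin 1 for heights from 200 up to (but not including) 2000, and bin 2 for heights of 2000 and above. Heights below 200 are not assigned to a bin (NA):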

# Assign a height bin label per row. Conditions are evaluated in order, so
# the second one only applies to heights below 2000; anything under 200
# (or a missing height) matches neither and gets NA.
flowviz_data <- flowviz_data %>%
  mutate(
    height_bin = case_when(
      HGHT >= 2000 ~ "2",
      HGHT >= 200 ~ "1"
    )
  )

Preview:

head(flowviz_data, 20)
##    radar_id            datetime HGHT  u  v dens dd ff       DBZH mtr
## 1     seang 2016-10-03 20:00:00    0 NA NA   NA NA NA         NA  NA
## 2     seang 2016-10-03 20:00:00  200 NA NA   NA NA NA         NA  NA
## 3     seang 2016-10-03 20:00:00  400 NA NA   NA NA NA  3.8646252  NA
## 4     seang 2016-10-03 20:00:00  600 NA NA   NA NA NA -0.8646818  NA
## 5     seang 2016-10-03 20:00:00  800 NA NA   NA NA NA -2.6370287  NA
## 6     seang 2016-10-03 20:00:00 1000 NA NA   NA NA NA -4.5374875  NA
## 7     seang 2016-10-03 20:00:00 1200 NA NA   NA NA NA         NA  NA
## 8     seang 2016-10-03 20:00:00 1400 NA NA   NA NA NA         NA  NA
## 9     seang 2016-10-03 20:00:00 1600 NA NA   NA NA NA         NA  NA
## 10    seang 2016-10-03 20:00:00 1800 NA NA   NA NA NA         NA  NA
## 11    seang 2016-10-03 20:00:00 2000 NA NA   NA NA NA         NA  NA
## 12    seang 2016-10-03 20:00:00 2200 NA NA   NA NA NA         NA  NA
## 13    seang 2016-10-03 20:00:00 2400 NA NA   NA NA NA         NA  NA
## 14    seang 2016-10-03 20:00:00 2600 NA NA   NA NA NA         NA  NA
## 15    seang 2016-10-03 20:00:00 2800 NA NA   NA NA NA         NA  NA
## 16    seang 2016-10-03 20:00:00 3000 NA NA   NA NA NA         NA  NA
## 17    seang 2016-10-03 20:00:00 3200 NA NA   NA NA NA         NA  NA
## 18    seang 2016-10-03 20:00:00 3400 NA NA   NA NA NA         NA  NA
## 19    seang 2016-10-03 20:00:00 3600 NA NA   NA NA NA         NA  NA
## 20    seang 2016-10-03 20:00:00 3800 NA NA   NA NA NA         NA  NA
##    day_night date_of_sunset exclusion_reason        datetime_bin
## 1      night       20161003         datetime 2016-10-03 20:00:00
## 2      night       20161003         datetime 2016-10-03 20:00:00
## 3      night       20161003         datetime 2016-10-03 20:00:00
## 4      night       20161003         datetime 2016-10-03 20:00:00
## 5      night       20161003         datetime 2016-10-03 20:00:00
## 6      night       20161003         datetime 2016-10-03 20:00:00
## 7      night       20161003         datetime 2016-10-03 20:00:00
## 8      night       20161003         datetime 2016-10-03 20:00:00
## 9      night       20161003         datetime 2016-10-03 20:00:00
## 10     night       20161003         datetime 2016-10-03 20:00:00
## 11     night       20161003         datetime 2016-10-03 20:00:00
## 12     night       20161003         datetime 2016-10-03 20:00:00
## 13     night       20161003         datetime 2016-10-03 20:00:00
## 14     night       20161003         datetime 2016-10-03 20:00:00
## 15     night       20161003         datetime 2016-10-03 20:00:00
## 16     night       20161003         datetime 2016-10-03 20:00:00
## 17     night       20161003         datetime 2016-10-03 20:00:00
## 18     night       20161003         datetime 2016-10-03 20:00:00
## 19     night       20161003         datetime 2016-10-03 20:00:00
## 20     night       20161003         datetime 2016-10-03 20:00:00
##    height_bin
## 1        <NA>
## 2           1
## 3           1
## 4           1
## 5           1
## 6           1
## 7           1
## 8           1
## 9           1
## 10          1
## 11          2
## 12          2
## 13          2
## 14          2
## 15          2
## 16          2
## 17          2
## 18          2
## 19          2
## 20          2

3.5 Filter out low density data

Only keep data with a density above or equal to 10 (birds) and not marked to be excluded in 2:

# Keep only rows with a density of at least 10 that carry no exclusion reason.
# "No reason" can be an empty string or NA: read.csv reads a column whose
# fields are all blank as logical NA, in which case `exclusion_reason == ""`
# on its own evaluates to NA and filter() would drop every row.
flowviz_data <- flowviz_data %>%
  filter(dens >= 10) %>%
  filter(is.na(exclusion_reason) | exclusion_reason == "")

3.6 Aggregate data

Aggregate the data by radar, datetime bin and height bin, taking the mean for every value. For the direction dd we need to calculate the circular mean, with a little helper function:

# Circular mean of a set of directions expressed in degrees.
#
# directions: numeric vector of directions in degrees; NA values are ignored.
#
# Returns a single non-negative number of degrees (negative means are mapped
# onto their positive equivalent), or NA_real_ when no mean is available:
# the input is empty, every value is NA, or the underlying circular mean is
# itself NA.
circular_mean_degrees <- function(directions) {
  # If all directions are NA, mean.circular will throw an error, so we test
  # it beforehand. NA_real_ keeps the return type numeric in every branch.
  if (all(is.na(directions))) {
    return(NA_real_)
  }

  circular_mean <- as.numeric(
    mean(circular(directions, units = "degrees"), na.rm = TRUE)[[1]]
  )

  # Convert negative to positive degrees (e.g. -45 = 315). The modulo passes
  # an NA mean through unchanged, whereas `if (circular_mean < 0)` would stop
  # with "missing value where TRUE/FALSE needed".
  circular_mean %% 360
}

Aggregate data:

# Collapse the data to one row per radar, hourly bin and height bin. Linear
# quantities are averaged with missing values ignored; the direction dd goes
# through the circular mean helper instead of an arithmetic mean.
flowviz_data <- flowviz_data %>%
  group_by(radar_id, datetime_bin, height_bin) %>%
  summarise(
    avg_u = mean(u, na.rm = TRUE),
    avg_v = mean(v, na.rm = TRUE),
    avg_dens = mean(dens, na.rm = TRUE),
    avg_dd = circular_mean_degrees(dd),
    avg_ff = mean(ff, na.rm = TRUE)
  ) %>%
  ungroup()

Preview:

head(data.frame(flowviz_data))
##   radar_id        datetime_bin height_bin     avg_u     avg_v avg_dens
## 1    seang 2016-10-03 20:00:00          1 -6.028157 -8.363182 18.63652
##    avg_dd   avg_ff
## 1 215.778 10.33008

3.7 Filter out empty rows

Only keep data where at least one of the calculated values is not NA:

# Drop a row only when every aggregated value is missing, i.e. keep it as
# soon as at least one of them is available
flowviz_data <- flowviz_data %>%
  filter(
    !(
      is.na(avg_u) &
        is.na(avg_v) &
        is.na(avg_dens) &
        is.na(avg_dd) &
        is.na(avg_ff)
    )
  )

Preview:

head(data.frame(flowviz_data))
##   radar_id        datetime_bin height_bin     avg_u     avg_v avg_dens
## 1    seang 2016-10-03 20:00:00          1 -6.028157 -8.363182 18.63652
##    avg_dd   avg_ff
## 1 215.778 10.33008

3.8 Rename and select columns

Rename and select the columns required by the flowviz:

# Rename the aggregated columns to the names used in the flowviz export
flowviz_data <- flowviz_data %>%
  rename(
    interval_start_time = datetime_bin,
    altitude_band = height_bin,
    avg_u_speed = avg_u,
    avg_v_speed = avg_v
  ) %>%

  # Add +00 for interval_start_time. The timestamp is formatted explicitly:
  # pasting a POSIXct directly relies on as.character(), which can omit the
  # time part for values at midnight and would yield e.g. "2016-10-04+00".
  mutate(
    interval_start_time = paste0(
      format(interval_start_time, "%Y-%m-%d %H:%M:%S", tz = "UTC"),
      "+00"
    )
  ) %>%

  # Select columns
  select(
    radar_id,
    interval_start_time,
    altitude_band,
    avg_u_speed,
    avg_v_speed,
    avg_dens
  )

Preview:

head(flowviz_data)
## # A tibble: 1 x 6
##   radar_id interval_start_… altitude_band avg_u_speed avg_v_speed avg_dens
##   <fct>    <chr>            <chr>               <dbl>       <dbl>    <dbl>
## 1 seang    2016-10-03 20:0… 1                   -6.03       -8.36     18.6

3.9 Export to a CSV file

Export the data to a flowviz data file:

# Write the flowviz data next to the other processed files; missing values
# become empty fields and row names are left out
write.csv(
  x = flowviz_data,
  file = paste0(processed_data_dir, project_name, "_flowviz.csv"),
  na = "",
  row.names = FALSE
)

3.10 Create metadata

Not yet implemented, currently created manually (see example).