library(tidyverse)
library(lubridate)
library(arrow)
library(timetk)
library(brpop)
library(piggyback)
Exported data
Packages
Dengue data
Total number of confirmed dengue cases, aggregated per municipality of residence and week of the first symptom’s onset.
# Weekly dengue case counts per municipality: read the aggregated parquet
# file, sum `freq` within each municipality and week, and store the total
# in the `cases` column.
dengue <- read_parquet("../dengue-data/parquet_aggregated/dengue_md.parquet") %>%
  group_by(mun) %>%
  summarise_by_time(
    .date_var = date,
    .by = "week",
    cases = sum(freq, na.rm = TRUE)
  ) %>%
  ungroup()

# Population
Estimated municipality population per year.
# Population rows returned by `mun_pop_totals()`, limited to the calendar
# years spanned by the dengue series. `mun` is converted to character so it
# can be matched against the character `mun` key of `dengue`.
pop <- mun_pop_totals() %>%
  filter(year %in% (year(min(dengue$date)):year(max(dengue$date)))) %>%
  mutate(mun = as.character(mun))

# Human Development Index
# Municipality code and the `idhm2010` indicator, exposed as `hdi2010`.
hdi <- read_parquet("../socioeconomic-data/hdi.parquet") %>%
  select(code_muni, idhm2010) %>%
  rename(hdi2010 = idhm2010)

# Weather data
Weather indicators estimated by using zonal statistics of the territorial area of the municipality.
Precipitation
Total estimated precipitation, per municipality and week, in millimeters.
# Weekly precipitation totals: keep the `pr_sum` indicator, restrict it to
# the date span of the dengue series, and sum the values within each week.
# NOTE(review): only `date` and `value` are selected before aggregating, so
# any municipality identifier the dataset carries is dropped and each weekly
# total is computed over all remaining rows. Confirm this is intended, since
# the indicator is described as being per municipality.
prec <- open_dataset(sources = "../weather-data/parquet/brdwgd/pr.parquet") %>%
  filter(name == "pr_sum") %>%
  select(date, value) %>%
  collect() %>%
  filter(
    date >= min(dengue$date),
    date <= max(dengue$date)
  ) %>%
  summarise_by_time(
    .date_var = date,
    .by = "week",
    prec = sum(value, na.rm = TRUE)
  )

# Average maximum temperature
Average of maximum temperatures, per municipality and week, in degrees Celsius.
# Weekly mean of maximum temperatures: keep the `Tmax_mean` indicator,
# restrict it to the date span of the dengue series, and average the values
# within each week.
# NOTE(review): only `date` and `value` are selected before aggregating, so
# any municipality identifier the dataset carries is dropped and each weekly
# mean is computed over all remaining rows. Confirm this is intended, since
# the indicator is described as being per municipality.
tmax <- open_dataset(sources = "../weather-data/parquet/brdwgd/tmax.parquet") %>%
  filter(name == "Tmax_mean") %>%
  select(date, value) %>%
  collect() %>%
  filter(
    date >= min(dengue$date),
    date <= max(dengue$date)
  ) %>%
  summarise_by_time(
    .date_var = date,
    .by = "week",
    tmax = mean(value, na.rm = TRUE)
  )

# Average minimum temperature
Average of minimum temperatures, per municipality and week, in degrees Celsius.
# Weekly mean of minimum temperatures: keep the `Tmin_mean` indicator,
# restrict it to the date span of the dengue series, and average the values
# within each week.
# NOTE(review): only `date` and `value` are selected before aggregating, so
# any municipality identifier the dataset carries is dropped and each weekly
# mean is computed over all remaining rows. Confirm this is intended, since
# the indicator is described as being per municipality.
tmin <- open_dataset(sources = "../weather-data/parquet/brdwgd/tmin.parquet") %>%
  filter(name == "Tmin_mean") %>%
  select(date, value) %>%
  collect() %>%
  filter(
    date >= min(dengue$date),
    date <= max(dengue$date)
  ) %>%
  summarise_by_time(
    .date_var = date,
    .by = "week",
    tmin = mean(value, na.rm = TRUE)
  )

# Join data
# Assemble the export table. Population is matched on municipality and the
# calendar year of each week (via a temporary `dengue_year` key that is
# dropped afterwards), HDI on municipality, and the three weather series on
# the week date. Inner joins keep only rows present in every source.
# NOTE(review): the weather tables are matched on `date` alone, so every
# municipality receives the same weather values for a given week.
exp_data <- dengue %>%
  mutate(dengue_year = year(date)) %>%
  inner_join(pop, by = join_by(dengue_year == year, mun)) %>%
  select(-dengue_year) %>%
  inner_join(hdi, by = join_by(mun == code_muni)) %>%
  inner_join(prec, by = join_by(date)) %>%
  inner_join(tmax, by = join_by(date)) %>%
  inner_join(tmin, by = join_by(date))

# Lag weather variables
Lag one and two weeks.
# Add one- and two-week lags of the weather variables (`*_lag1`, `*_lag2`).
# The table stacks one weekly series per municipality, so the lags must be
# computed within each municipality and in date order. On the ungrouped
# table the shift runs across municipality boundaries, and the first weeks
# of a municipality would inherit values from the last rows of the previous
# one instead of NA.
exp_data <- exp_data %>%
  arrange(mun, date) %>%
  group_by(mun) %>%
  tk_augment_lags(.value = c(prec, tmax, tmin), .lags = 1:2) %>%
  ungroup()

# Overview
glimpse(exp_data)
Rows: 2,202,058
Columns: 14
$ mun <chr> "110001", "110001", "110001", "110001", "110001", "110001", …
$ date <date> 2011-03-13, 2011-03-20, 2011-03-27, 2011-04-03, 2011-04-10,…
$ cases <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ pop <int> 24737, 24737, 24737, 24737, 24737, 24737, 24737, 24737, 2473…
$ hdi2010 <dbl> 0.641, 0.641, 0.641, 0.641, 0.641, 0.641, 0.641, 0.641, 0.64…
$ prec <dbl> 5977667, 5832714, 6581013, 6411878, 5538108, 4712170, 412114…
$ tmax <dbl> 29.89511, 29.59591, 29.72916, 29.27960, 29.05301, 29.32273, …
$ tmin <dbl> 20.19973, 20.17791, 20.34251, 19.15049, 19.11662, 19.37211, …
$ prec_lag1 <dbl> NA, 5977667, 5832714, 6581013, 6411878, 5538108, 4712170, 41…
$ tmax_lag1 <dbl> NA, 29.89511, 29.59591, 29.72916, 29.27960, 29.05301, 29.322…
$ tmin_lag1 <dbl> NA, 20.19973, 20.17791, 20.34251, 19.15049, 19.11662, 19.372…
$ prec_lag2 <dbl> NA, NA, 5977667, 5832714, 6581013, 6411878, 5538108, 4712170…
$ tmax_lag2 <dbl> NA, NA, 29.89511, 29.59591, 29.72916, 29.27960, 29.05301, 29…
$ tmin_lag2 <dbl> NA, NA, 20.19973, 20.17791, 20.34251, 19.15049, 19.11662, 19…
Municipalities count: 5167
Export data
# Write the dataset to disk as a parquet file.
write_parquet(exp_data, sink = "exp_data.parquet")

# Write the dataset as CSV, compress it into a zip archive, and delete the
# uncompressed CSV so only the archive remains.
write_csv(exp_data, file = "exp_data.csv")
zip::zip(zipfile = "exp_data.csv.zip", files = "exp_data.csv")
unlink(x = "exp_data.csv")

# Data dictionary
- mun: character. Municipality code with 6 digits
- date: date. Ceiling date of the week, in YYYY-MM-DD format
- cases: integer. Confirmed dengue cases count
- pop: integer. Estimated population of the municipality
- hdi2010: double. Human Development Index for 2010
- prec: double. Total precipitation, mm
- tmax: double. Average maximum temperature, Celsius
- tmin: double. Average minimum temperature, Celsius
- *_lag1: double. One-week lagged variables
- *_lag2: double. Two-week lagged variables
Include on current GitHub release
# Release assets to upload
files_list <- c("exp_data.parquet", "exp_data.csv.zip")

# Upload each asset to the repository release, with `overwrite = TRUE`
walk(
  files_list,
  \(asset) pb_upload(
    file = asset,
    repo = "rfsaldanha/dengue",
    overwrite = TRUE
  )
)

# Files exp_data.parquet and exp_data.csv.zip available on current release: https://github.com/rfsaldanha/dengue/releases
Session info
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.2 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
locale:
[1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] piggyback_0.1.4.9006 brpop_0.3.0 timetk_2.8.3
[4] arrow_12.0.1 lubridate_1.9.2 forcats_1.0.0
[7] stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1
[10] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
[13] ggplot2_3.4.2 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] xts_0.13.1 bit64_4.0.5 httr_1.4.6
[4] DiceDesign_1.9 gh_1.4.0 tools_4.1.2
[7] utf8_1.2.3 R6_2.5.1 rpart_4.1.16
[10] colorspace_2.1-0 yardstick_1.2.0 nnet_7.3-17
[13] withr_2.5.0 tidyselect_1.2.0 curl_5.0.1
[16] bit_4.0.5 compiler_4.1.2 httr2_0.2.3
[19] cli_3.6.1 scales_1.2.1 tune_1.1.1
[22] rappdirs_0.3.3 digest_0.6.31 rmarkdown_2.22
[25] pkgconfig_2.0.3 htmltools_0.5.5 parallelly_1.36.0
[28] lhs_1.1.6 fastmap_1.1.1 htmlwidgets_1.6.2
[31] rlang_1.1.1 rstudioapi_0.14 generics_0.1.3
[34] zoo_1.8-12 jsonlite_1.8.5 vroom_1.6.3
[37] zip_2.3.0 magrittr_2.0.3 Matrix_1.5-4.1
[40] Rcpp_1.0.10 munsell_0.5.0 fansi_1.0.4
[43] GPfit_1.0-8 lifecycle_1.0.3 furrr_0.3.1
[46] stringi_1.7.12 yaml_2.3.7 MASS_7.3-55
[49] recipes_1.0.6 grid_4.1.2 parallel_4.1.2
[52] listenv_0.9.0 crayon_1.5.2 lattice_0.20-45
[55] splines_4.1.2 hms_1.1.3 knitr_1.43
[58] pillar_1.9.0 dials_1.2.0 future.apply_1.11.0
[61] codetools_0.2-18 parsnip_1.1.0 glue_1.6.2
[64] evaluate_0.21 rsample_1.1.1 data.table_1.14.8
[67] vctrs_0.6.3 tzdb_0.4.0 foreach_1.5.2
[70] gtable_0.3.3 future_1.32.0 assertthat_0.2.1
[73] cachem_1.0.8 xfun_0.39 gower_1.0.1
[76] mime_0.12 prodlim_2023.03.31 gitcreds_0.1.2
[79] class_7.3-20 survival_3.2-13 timeDate_4022.108
[82] iterators_1.0.14 memoise_2.0.1 hardhat_1.3.0
[85] lava_1.7.2.1 workflows_1.1.3 timechange_0.2.0
[88] globals_0.16.2 ipred_0.9-14