library(tidyverse)
library(lubridate)
library(arrow)
library(timetk)
library(dtwclust)
library(kableExtra)
library(tictoc)
source("../functions.R")
Hierarchical clustering
with scaled cases
This notebook aims to explore hierarchical clustering of municipalities based on their scaled dengue case time series.
Packages
Load data
Load the bundled data (326 municipalities, pop \(\geq\) 100k inhab.) with standardized cases and keep only the municipality code, date and cases variables.
# Open the bundled parquet file, keep only the municipality code (`mun`),
# `date` and `cases` columns, and pull the result into memory with `collect()`.
# NOTE(review): `data_dir()` is not defined in this file; presumably it comes
# from "../functions.R" and resolves a path inside the data directory.
tdengue <- open_dataset(sources = data_dir("bundled_data/tdengue.parquet")) %>%
  select(mun, date, cases) %>%
  collect()
Prepare data
# Reshape the long (mun, date, cases) table into the list-of-series format
# expected by dtwclust.
tdengue <- tdengue %>%
  # Prefix each municipality code with "m_"; these values become the column
  # names after pivoting.
  mutate(mun = paste0("m_", mun)) %>%
  arrange(mun, date) %>%
  # One column per municipality, one row per date.
  pivot_wider(names_from = mun, values_from = cases) %>%
  select(-date) %>%
  # Transpose so that each row holds one municipality's series.
  t() %>%
  # Convert the matrix rows into a list of time series.
  tslist()
Clustering
# Time the clustering run; toc() prints the elapsed time.
tic()
# Hierarchical clustering of the municipality series into k = 5 clusters,
# using the "dtw_basic" distance and average linkage. The seed is fixed so the
# run is reproducible.
clust <- tsclust(
  series = tdengue,
  type = "hierarchical",
  k = 5,
  distance = "dtw_basic",
  seed = 123,
  control = hierarchical_control(method = "average")
)
toc()
44.964 sec elapsed
# Plot the dendrogram of the hierarchical clustering result. Labels are turned
# off (`labels = FALSE`); `size` is passed through to control line thickness.
ggdendro::ggdendrogram(clust, size = 0.3, labels = FALSE)
Session info
# Print the R version, platform, locale and package versions used for this run.
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
Random number generation:
RNG: L'Ecuyer-CMRG
Normal: Inversion
Sample: Rejection
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Paris
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] tictoc_1.2 kableExtra_1.3.4 dtwclust_5.5.12 dtw_1.23-1
[5] proxy_0.4-27 timetk_2.9.0 arrow_13.0.0.1 lubridate_1.9.3
[9] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.3 purrr_1.0.2
[13] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.4
[17] tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] rlang_1.1.2 magrittr_2.0.3 clue_0.3-65
[4] furrr_0.3.1 flexclust_1.4-1 compiler_4.3.2
[7] systemfonts_1.0.5 vctrs_0.6.4 reshape2_1.4.4
[10] rvest_1.0.3 lhs_1.1.6 tune_1.1.2
[13] pkgconfig_2.0.3 fastmap_1.1.1 ellipsis_0.3.2
[16] labeling_0.4.3 utf8_1.2.4 promises_1.2.1
[19] rmarkdown_2.25 prodlim_2023.08.28 tzdb_0.4.0
[22] bit_4.0.5 xfun_0.41 modeltools_0.2-23
[25] jsonlite_1.8.7 recipes_1.0.8 later_1.3.1
[28] parallel_4.3.2 cluster_2.1.4 R6_2.5.1
[31] stringi_1.7.12 rsample_1.2.0 parallelly_1.36.0
[34] rpart_4.1.21 Rcpp_1.0.11 assertthat_0.2.1
[37] dials_1.2.0 iterators_1.0.14 knitr_1.45
[40] future.apply_1.11.0 zoo_1.8-12 httpuv_1.6.12
[43] Matrix_1.6-1.1 splines_4.3.2 nnet_7.3-19
[46] timechange_0.2.0 tidyselect_1.2.0 rstudioapi_0.15.0
[49] yaml_2.3.7 timeDate_4022.108 codetools_0.2-19
[52] listenv_0.9.0 lattice_0.22-5 plyr_1.8.9
[55] shiny_1.7.5.1 withr_2.5.2 evaluate_0.23
[58] future_1.33.0 survival_3.5-7 RcppParallel_5.1.7
[61] xml2_1.3.5 xts_0.13.1 pillar_1.9.0
[64] foreach_1.5.2 stats4_4.3.2 shinyjs_2.1.0
[67] generics_0.1.3 hms_1.1.3 munsell_0.5.0
[70] scales_1.2.1 xtable_1.8-4 globals_0.16.2
[73] class_7.3-22 glue_1.6.2 tools_4.3.2
[76] data.table_1.14.8 RSpectra_0.16-1 webshot_0.5.5
[79] gower_1.0.1 grid_4.3.2 yardstick_1.2.0
[82] ipred_0.9-14 colorspace_2.1-0 cli_3.6.1
[85] DiceDesign_1.9 workflows_1.1.3 parsnip_1.1.1
[88] fansi_1.0.5 viridisLite_0.4.2 ggdendro_0.1.23
[91] svglite_2.1.2 lava_1.7.3 gtable_0.3.4
[94] GPfit_1.0-8 digest_0.6.33 ggrepel_0.9.4
[97] farver_2.1.1 htmlwidgets_1.6.2 htmltools_0.5.7
[100] lifecycle_1.0.4 httr_1.4.7 hardhat_1.3.0
[103] mime_0.12 bit64_4.0.5 MASS_7.3-60