library(tidyverse)
library(lubridate)
library(arrow)
library(timetk)
library(dtwclust)
library(kableExtra)
library(tictoc)
source("../functions.R")
Hierarchical clustering
with scaled cases
This notebook explores hierarchical clustering of municipalities based on their scaled dengue case time series.
Packages
Load data
Load the bundled data (326 municipalities with population \(\geq\) 100,000 inhabitants) with standardized cases, keeping only the municipality code, date and cases variables.
# Open the bundled parquet file, keep only the municipality code, date and
# cases columns, then pull the result into memory with collect().
# NOTE(review): data_dir() is not defined in this file; presumably it comes
# from ../functions.R and resolves paths inside the data directory -- confirm.
tdengue <- open_dataset(sources = data_dir("bundled_data/tdengue.parquet")) %>%
  select(mun, date, cases) %>%
  collect()

# Prepare data ----
# Reshape the long table into the list-of-series input passed to tsclust():
#   1. prefix each municipality code with "m_" (these values become the
#      column names created by pivot_wider());
#   2. sort by municipality and date before widening;
#   3. widen to one column per municipality, one row per date;
#   4. drop the date column so only case values remain;
#   5. transpose so that each row holds one municipality's series;
#   6. convert the rows into a list of time series with tslist().
# NOTE(review): this assumes every municipality covers the same set of dates;
# otherwise pivot_wider() fills the gaps with NA -- confirm against the data.
tdengue <- tdengue %>%
  mutate(mun = paste0("m_", mun)) %>%
  arrange(mun, date) %>%
  pivot_wider(names_from = mun, values_from = cases) %>%
  select(-date) %>%
  t() %>%
  tslist()

# Clustering ----
# Hierarchical clustering of the municipality series into k = 5 clusters,
# using the "dtw_basic" distance and average linkage. A seed is passed to
# tsclust() and tic()/toc() time the whole call.
tic()
clust <- tsclust(
  series = tdengue,
  type = "hierarchical",
  k = 5,
  distance = "dtw_basic",
  seed = 123,
  control = hierarchical_control(method = "average")
)
toc()
#> 44.964 sec elapsed
# Plot the dendrogram of the hierarchical clustering result. Leaf labels are
# switched off with labels = FALSE.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
ggdendro::ggdendrogram(clust, size = 0.3, labels = FALSE)
Session info
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
Random number generation:
RNG: L'Ecuyer-CMRG
Normal: Inversion
Sample: Rejection
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Paris
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] tictoc_1.2 kableExtra_1.3.4 dtwclust_5.5.12 dtw_1.23-1
[5] proxy_0.4-27 timetk_2.9.0 arrow_13.0.0.1 lubridate_1.9.3
[9] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.3 purrr_1.0.2
[13] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.4
[17] tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] rlang_1.1.2 magrittr_2.0.3 clue_0.3-65
[4] furrr_0.3.1 flexclust_1.4-1 compiler_4.3.2
[7] systemfonts_1.0.5 vctrs_0.6.4 reshape2_1.4.4
[10] rvest_1.0.3 lhs_1.1.6 tune_1.1.2
[13] pkgconfig_2.0.3 fastmap_1.1.1 ellipsis_0.3.2
[16] labeling_0.4.3 utf8_1.2.4 promises_1.2.1
[19] rmarkdown_2.25 prodlim_2023.08.28 tzdb_0.4.0
[22] bit_4.0.5 xfun_0.41 modeltools_0.2-23
[25] jsonlite_1.8.7 recipes_1.0.8 later_1.3.1
[28] parallel_4.3.2 cluster_2.1.4 R6_2.5.1
[31] stringi_1.7.12 rsample_1.2.0 parallelly_1.36.0
[34] rpart_4.1.21 Rcpp_1.0.11 assertthat_0.2.1
[37] dials_1.2.0 iterators_1.0.14 knitr_1.45
[40] future.apply_1.11.0 zoo_1.8-12 httpuv_1.6.12
[43] Matrix_1.6-1.1 splines_4.3.2 nnet_7.3-19
[46] timechange_0.2.0 tidyselect_1.2.0 rstudioapi_0.15.0
[49] yaml_2.3.7 timeDate_4022.108 codetools_0.2-19
[52] listenv_0.9.0 lattice_0.22-5 plyr_1.8.9
[55] shiny_1.7.5.1 withr_2.5.2 evaluate_0.23
[58] future_1.33.0 survival_3.5-7 RcppParallel_5.1.7
[61] xml2_1.3.5 xts_0.13.1 pillar_1.9.0
[64] foreach_1.5.2 stats4_4.3.2 shinyjs_2.1.0
[67] generics_0.1.3 hms_1.1.3 munsell_0.5.0
[70] scales_1.2.1 xtable_1.8-4 globals_0.16.2
[73] class_7.3-22 glue_1.6.2 tools_4.3.2
[76] data.table_1.14.8 RSpectra_0.16-1 webshot_0.5.5
[79] gower_1.0.1 grid_4.3.2 yardstick_1.2.0
[82] ipred_0.9-14 colorspace_2.1-0 cli_3.6.1
[85] DiceDesign_1.9 workflows_1.1.3 parsnip_1.1.1
[88] fansi_1.0.5 viridisLite_0.4.2 ggdendro_0.1.23
[91] svglite_2.1.2 lava_1.7.3 gtable_0.3.4
[94] GPfit_1.0-8 digest_0.6.33 ggrepel_0.9.4
[97] farver_2.1.1 htmlwidgets_1.6.2 htmltools_0.5.7
[100] lifecycle_1.0.4 httr_1.4.7 hardhat_1.3.0
[103] mime_0.12 bit64_4.0.5 MASS_7.3-60