library(tidyverse)
# remotes::install_github("rfsaldanha/microdatasus")
library(microdatasus)
library(arrow)
source("../functions.R")
Raw files, chikungunya
This notebook presents the process of downloading SINAN Dengue raw data files from DataSUS, pre-processing them with the {microdatasus}
package, and exporting the raw files to parquet
files.
Packages
Execution node
# Print the execution node identifier. node_name() is not defined in this
# file; presumably it comes from ../functions.R -- TODO confirm.
node_name()
[1] "rfsaldanha"
Data download
# Disabled download chunk (kept for reference; every line is commented out).
# For each year from 2016 to 2021 it would:
#   1. fetch that single year of "SINAN-ZIKA" records with fetch_datasus();
#   2. write the unprocessed table to zika_data/parquets_raw/raw_zika_<year>.parquet;
#   3. pre-process the table with process_sinan_zika();
#   4. write the processed table to zika_data/parquets/zika_<year>.parquet;
#   5. drop the temporary object before the next iteration.
# data_dir() is not defined in this file; presumably it resolves a path under
# the project's data directory -- TODO confirm against ../functions.R.
# for(a in 2016:2021){
# tmp <- fetch_datasus(
# year_start = a, year_end = a,
# information_system = "SINAN-ZIKA"
# )
#
# write_parquet(x = tmp, sink = paste0(data_dir("zika_data/parquets_raw/raw_zika_"),a, ".parquet"))
#
# tmp <- process_sinan_zika(data = tmp)
#
# write_parquet(x = tmp, sink = paste0(data_dir("zika_data/parquets/zika_"), a, ".parquet"))
#
# rm(tmp)
# }
Warning
This code chunk is commented out to avoid unnecessarily downloading the raw DBC files from DataSUS again.
Overview
# Open the parquet files under the dengue directory as a single Arrow
# dataset. data_dir() is not defined in this file; presumably it comes from
# ../functions.R and resolves the path inside the data folder -- TODO confirm.
dengue <- open_dataset(sources = data_dir("dengue_data/parquets"))

# Show the dataset dimensions plus each column's type and first values.
dengue %>% glimpse()
FileSystemDataset with 15 Parquet files
19,180,457 rows x 75 columns
$ TP_NOT <string> "Individual", "Individual", "Individual", "Individual", …
$ ID_AGRAVO <string> "A90", "A90", "A90", "A90", "A90", "A90", "A90", "A90", …
$ DT_NOTIFIC <string> "2007-02-12", "2007-02-12", "2007-02-26", "2007-02-12", …
$ SEM_NOT <string> "200707", "200707", "200709", "200707", "200706", "20070…
$ NU_ANO <string> "2007", "2007", "2007", "2007", "2007", "2007", "2007", …
$ SG_UF_NOT <string> "Maranhão", "Maranhão", "Pernambuco", "Bahia", "Bahia", …
$ ID_MUNICIP <string> "210780", "210780", "260640", "292150", "292150", "29215…
$ ID_REGIONA <string> "1577", "1577", "1499", "1410", "1410", "1410", "1410", …
$ ID_UNIDADE <string> "2454793", "2454793", "2436027", "3340392", "3340392", "…
$ DT_SIN_PRI <string> "2007-02-09", "2007-02-10", "2007-02-21", "2007-01-28", …
$ SEM_PRI <string> "200706", "200706", "200708", "200705", "200703", "20070…
$ CS_SEXO <string> "Feminino", "Feminino", "Masculino", "Feminino", "Mascul…
$ CS_GESTANT <string> "Ignorado", "Ignorado", "Não se aplica", "Não se aplica"…
$ CS_RACA <string> "Parda", "Parda", "Branca", "Branca", "Parda", "Parda", …
$ CS_ESCOL_N <string> "5a à 8a série incompleta do EF (antigo ginásio ou 1o gr…
$ SG_UF <string> "Maranhão", "Maranhão", "Pernambuco", "Bahia", "Bahia", …
$ ID_MN_RESI <string> "210780", "210780", "260640", "292150", "292150", "29215…
$ ID_RG_RESI <string> "1577", "1577", "1499", "1410", "1410", "1410", "1410", …
$ ID_PAIS <string> "BRASIL", "BRASIL", "BRASIL", "BRASIL", "BRASIL", "BRASI…
$ NDUPLIC_N <string> "Não identificado", "Não identificado", NA, NA, NA, NA, …
$ DT_DIGITA <string> NA, NA, "2007-03-08", "2007-05-22", "2007-05-02", "2007-…
$ CS_FLXRET <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ FLXRECEBI <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ MIGRADO_W <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ DT_INVEST <string> "2007-02-12", "2007-02-12", "2007-02-26", "2007-02-12", …
$ ID_OCUPA_N <string> "Nao informada", "Nao informada", NA, "Nao informada", "…
$ DT_SORO <string> NA, NA, "2007-02-26", "2007-02-01", "2007-02-09", NA, NA…
$ RESUL_SORO <string> "Não realizado", "Não realizado", "Não reagente", "Reage…
$ HISTOPA_N <string> "Não realizado", "Não realizado", "Não realizado", "Não …
$ DT_VIRAL <string> NA, NA, NA, NA, NA, NA, NA, NA, "2007-02-14", NA, NA, NA…
$ RESUL_VI_N <string> "Não realizado", "Não realizado", NA, "Não realizado", "…
$ SOROTIPO <string> NA, NA, NA, NA, NA, NA, NA, NA, "DEN 3", NA, NA, NA, NA,…
$ IMUNOH_N <string> "Não realizado", "Não realizado", "Não realizado", "Não …
$ DT_PCR <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ RESUL_PCR_ <string> "Não realizado", "Não realizado", NA, "Não realizado", "…
$ CLASSI_FIN <string> "Dengue clássico", "Dengue clássico", "Descartado", "Den…
$ CRITERIO <string> "Clínico epidemiológico", "Clínico epidemiológico", "Clí…
$ TPAUTOCTO <string> "Sim", "Sim", NA, "Não", "Sim", NA, NA, "Não", "Sim", NA…
$ COUFINF <string> "Maranhão", "Maranhão", NA, "Bahia", "Bahia", NA, NA, "B…
$ COPAISINF <string> "BRASIL", "BRASIL", NA, "BRASIL", "BRASIL", NA, NA, "BRA…
$ COMUNINF <string> "210780", "210780", NA, "292150", "292150", NA, NA, "292…
$ DOENCA_TRA <string> "Não", "Não", NA, "Não", "Não", NA, NA, "Não", "Não", NA…
$ EVOLUCAO <string> "Cura", "Cura", NA, "Cura", "Cura", NA, NA, "Cura", "Cur…
$ DT_OBITO <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ DT_ENCERRA <string> "2007-05-08", "2007-05-08", "2007-04-19", "2007-05-28", …
$ MANI_HEMOR <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ EPISTAXE <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ GENGIVO <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ METRO <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PETEQUIAS <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ HEMATURA <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ SANGRAM <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ LACO_N <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PLASMATICO <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ EVIDENCIA <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PLAQ_MENOR <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ CON_FHD <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ COMPLICA <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ HOSPITALIZ <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ DT_INTERNA <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ UF <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ MUNICIPIO <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ IDADEminutos <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ IDADEhoras <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ IDADEdias <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ IDADEmeses <string> NA, NA, NA, "0", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ IDADEanos <string> "3", "1", "3", NA, "0", "1", "3", "0", "3", "0", "1", "2…
$ munResStatus <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ munResTipo <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ munResNome <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ munResUf <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ munResLat <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ munResLon <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ munResAlt <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ munResArea <string> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
Call `print()` for full schema details
Session info
# Record the R version, platform, locale and attached/loaded package versions
# used for this run, for reproducibility.
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Paris
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] arrow_14.0.0.2 microdatasus_2.2.4 lubridate_1.9.3 forcats_1.0.0
[5] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
[9] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.4.4 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] bit_4.0.5 gtable_0.3.4 jsonlite_1.8.8 compiler_4.3.2
[5] tidyselect_1.2.0 assertthat_0.2.1 scales_1.3.0 yaml_2.3.8
[9] fastmap_1.1.1 R6_2.5.1 generics_0.1.3 knitr_1.45
[13] htmlwidgets_1.6.4 munsell_0.5.0 pillar_1.9.0 tzdb_0.4.0
[17] rlang_1.1.3 utf8_1.2.4 stringi_1.8.3 xfun_0.41
[21] bit64_4.0.5 timechange_0.3.0 cli_3.6.2 withr_3.0.0
[25] magrittr_2.0.3 digest_0.6.34 grid_4.3.2 rstudioapi_0.15.0
[29] hms_1.1.3 lifecycle_1.0.4 vctrs_0.6.5 data.table_1.15.0
[33] evaluate_0.23 glue_1.7.0 fansi_1.0.6 colorspace_2.1-0
[37] rmarkdown_2.25 tools_4.3.2 pkgconfig_2.0.3 htmltools_0.5.7