From c00d1f6f2c888189d8636b309410651de8666f0e Mon Sep 17 00:00:00 2001 From: cmdoret Date: Fri, 28 Jul 2023 17:04:19 +0200 Subject: [PATCH 1/6] feat(A16): add pipeline --- scripts/A16.R | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 scripts/A16.R diff --git a/scripts/A16.R b/scripts/A16.R new file mode 100644 index 00000000..33a2f655 --- /dev/null +++ b/scripts/A16.R @@ -0,0 +1,74 @@ +# create ds object -------------------------------------------------------- + +ds <- create_dataset(id = "A16") + + +# download the data ------------------------------------------------------- + +ds <- download_data(ds) + +# data cleaning ----------------------------------------------------------- + +### Filter specific dimensions + +# To reduce the dataset size, we will not include sex and citizenship category +ds$data %>% + janitor::clean_names() %>% + dplyr::filter( + sex == "Sex - total" & + citizenship_category == "Citizenship (category) - total" + ) %>% + dplyr::select(-sex, -citizenship_category) -> ds$data + +# Pivot indicators into 1 indicator per column and cleanup names +ds$data %>% + tidyr::pivot_wider( + names_from = demographic_component, + values_from = demographic_balance_by_canton + ) %>% + janitor::clean_names() %>% + dplyr::rename( + "total_population" = population_on_1_january, + "births" = live_birth, + "deaths" = death + ) -> ds$data + +# Remove redundant or constant columns +# + acquisition of swiss citizenship is always 0 +# + The 'change of population type' component is +# always included in the demographic components of +# 'immigration' and 'net migration'. +ds$data %>% + dplyr::select( + -change_of_population_type, + -population_on_31_december, + -natural_change, + -acquisition_of_swiss_citizenship + ) -> ds$data + +# Remove rows with no canton and 0 values +# Excluding years 1971 - 1980 where there is only +# information about net migration +ds$data %>% + dplyr::filter(year >= 1981) %>% + dplyr::filter(canton != "No indication") -> ds$data + +# join the cleaned data to the postgres spatial units table --------------- + +spatial_map <- ds$data %>% + dplyr::select(canton) %>% + dplyr::distinct(canton) %>% + map_ds_spatial_units() + +ds$data %>% + dplyr::left_join(spatial_map, by = "canton") %>% + dplyr::select(-canton) -> ds$data + +## check that each spatial unit could be matched -> this has to be TRUE + +assertthat::noNA(ds$data$spatialunit_uid) + + +# ingest into postgres ---------------------------------------------------- + +### important: name the table as energiebilanz_schweiz_in_tera_joule From b99eec4fba8fc2f318314d6ebdb6aca3543091fe Mon Sep 17 00:00:00 2001 From: cmdoret Date: Wed, 2 Aug 2023 17:01:16 +0200 Subject: [PATCH 2/6] refactor(A16): rename net migration col --- scripts/A16.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/A16.R b/scripts/A16.R index 33a2f655..ff30e97c 100644 --- a/scripts/A16.R +++ b/scripts/A16.R @@ -30,7 +30,8 @@ ds$data %>% dplyr::rename( "total_population" = population_on_1_january, "births" = live_birth, - "deaths" = death + "deaths" = death, + "net_migration" = net_migration_incl_change_of_population_type ) -> ds$data # Remove redundant or constant columns @@ -71,4 +72,4 @@ assertthat::noNA(ds$data$spatialunit_uid) # ingest into postgres ---------------------------------------------------- -### important: name the table as energiebilanz_schweiz_in_tera_joule +### important: name the table as demographic_balance_by_canton From 0f14d4c65dacbd6385bbc35dc90cc54bbd87e809 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Wed, 2 Aug 2023 17:23:33 +0200 Subject: [PATCH 3/6] fix(A16): simplify column name --- scripts/A16.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/A16.R b/scripts/A16.R index ff30e97c..725e5420 100644 --- a/scripts/A16.R +++ b/scripts/A16.R @@ -31,7 +31,8 @@ ds$data %>% "total_population" = population_on_1_january, "births" = live_birth, "deaths" = death, - "net_migration" = net_migration_incl_change_of_population_type + "net_migration" = net_migration_incl_change_of_population_type, + "immigration" = immigration_incl_change_of_population_type ) -> ds$data # Remove redundant or constant columns From 71f718d515fc4fea7dc50f63bfff2584b64127c9 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Mon, 7 Aug 2023 15:35:17 +0200 Subject: [PATCH 4/6] refactor(A16): move to pipelines dir --- {scripts => pipelines/A16}/A16.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {scripts => pipelines/A16}/A16.R (100%) diff --git a/scripts/A16.R b/pipelines/A16/A16.R similarity index 100% rename from scripts/A16.R rename to pipelines/A16/A16.R From dae425a6a939c48ab7b6254c65b9f58d28e8918b Mon Sep 17 00:00:00 2001 From: cmdoret Date: Mon, 7 Aug 2023 16:43:19 +0200 Subject: [PATCH 5/6] refactor(A16): use postgres_export attribute --- pipelines/A16/A16.R | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/pipelines/A16/A16.R b/pipelines/A16/A16.R index 725e5420..a63d7e5c 100644 --- a/pipelines/A16/A16.R +++ b/pipelines/A16/A16.R @@ -15,13 +15,12 @@ ds <- download_data(ds) ds$data %>% janitor::clean_names() %>% dplyr::filter( - sex == "Sex - total" & - citizenship_category == "Citizenship (category) - total" + sex == "Sex - total" ) %>% - dplyr::select(-sex, -citizenship_category) -> ds$data + dplyr::select(-sex, -citizenship_category) -> ds$postgres_export # Pivot indicators into 1 indicator per column and cleanup names -ds$data %>% +ds$postgres_export %>% tidyr::pivot_wider( names_from = demographic_component, values_from = demographic_balance_by_canton @@ -33,38 +32,37 @@ ds$data %>% "deaths" = death, "net_migration" = net_migration_incl_change_of_population_type, "immigration" = immigration_incl_change_of_population_type - ) -> ds$data + ) -> ds$postgres_export # Remove redundant or constant columns # + acquisition of swiss citizenship is always 0 # + The 'change of population type' component is # always included in the demographic components of # 'immigration' and 'net migration'. -ds$data %>% +ds$postgres_export %>% dplyr::select( -change_of_population_type, -population_on_31_december, -natural_change, - -acquisition_of_swiss_citizenship - ) -> ds$data + ) -> ds$postgres_export # Remove rows with no canton and 0 values # Excluding years 1971 - 1980 where there is only # information about net migration -ds$data %>% +ds$postgres_export %>% dplyr::filter(year >= 1981) %>% - dplyr::filter(canton != "No indication") -> ds$data + dplyr::filter(canton != "No indication") -> ds$postgres_export # join the cleaned data to the postgres spatial units table --------------- -spatial_map <- ds$data %>% +spatial_map <- ds$postgres_export %>% dplyr::select(canton) %>% dplyr::distinct(canton) %>% map_ds_spatial_units() -ds$data %>% +ds$postgres_export %<>% dplyr::left_join(spatial_map, by = "canton") %>% - dplyr::select(-canton) -> ds$data + dplyr::select(-canton) ## check that each spatial unit could be matched -> this has to be TRUE From 84d6a34351d5044ed71cc79d603b83509081fa5c Mon Sep 17 00:00:00 2001 From: cmdoret Date: Mon, 7 Aug 2023 17:20:41 +0200 Subject: [PATCH 6/6] feat(A16): add acquisition_of_swiss_citizenship and retain citizenship_category. Use assignment pipe --- pipelines/A16/A16.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pipelines/A16/A16.R b/pipelines/A16/A16.R index a63d7e5c..43ab146b 100644 --- a/pipelines/A16/A16.R +++ b/pipelines/A16/A16.R @@ -1,3 +1,4 @@ +library(magrittr) # create ds object -------------------------------------------------------- ds <- create_dataset(id = "A16") @@ -12,15 +13,15 @@ ds <- download_data(ds) ### Filter specific dimensions # To reduce the dataset size, we will not include sex and citizenship category -ds$data %>% +ds$postgres_export <- ds$data %>% janitor::clean_names() %>% dplyr::filter( sex == "Sex - total" ) %>% - dplyr::select(-sex, -citizenship_category) -> ds$postgres_export + dplyr::select(-sex) # Pivot indicators into 1 indicator per column and cleanup names -ds$postgres_export %>% +ds$postgres_export %<>% tidyr::pivot_wider( names_from = demographic_component, values_from = demographic_balance_by_canton @@ -32,26 +33,25 @@ ds$postgres_export %>% "deaths" = death, "net_migration" = net_migration_incl_change_of_population_type, "immigration" = immigration_incl_change_of_population_type - ) -> ds$postgres_export - + ) # Remove redundant or constant columns # + acquisition of swiss citizenship is always 0 # + The 'change of population type' component is # always included in the demographic components of # 'immigration' and 'net migration'. -ds$postgres_export %>% +ds$postgres_export %<>% dplyr::select( -change_of_population_type, -population_on_31_december, -natural_change, - ) -> ds$postgres_export + ) # Remove rows with no canton and 0 values # Excluding years 1971 - 1980 where there is only # information about net migration -ds$postgres_export %>% +ds$postgres_export %<>% dplyr::filter(year >= 1981) %>% - dplyr::filter(canton != "No indication") -> ds$postgres_export + dplyr::filter(canton != "No indication") # join the cleaned data to the postgres spatial units table --------------- @@ -66,7 +66,7 @@ ds$postgres_export %<>% ## check that each spatial unit could be matched -> this has to be TRUE -assertthat::noNA(ds$data$spatialunit_uid) +assertthat::noNA(ds$postgres_export$spatialunit_uid) # ingest into postgres ----------------------------------------------------