Skip to content

Commit

Permalink
feat: gains nace_classification version 2.1 (#348)
Browse files Browse the repository at this point in the history
  • Loading branch information
jdhoffa authored Mar 12, 2024
1 parent be3f02e commit 964b799
Show file tree
Hide file tree
Showing 24 changed files with 1,715 additions and 1,045 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# r2dii.data (development version)

* `nace_classification` updated to version 2.1 (#234).
* `isic_classification` updated to revision 5 (#329).
* Begin deprecation of `cnb_classification` (#329).
* Complete deprecation of `ald_demo` in favor of `abcd_demo` (#328).
Expand Down
1,572 changes: 1,572 additions & 0 deletions data-raw/NACE2.1_NACE2_Table.tsv

Large diffs are not rendered by default.

63 changes: 59 additions & 4 deletions data-raw/classification_bridge.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,71 @@ library(usethis)

source(file.path("data-raw", "utils.R"))

# NOTE(review): this span is a rendered diff. The unclosed `read_bridge()` call
# directly below appears to be the superseded version of the `readr::read_tsv()`
# call that follows it -- confirm against the actual file.
nace_classification_raw <- read_bridge(
file.path("data-raw", "nace_classification.csv")
# Read the tab-separated table stored at data-raw/NACE2.1_NACE2_Table.tsv.
nace_classification_raw <- readr::read_tsv(
file.path("data-raw", "NACE2.1_NACE2_Table.tsv")
)

nace_classification <- prepend_letter_nace_code(
# Keep one row per distinct combination of NACE 2.1 code, level and heading.
# NOTE(review): `col_from = "original_code"` below looks like a leftover line
# from the superseded call shown by the diff -- confirm it is not in the file.
nace_classification <- dplyr::distinct(
nace_classification_raw,
col_from = "original_code",
.data[["NACE21_CODE"]],
.data[["LEVEL"]],
.data[["NACE21_HEADING"]]
)

# Build the `code` column from `NACE21_CODE`. `prepend_letter_nace_code()` is not
# defined in this view; presumably it comes from data-raw/utils.R -- verify.
nace_classification <- prepend_letter_nace_code(
nace_classification,
col_from = "NACE21_CODE",
col_to = "code"
)

# Assign a sector to every NACE 2.1 code and flag the uncertain matches.
#
# `sector`: the first matching rule wins; codes matching none of the rules
# fall through to "not in scope".
# `borderline`: TRUE for the codes listed below, FALSE for everything else.
#
# The dots in the patterns are escaped (`\\.`) so that they match a literal "."
# only. An unescaped "." is a regex wildcard and would accept any character in
# that position.
nace_classification <- dplyr::mutate(
  nace_classification,
  sector = dplyr::case_when(
    grepl("^B05", .data$code) ~ "coal",
    grepl("^B06", .data$code) ~ "oil and gas",
    grepl("^B09\\.1", .data$code) ~ "oil and gas", # borderline
    grepl("^B09\\.9", .data$code) ~ "coal", # borderline
    .data$code == "C23.5" ~ "cement", # borderline
    grepl("^C23\\.51", .data$code) ~ "cement",
    grepl("^C23\\.52", .data$code) ~ "cement", # borderline
    grepl("^C23\\.6", .data$code) ~ "cement", # borderline
    # NOTE(review): marked borderline here, but there is no matching rule for
    # C23.95 in `borderline` below, so it is stored as FALSE -- confirm intent.
    grepl("^C23\\.95", .data$code) ~ "cement", # borderline
    grepl("^C24\\.1", .data$code) ~ "steel",
    grepl("^C24\\.2", .data$code) ~ "steel", # borderline
    grepl("^C24\\.3", .data$code) ~ "steel", # borderline
    grepl("^C24\\.52", .data$code) ~ "steel", # borderline
    grepl("^C29\\.1", .data$code) ~ "automotive", # borderline
    grepl("^C29\\.2", .data$code) ~ "automotive", # borderline
    grepl("^D35\\.1", .data$code) ~ "power", # some of these are borderline
    grepl("^H50", .data$code) ~ "shipping",
    grepl("^H51\\.1", .data$code) ~ "aviation",
    TRUE ~ "not in scope"
  ),
  borderline = dplyr::case_when(
    grepl("^B09\\.1", .data$code) ~ TRUE,
    grepl("^B09\\.9", .data$code) ~ TRUE,
    .data$code == "C23.5" ~ TRUE,
    grepl("^C23\\.52", .data$code) ~ TRUE,
    grepl("^C23\\.6", .data$code) ~ TRUE,
    grepl("^C24\\.2", .data$code) ~ TRUE,
    grepl("^C24\\.3", .data$code) ~ TRUE,
    grepl("^C24\\.52", .data$code) ~ TRUE,
    grepl("^C29\\.1", .data$code) ~ TRUE,
    grepl("^C29\\.2", .data$code) ~ TRUE,
    # Within D35.1 only the group itself and classes 13-16 are flagged.
    .data$code == "D35.1" ~ TRUE,
    grepl("^D35\\.13", .data$code) ~ TRUE,
    grepl("^D35\\.14", .data$code) ~ TRUE,
    grepl("^D35\\.15", .data$code) ~ TRUE,
    grepl("^D35\\.16", .data$code) ~ TRUE,
    TRUE ~ FALSE
  )
)

# Stamp every row with the NACE version, then save the result with
# `use_data()`, overwriting any existing copy.
nace_classification[["version"]] <- "2.1"

usethis::use_data(nace_classification, overwrite = TRUE)

naics_classification <- read_bridge(
Expand Down
12 changes: 7 additions & 5 deletions data-raw/data_dictionary/nace_classification.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
dataset,column,typeof,definition
nace_classification,original_code,character,Original NACE code
nace_classification,code,character,Formatted NACE code with periods removed
nace_classification,code_level,double,Level of granularity of NACE code
nace_classification,sector,character,Associated 2dii sector
nace_classification,borderline,logical,Flag indicating if 2dii sector and classification code are a borderline match. The value TRUE indicates that the match is uncertain between the 2dii sector and the classification. The value FALSE indicates that the match is certainly perfect or the classification is certainly out of 2dii's scope.
nace_classification,NACE21_CODE,character,Original NACE version 2.1 code
nace_classification,LEVEL,character,Original NACE version 2.1 code level
nace_classification,NACE21_HEADING,character,Original NACE version 2.1 description
nace_classification,code,character,NACE version 2.1 code with top-level letter prepended
nace_classification,sector,character,Associated PACTA sector
nace_classification,borderline,logical,Flag indicating if PACTA sector and classification code are a borderline match. The value TRUE indicates that the match is uncertain between the PACTA sector and the classification. The value FALSE indicates that the match is certainly perfect or the classification is certainly out of PACTA's scope.
nace_classification,version,character,Column identifying to which NACE version the code belongs
Loading

0 comments on commit 964b799

Please sign in to comment.