diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..5559d3c --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,5 @@ +# These owners will be the default owners for everything in +# the repo. Unless a later match takes precedence, +# @cjyetman will be requested for review when someone opens +# a pull request. +* @cjyetman diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..76691c7 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/add-issues-to-ADO.yml b/.github/workflows/add-issues-to-ADO.yml new file mode 100644 index 0000000..2fee46d --- /dev/null +++ b/.github/workflows/add-issues-to-ADO.yml @@ -0,0 +1,26 @@ +--- +# This example file will enable actions that trigger on created or modified GitHub issues. +# +# Note the @main in `uses:` on the last line. This will call the latest version of the workflow from the `main` branch in the RMI-PACTA/actions repo. +# You can also specify a tag from that repo, or a commit SHA to pin action versions. 
+on: + issues: + types: + [opened, edited, deleted, closed, reopened, labeled, unlabeled, assigned] + issue_comment: + types: [created, edited, deleted] + +name: GH issues + +permissions: + issues: write + pull-requests: write + +jobs: + issues: + name: Run issues workflows + uses: RMI-PACTA/actions/.github/workflows/issues.yml@main + with: + ado_area_path: "2DegreesInvesting\\GitHub Issues" + secrets: + ADO_TOKEN: ${{ secrets.ADO_PERSONAL_ACCESS_TOKEN }} diff --git a/.github/workflows/add-prs-and-issues-to-project.yml b/.github/workflows/add-prs-and-issues-to-project.yml new file mode 100644 index 0000000..713094d --- /dev/null +++ b/.github/workflows/add-prs-and-issues-to-project.yml @@ -0,0 +1,21 @@ +name: Adds all new issues and PRs to appropriate projects + +on: + issues: + types: + - opened + - transferred + pull_request: + branches: [main] + types: + - opened + +jobs: + add-to-maintainer-project: + name: Add issue/PR to @cjyetman's maintainer project + runs-on: ubuntu-latest + steps: + - uses: actions/add-to-project@v1.0.1 + with: + project-url: https://github.com/orgs/RMI-PACTA/projects/13 + github-token: ${{ secrets.PAT_ADD_ISSUES_TO_PROJECT }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..1dc1e65 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,29 @@ +--- +# This example file will enable docker-related checks on push or PR to the main +# branch. +# It will also run the checks every weeknight at midnight UTC +# +# Note the @main in `uses:` on the last line. This will call the latest version +# of the workflow from the `main` branch in the RMI-PACTA/actions repo. You can +# also specify a tag from that repo, or a commit SHA to pin action versions. 
+on: + pull_request: + push: + branches: [main] + schedule: + - cron: '0 0 * * 1,2,3,4,5' + workflow_dispatch: + +name: docker + +jobs: + docker: + name: Docker actions + uses: RMI-PACTA/actions/.github/workflows/docker.yml@main + with: + build-platform: | + [ + "linux/amd64" + ] + do-lint: false + do-check-r-sysdeps: false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4ce91ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata + +# because RStudio adds one even though this repo isn't an R package +.Rbuildignore + +# to not accidentally merge a local config +.env + +# macOS file +.DS_Store + +# inputs/ and outputs/ dirs +inputs/ +outputs/ diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..e8c6392 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,29 @@ +Package: workflow.benchmark.preparation +Title: Run the Benchmark Preparation Workflow +Version: 0.0.0.9000 +Authors@R: + c( + person( + given = "CJ", + family = "Yetman", + role = c("aut", "cre", "ctr"), + email = "cj@cjyetman.com", + comment = c(ORCID = "0000-0001-5099-9500") + ), + person( + given = "RMI", + role = c("cph", "fnd"), + email = "PACTA4investors@rmi.org" + ) + ) +Description: A workflow to prepare benchmark indices for the PACTA tool. 
+License: MIT + file LICENSE +Encoding: UTF-8 +Imports: + config, + dplyr, + logger, + pacta.data.scraping, + readxl +Remotes: + RMI-PACTA/pacta.data.scraping diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a4a23d9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,54 @@ +# using rocker r-ver as a base with R 4.4.0 +# https://hub.docker.com/r/rocker/r-ver +# https://rocker-project.org/images/versioned/r-ver.html +ARG R_VERS="4.4.0" +FROM rocker/r-ver:$R_VERS AS base + +# set Docker image labels +LABEL org.opencontainers.image.source=https://github.com/RMI-PACTA/workflow.benchmark.preparation +LABEL org.opencontainers.image.description="Docker image to run benchmark preparation" +LABEL org.opencontainers.image.licenses=MIT +LABEL org.opencontainers.image.title="" +LABEL org.opencontainers.image.revision="" +LABEL org.opencontainers.image.version="" +LABEL org.opencontainers.image.vendor="" +LABEL org.opencontainers.image.base.name="" +LABEL org.opencontainers.image.ref.name="" +LABEL org.opencontainers.image.authors="" + +WORKDIR /app + +# set apt-get to noninteractive mode +ARG DEBIAN_FRONTEND="noninteractive" +ARG DEBCONF_NOWARNINGS="yes" + +# install system dependencies +RUN apt-get update \ + && rm -rf /var/lib/apt/lists/* + +# sets CRAN repo to use Posit Package Manager to freeze R package versions to +# those available on 2024-05-15 +# https://packagemanager.posit.co/client/#/repos/2/overview +# https://packagemanager.posit.co/cran/__linux__/jammy/2024-05-15 +ARG CRAN_REPO="https://packagemanager.posit.co/cran/__linux__/jammy/2024-05-15" + +RUN echo "options(repos = c(CRAN = '$CRAN_REPO'), pkg.sysreqs = FALSE)" >> "${R_HOME}/etc/Rprofile.site" \ + && Rscript -e "\ + install.packages('pak'); \ + " + +# copy in DESCRIPTION from this repo +COPY DESCRIPTION /app/DESCRIPTION + +# find the dependencies listed in DESCRIPTION and install them with pak +RUN Rscript -e "\ + deps <- pak::local_deps(root = '.'); \ + pkg_deps <- deps[!deps[['direct']], 'ref']; 
\ + print(pkg_deps); \ + pak::pak(pkg_deps); \ + " +COPY config.yml /app/config.yml +COPY *.R /app/ + +CMD ["Rscript", "/app/main.R"] + diff --git a/config.yml b/config.yml new file mode 100644 index 0000000..1449a4b --- /dev/null +++ b/config.yml @@ -0,0 +1,76 @@ +default: + project_code: "" # a single string specifying the desired project code, e.g. "PA2024CH" + pacta_financial_timestamp: "" # a single string specifying the desired timestamp in the form "YYYYQN", e.g. "2023Q4" + msci_filename: "" # a single string specifying the filename of the MSCI XLSX input data + ishares_date: "" # a single string specifying the desired date of iShares data in the form "YYYYMMDD", e.g. "20231229" + bonds_indices_urls: "" # an R expression specifying a named vector including the desired names and URLs of the iShares Bond indices to include + equity_indices_urls: "" # an R expression specifying a named vector including the desired names and URLs of the iShares Equity indices to include + + +2021Q4: + project_code: "" + pacta_financial_timestamp: "2021Q4" + msci_filename: "" + ishares_date: "20211231" + bonds_indices_urls: !expr |- + c( + "iShares Global Corp Bond UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251813/ishares-global-corporate-bond-ucits-etf/" + ) + equity_indices_urls: !expr |- + c( + "iShares Core S&P 500 UCITS ETF USD (Dist) " = + "https://www.ishares.com/uk/individual/en/products/251900/ishares-sp-500-ucits-etf-inc-fund/", + "iShares MSCI World UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251881/ishares-msci-world-ucits-etf-inc-fund/", + "iShares MSCI EM UCITS ETF USD (Acc)" = + "https://www.ishares.com/uk/individual/en/products/251858/ishares-msci-emerging-markets-ucits-etf-acc-fund/", + "iShares MSCI ACWI UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251850/ishares-msci-acwi-ucits-etf/" + ) + + +2022Q4: + project_code: "" + pacta_financial_timestamp: "2022Q4" + msci_filename: "" + ishares_date: 
"20221230" + bonds_indices_urls: !expr |- + c( + "iShares Global Corp Bond UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251813/ishares-global-corporate-bond-ucits-etf/" + ) + equity_indices_urls: !expr |- + c( + "iShares Core S&P 500 UCITS ETF USD (Dist) " = + "https://www.ishares.com/uk/individual/en/products/251900/ishares-sp-500-ucits-etf-inc-fund/", + "iShares MSCI World UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251881/ishares-msci-world-ucits-etf-inc-fund/", + "iShares MSCI EM UCITS ETF USD (Acc)" = + "https://www.ishares.com/uk/individual/en/products/251858/ishares-msci-emerging-markets-ucits-etf-acc-fund/", + "iShares MSCI ACWI UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251850/ishares-msci-acwi-ucits-etf/" + ) + + +2023Q4: + project_code: "" + pacta_financial_timestamp: "2023Q4" + msci_filename: "Constituents -29th Dec 2023-3.xlsx" + ishares_date: "20231229" + bonds_indices_urls: !expr |- + c( + "iShares Global Corp Bond UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251813/ishares-global-corporate-bond-ucits-etf/" + ) + equity_indices_urls: !expr |- + c( + "iShares Core S&P 500 UCITS ETF USD (Dist) " = + "https://www.ishares.com/uk/individual/en/products/251900/ishares-sp-500-ucits-etf-inc-fund/", + "iShares MSCI World UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251881/ishares-msci-world-ucits-etf-inc-fund/", + "iShares MSCI EM UCITS ETF USD (Acc)" = + "https://www.ishares.com/uk/individual/en/products/251858/ishares-msci-emerging-markets-ucits-etf-acc-fund/", + "iShares MSCI ACWI UCITS ETF " = + "https://www.ishares.com/uk/individual/en/products/251850/ishares-msci-acwi-ucits-etf/" + ) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b812ba7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,18 @@ +services: + scenario_prep: + platform: linux/amd64 + build: + context: . 
+    environment:
+      R_CONFIG_ACTIVE: ${R_CONFIG_ACTIVE:-2023Q4}
+      BENCHMARKS_PREPARATION_INPUTS_PATH: /mnt/inputs
+      BENCHMARKS_PREPARATION_OUTPUTS_PATH: /mnt/outputs
+    volumes:
+      - type: bind
+        source: ${BENCHMARKS_PREPARATION_INPUTS_PATH:-./inputs}
+        # target must match BENCHMARKS_PREPARATION_INPUTS_PATH in `environment`
+        target: /mnt/inputs
+      - type: bind
+        source: ${BENCHMARKS_PREPARATION_OUTPUTS_PATH:-./outputs}
+        # target must match BENCHMARKS_PREPARATION_OUTPUTS_PATH in `environment`
+        target: /mnt/outputs
diff --git a/main.R b/main.R
new file mode 100644
index 0000000..47ef31e
--- /dev/null
+++ b/main.R
@@ -0,0 +1,257 @@
+# load necessary packages ------------------------------------------------------
+
+logger::log_info("Checking for necessary packages.")
+
+# Report whether a single package is installed. system.file() returns "" for a
+# package it cannot find, which avoids rebuilding the whole installed.packages()
+# matrix once per dependency.
+is_installed <- function(packagename) {
+  nzchar(system.file(package = packagename))
+}
+
+# Extract bare package names from the Imports field of a DESCRIPTION file.
+# Entries are split on commas, version constraints such as "(>= 1.0.0)" are
+# dropped, and surrounding whitespace is trimmed, so the result does not depend
+# on how the field is wrapped. Returns character(0) when the field is absent.
+get_imported_pkgs <- function(description_path) {
+  imports <- read.dcf(description_path, fields = "Imports")[1L, "Imports"]
+  if (is.na(imports)) {
+    return(character(0))
+  }
+  pkgs <- strsplit(imports, split = ",", fixed = TRUE)[[1L]]
+  pkgs <- trimws(gsub("\\([^)]*\\)", "", pkgs))
+  pkgs[nzchar(pkgs)]
+}
+
+necessary_pkgs <- get_imported_pkgs("DESCRIPTION")
+
+installed_pkgs <- vapply(necessary_pkgs, is_installed, logical(1L))
+
+if (!all(installed_pkgs)) {
+  missing_pkgs <- necessary_pkgs[!installed_pkgs]
+  logger::log_error("Necessary packages are not installed: {missing_pkgs}")
+  stop(
+    "Necessary packages are not installed: ",
+    toString(missing_pkgs),
+    call. = FALSE
+  )
+}
+
+
+logger::log_info("Loading necessary packages.")
+suppressPackageStartupMessages({
+  library("dplyr")
+})
+
+
+# set general i/o paths --------------------------------------------------------
+
+benchmarks_preparation_inputs_path <-
+  Sys.getenv(
+    "BENCHMARKS_PREPARATION_INPUTS_PATH",
+    "./inputs"
+  )
+
+if (dir.exists(benchmarks_preparation_inputs_path)) {
+  logger::log_info("Setting benchmarks preparation inputs path: {benchmarks_preparation_inputs_path}")
+} else {
+  logger::log_error("Benchmarks preparation inputs path does not exist: {benchmarks_preparation_inputs_path}")
+  stop(
+    "Benchmarks preparation inputs path does not exist: ",
+    benchmarks_preparation_inputs_path,
+    call. = FALSE
+  )
+}
+
+benchmarks_preparation_outputs_path <-
+  Sys.getenv(
+    "BENCHMARKS_PREPARATION_OUTPUTS_PATH",
+    "./outputs"
+  )
+
+if (dir.exists(benchmarks_preparation_outputs_path)) {
+  logger::log_info("Setting benchmarks preparation outputs path: {benchmarks_preparation_outputs_path}")
+} else {
+  logger::log_error("Benchmarks preparation outputs path does not exist: {benchmarks_preparation_outputs_path}")
+  stop(
+    "Benchmarks preparation outputs path does not exist: ",
+    benchmarks_preparation_outputs_path,
+    call. = FALSE
+  )
+}
+
+
+# load config ------------------------------------------------------------------
+
+logger::log_info("Loading config.")
+config <-
+  config::get(
+    file = "config.yml",
+    config = Sys.getenv(x = "R_CONFIG_ACTIVE", unset = "2023Q4"),
+    use_parent = FALSE
+  )
+
+
+# set and check paths ----------------------------------------------------------
+
+msci_xlsx_path <-
+  file.path(
+    benchmarks_preparation_inputs_path,
+    config[["msci_filename"]]
+  )
+
+# An empty msci_filename makes the path resolve to the inputs directory, for
+# which file.exists() can be TRUE, so directories are excluded explicitly. The
+# MSCI input is optional: when it is missing the path is blanked and the import
+# is skipped further down.
+if (!dir.exists(msci_xlsx_path) && file.exists(msci_xlsx_path)) {
+  logger::log_info("Setting MSCI XLSX input filepath: {msci_xlsx_path}")
+} else {
+  logger::log_warn("MSCI XLSX input filepath does not exist: {msci_xlsx_path}")
+  msci_xlsx_path <- ""
+}
+
+
+# create output list -----------------------------------------------------------
+
+benchmark_portfolios <- list()
+
+
+# scrape iShares indices data --------------------------------------------------
+
+# Scrape and process the iShares data for every index in indices_urls, a named
+# character vector (names are index names, values are iShares product URLs),
+# as of ishares_date ("YYYYMMDD"). Returns the processed rows of all indices
+# bound together.
+scrape_ishares_indices <- function(indices_urls, ishares_date) {
+  dplyr::bind_rows(
+    lapply(
+      seq_along(indices_urls), function(index) {
+        pacta.data.scraping::get_ishares_index_data(
+          indices_urls[[index]],
+          names(indices_urls)[[index]],
+          ishares_date
+        )
+      }
+    )
+  ) %>%
+    pacta.data.scraping::process_ishares_index_data()
+}
+
+logger::log_info("Scraping iShares indices data")
+
+logger::log_debug("Scraping iShares indices bonds data.")
+ishares_indices_bonds <-
+  scrape_ishares_indices(
+    config[["bonds_indices_urls"]],
+    config[["ishares_date"]]
+  )
+
+logger::log_debug("Scraping iShares indices equity data.")
+ishares_indices_equity <-
+  scrape_ishares_indices(
+    config[["equity_indices_urls"]],
+    config[["ishares_date"]]
+  )
+
+logger::log_debug("Combining iShares indices data.")
+ishares_indices <-
+  dplyr::bind_rows(ishares_indices_bonds, ishares_indices_equity)
+
+benchmark_portfolios <- c(benchmark_portfolios, list(ishares_indices))
+
+
+# load MSCI indices ------------------------------------------------------------
+
+if (nzchar(msci_xlsx_path)) {
+  # the first sheet of the workbook is not read as an index
+  sheet_names <- readxl::excel_sheets(path = msci_xlsx_path)[-1]
+  logger::log_info("Setting MSCI XLSX sheet names: {sheet_names}")
+
+  logger::log_debug("Reading and combining MSCI indices data.")
+  msci_indices <-
+    dplyr::bind_rows(
+      lapply(sheet_names, function(sheet_name) {
+        # index name is the sheet name without a trailing "_<digits>" suffix
+        index_name <- paste0("MSCI - ", sub("_[0-9]*$", "", sheet_name))
+
+        readxl::read_excel(path = msci_xlsx_path, sheet = sheet_name) %>%
+          dplyr::mutate(investor_name = "Benchmark Portfolios") %>%
+          dplyr::mutate(portfolio_name = index_name) %>%
+          dplyr::mutate(currency = "USD") %>%
+          dplyr::select(
+            investor_name,
+            portfolio_name,
+            isin = `Isin Code`,
+            market_value = `Security Closing Weight`,
+            currency
+          )
+      })
+    )
+
+  benchmark_portfolios <- c(benchmark_portfolios, list(msci_indices))
+} else {
+  logger::log_info("Skipping MSCI XLSX import because input file is not available.")
+}
+
+
+# combine all benchmarks -------------------------------------------------------
+
+benchmark_portfolios <- dplyr::bind_rows(benchmark_portfolios)
+
+# run_id is "<timestamp>_<project_code>", or just the timestamp when no project
+# code is configured
+if (nzchar(config[["project_code"]])) {
+  run_id <-
+    paste(
+      config[["pacta_financial_timestamp"]],
+      config[["project_code"]],
+      sep = "_"
+    )
+} else {
+  run_id <- config[["pacta_financial_timestamp"]]
+}
+
+benchmarks_investor_name <-
+  paste0(
+    "Benchmark Portfolios ",
+    run_id
+  )
+
+# every row, iShares and MSCI alike, gets the same run-specific investor name
+benchmark_portfolios <-
+  dplyr::mutate(
+    benchmark_portfolios,
+    investor_name = benchmarks_investor_name
+  )
+
+
+# save output to output path ---------------------------------------------------
+
+run_timestamp <- format(Sys.time(), "%Y%m%d-%H%M%S")
+
+output_filename <-
+  paste0(
+    paste(
+      run_id,
+      "benchmark_portfolios",
+      run_timestamp,
+      sep = "_"
+    ),
+    ".rds"
+  )
+
+output_filepath <-
+  file.path(
+    benchmarks_preparation_outputs_path,
+    output_filename
+  )
+
+logger::log_info("Saving benchmark output file: {output_filepath}")
+
+saveRDS(
+  object = benchmark_portfolios,
+  file = output_filepath
+)
+
+logger::log_info("Benchmark preparation complete!")
diff --git a/workflow.benchmark.preparation.Rproj b/workflow.benchmark.preparation.Rproj
new file mode 100644
index 0000000..8ec1ef9
--- /dev/null
+++ b/workflow.benchmark.preparation.Rproj
@@ -0,0 +1,21 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: XeLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+LineEndingConversion: Posix
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source