From 054002926d75ea4f43fca4b60d09055f51e6478a Mon Sep 17 00:00:00 2001 From: Jackson Hoffart Date: Wed, 17 Jul 2024 11:17:06 +0200 Subject: [PATCH] bug: filter financial data unusable in `ownership_weight` methodology MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `current_shares_outstanding_all_classes` - a value that is used downstream to calculate `ownership_weight` (a value that is also derived from `shares_all_classes`) - will cause divide-by-zero problems downstream if it is calculated to be 0. It is unusable by the `ownership_weight` calculation if it is 0 (https://github.com/RMI-PACTA/pacta.portfolio.allocate/blob/4c96adb9856788d97293e41c0cd68b8951c3c56b/R/calculate_ownership_weight.R#L5). There is already a filter in `prepare_financial_data` to remove this case; however, it filters on the wrong (but similar) column. An edge case was found by @Antoine-Lalechere in which ADR holdings with no similar EQ holdings pass through and yield a 0 value for `shares_all_classes`, causing the issue described above. This PR adjusts the filter to ensure that the actual required value is usable in the downstream analysis. 
See the reprex below for more information: ``` r library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union library(pacta.data.preparation) # consider the case where an entity has an equity holding and an ADR holding # consider further, that the equity holding that reflects the ADR holding has no price or share information # note: this was a real case in the data data <- tibble::tribble( ~fsym_id, ~isin, ~factset_entity_id, ~adj_price, ~adj_shares_outstanding, ~issue_type, ~one_adr_eq, "ABCDE-F", "US123456789", "12ABCD-E", 5.5, 12345678, "AD", NA, "ABCDE-G", "JP123456789", "12ABCD-E", NA, NA, "EQ", NA ) issue_code_bridge <- tibble::tribble( ~issue_type_code, ~asset_type, "AD", "Equity", "EQ", "Equity" ) |> pacta.data.preparation::standardize_asset_type_names() # our current methodology does NOT consider ADR holdings in the calculation of ownership_weight denominator # what this means is that, in the case above, it would be impossible to calculate an appropriate ownership_weight # since the denominator would be 0 # however, as can be seen below, the ADR holding does appear in the output, with # a value of 0 for current_shares_outstanding_all_classes. 
This is unusable downstream by the ownership_weight calculation # # See: https://rmi-pacta.github.io/pacta.data.preparation/articles/share_ownership_methodology.html for more information out <- pacta.data.preparation::prepare_financial_data(data, issue_code_bridge) print("US123456789" %in% out$isin) #> [1] TRUE print(filter(out, isin == "US123456789")["current_shares_outstanding_all_classes"]) #> # A tibble: 1 × 1 #> current_shares_outstanding_all_classes #> #> 1 0 ``` Created on 2024-07-17 with [reprex v2.1.1](https://reprex.tidyverse.org) --- R/prepare_financial_data.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/prepare_financial_data.R b/R/prepare_financial_data.R index a5d4c9c..22c9aae 100644 --- a/R/prepare_financial_data.R +++ b/R/prepare_financial_data.R @@ -31,11 +31,12 @@ prepare_financial_data <- .by = "factset_entity_id" ) %>% filter( + # these filters are meant to drop financial data points that are functionally unusable for PACTA's ownership weight and fund expansion methodology case_when( asset_type == "Bonds" ~ TRUE, asset_type == "Others" ~ TRUE, asset_type == "Funds" ~ !is.na(.data$adj_price), - asset_type == "Equity" ~ .data$adj_price > 0 & .data$adj_shares_outstanding > 0 + asset_type == "Equity" ~ .data$adj_price > 0 & .data$shares_all_classes > 0 ) ) %>% select(