From 601af343aaa92ffd0715d4e08a80526475bf452e Mon Sep 17 00:00:00 2001 From: smlmbrt Date: Thu, 26 Jan 2023 17:05:00 +0000 Subject: [PATCH 1/3] Clarify schema matching assertion (more informative output) --- pgscatalog_utils/match/write.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgscatalog_utils/match/write.py b/pgscatalog_utils/match/write.py index b5251c1..0ab7742 100644 --- a/pgscatalog_utils/match/write.py +++ b/pgscatalog_utils/match/write.py @@ -68,7 +68,7 @@ def _check_column_types(matches: pl.LazyFrame): 'matched_effect_allele': pl.Categorical, 'effect_weight': pl.Float64, 'effect_type': pl.Categorical, 'accession': pl.Categorical} col_types = {x: matches.schema.get(x) for x in list((matches.schema.keys() & correct_schema.keys()))} - assert col_types == correct_schema + assert col_types == correct_schema, "MISMATCHED SCHEMA\nCurrent columns: {}\nCorrect schema:{}".format(col_types, correct_schema) def _write_split(deduplicated: dict[str: tuple[int, pl.LazyFrame]], chrom: str, dataset: str): From 380b897a544c18c19941b258bfaee985d46f3b38 Mon Sep 17 00:00:00 2001 From: smlmbrt Date: Thu, 26 Jan 2023 17:34:21 +0000 Subject: [PATCH 2/3] Make sure effect_weight is read as a float from the scorefile. It appears the problem was that the scorefile weight becomes a Utf8 (string) column because effect_weight isn't read in as a Float64 for some files... 
(Next step: check what is happening upstream.) --- pgscatalog_utils/match/read.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pgscatalog_utils/match/read.py b/pgscatalog_utils/match/read.py index 43ed39b..392c9cf 100644 --- a/pgscatalog_utils/match/read.py +++ b/pgscatalog_utils/match/read.py @@ -28,6 +28,7 @@ def read_scorefile(path: str, chrom: typing.Union[str, None]) -> pl.LazyFrame: 'chr_position': pl.UInt64, 'effect_allele': pl.Utf8, # str functions required to complement 'other_allele': pl.Utf8, + 'effect_weight': pl.Float64, 'effect_type': pl.Categorical, 'accession': pl.Categorical} From 41663334ceba372acf3f7039d97dafed4b0ed6be Mon Sep 17 00:00:00 2001 From: smlmbrt Date: Thu, 26 Jan 2023 17:40:43 +0000 Subject: [PATCH 3/3] Bump version / patch number --- pgscatalog_utils/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pgscatalog_utils/__init__.py b/pgscatalog_utils/__init__.py index 0404d81..e1424ed 100644 --- a/pgscatalog_utils/__init__.py +++ b/pgscatalog_utils/__init__.py @@ -1 +1 @@ -__version__ = '0.3.0' +__version__ = '0.3.1' diff --git a/pyproject.toml b/pyproject.toml index 3fc9cbb..608e709 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pgscatalog_utils" -version = "0.3.0" +version = "0.3.1" description = "Utilities for working with PGS Catalog API and scoring files" homepage = "https://github.com/PGScatalog/pgscatalog_utils" authors = ["Benjamin Wingfield ", "Samuel Lambert ", "Laurent Gil "]