Skip to content

Commit

Permalink
Drop null timestamps and subject_ids
Browse files: browse the repository at this point in the history
  • Loading branch information
justin13601 committed May 27, 2024
1 parent 1a56ce1 commit 33aa411
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/aces/predicates.py
Original file line number Diff line number Diff line change
@@ -73,7 +73,8 @@ def verify_plain_predicates_from_csv(data_path: Path, predicates: list[str]) ->

columns = ["subject_id", "timestamp"] + predicates
logger.info(f"Attempting to load {columns} from CSV file {str(data_path.resolve())}")
- return pl.read_csv(data_path, columns=columns).select(
+ data = pl.read_csv(data_path, columns=columns).drop_nulls(subset=["subject_id", "timestamp"])
+ return data.select(
"subject_id",
pl.col("timestamp").str.strptime(pl.Datetime, format=CSV_TIMESTAMP_FORMAT),
*predicates,
@@ -119,7 +120,11 @@ def generate_plain_predicates_from_meds(data_path: Path, predicates: dict) -> pl
"""

logger.info("Loading MEDS data...")
- data = pl.read_parquet(data_path)
+ data = (
+     pl.read_parquet(data_path)
+     .rename({"patient_id": "subject_id"})
+     .drop_nulls(subset=["subject_id", "timestamp"])
+ )

# generate plain predicate columns
logger.info("Generating plain predicate columns...")
Expand Down

0 comments on commit 33aa411

Please sign in to comment.