Skip to content

Commit

Permalink
Merge pull request #334 from crocs-muni/fix/error-resistant-date-parsing
Browse files (browse the repository at this point in the history)
coerce problematic datetime values in cert csvs/dfs/htmls
  • Loading branch information
adamjanovsky authored May 18, 2023
2 parents b808490 + b0ce6e5 commit 396882d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions src/sec_certs/dataset/cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def to_pandas(self) -> pd.DataFrame:
df = pd.DataFrame([x.pandas_tuple for x in self.certs.values()], columns=CCCertificate.pandas_columns)
df = df.set_index("dgst")

- df.not_valid_before = pd.to_datetime(df.not_valid_before, infer_datetime_format=True)
- df.not_valid_after = pd.to_datetime(df.not_valid_after, infer_datetime_format=True)
+ df.not_valid_before = pd.to_datetime(df.not_valid_before, infer_datetime_format=True, errors="coerce")
+ df.not_valid_after = pd.to_datetime(df.not_valid_after, infer_datetime_format=True, errors="coerce")
df = df.astype(
{"category": "category", "status": "category", "scheme": "category", "cert_lab": "category"}
).fillna(value=np.nan)
Expand Down Expand Up @@ -361,7 +361,7 @@ def _get_primary_key_str(row: Tag):

df[["not_valid_before", "not_valid_after", "maintenance_date"]] = df[
["not_valid_before", "not_valid_after", "maintenance_date"]
- ].apply(pd.to_datetime)
+ ].apply(pd.to_datetime, errors="coerce")

df["dgst"] = df.apply(lambda row: helpers.get_first_16_bytes_sha256(_get_primary_key_str(row)), axis=1)

Expand Down Expand Up @@ -897,7 +897,7 @@ def to_pandas(self) -> pd.DataFrame:
df = df.set_index("dgst")
df.index.name = "dgst"

- df.maintenance_date = pd.to_datetime(df.maintenance_date, infer_datetime_format=True)
+ df.maintenance_date = pd.to_datetime(df.maintenance_date, infer_datetime_format=True, errors="coerce")
return df.fillna(value=np.nan)

@classmethod
Expand Down
4 changes: 2 additions & 2 deletions src/sec_certs/dataset/fips.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,8 @@ def to_pandas(self) -> pd.DataFrame:
df = pd.DataFrame([x.pandas_tuple for x in self.certs.values()], columns=FIPSCertificate.pandas_columns)
df = df.set_index("dgst")

- df.date_validation = pd.to_datetime(df.date_validation, infer_datetime_format=True)
- df.date_sunset = pd.to_datetime(df.date_sunset, infer_datetime_format=True)
+ df.date_validation = pd.to_datetime(df.date_validation, infer_datetime_format=True, errors="coerce")
+ df.date_sunset = pd.to_datetime(df.date_sunset, infer_datetime_format=True, errors="coerce")

# Manually delete one certificate with bad embodiment (seems to have many blank fields)
df = df.loc[~(df.embodiment == "*")]
Expand Down

0 comments on commit 396882d

Please sign in to comment.