Skip to content

Commit

Permalink
TDD: fix ignore header case test case: test passes
Browse files Browse the repository at this point in the history
  • Loading branch information
amelie-rondot committed Feb 10, 2024
1 parent a618b49 commit 9b4eb90
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 21 deletions.
32 changes: 29 additions & 3 deletions frictionless/detector/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ def detect_schema(
labels: Optional[List[str]] = None,
schema: Optional[Schema] = None,
field_candidates: List[Dict[str, Any]] = settings.DEFAULT_FIELD_CANDIDATES,
**options: Any,
) -> Schema:
"""Detect schema from fragment
Expand Down Expand Up @@ -419,9 +420,10 @@ def detect_schema(
for _, field in mapping.items():
if field and field.required and field.name not in labels:
schema.add_field(field)
# For primary field that are missing
if field and not field.required and field.name in schema.primary_key and field.name not in labels:
schema.add_field(field)
# For primary key fields that are missing
self.add_missing_primary_key_to_schema_fields(
field, schema, labels, options["header_case"] # type: ignore
)

# Patch schema
if self.schema_patch:
Expand All @@ -436,3 +438,27 @@ def detect_schema(
schema = Schema.from_descriptor(descriptor)

return schema # type: ignore

@staticmethod
def add_missing_primary_key_to_schema_fields(
field: Field,
schema: Schema,
labels: List[str],
case_sensitive: bool,
):
if case_sensitive:
if (
not field.required
and field.name in schema.primary_key
and field.name not in labels
):
schema.add_field(field)
else:
lower_primary_key = [pk.lower() for pk in schema.primary_key]
lower_labels = [label.lower() for label in labels]
if (
not field.required
and field.name.lower() in lower_primary_key
and field.name.lower() not in lower_labels
):
schema.add_field(field)
29 changes: 12 additions & 17 deletions frictionless/resources/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def __open_schema(self):
labels=self.labels,
schema=self.schema,
field_candidates=system.detect_field_candidates(),
header_case=self.dialect.header_case,
)
self.stats.fields = len(self.schema.fields)

Expand Down Expand Up @@ -303,7 +304,6 @@ def __open_row_stream(self):
enumerated_content_stream = self.dialect.read_enumerated_content_stream(
self.cell_stream
)


# Create row stream
def row_stream():
Expand Down Expand Up @@ -333,8 +333,7 @@ def row_stream():
# Primary Key Error
if is_integrity and self.schema.primary_key:
try:
cells = self.primary_key_cells(row,
self.dialect.header_case)
cells = self.primary_key_cells(row, self.dialect.header_case)
except KeyError:
# Row does not have primary_key as key
# There should already be a missing-label error in
Expand Down Expand Up @@ -409,16 +408,13 @@ def row_stream():
# # Create row stream
self.__row_stream = row_stream()

def primary_key_cells(
    self,
    row: Row,
    case_sensitive: bool
) -> Tuple[str, Any]:
    """Create a tuple containing all cells related to primary_key

    The labels are selected by `labels_related_to_primary_key`, which
    receives `row` and `case_sensitive` unchanged.
    """
    # NOTE(review): the tuple holds one cell per selected label, so the
    # `Tuple[str, Any]` return annotation looks too narrow - confirm
    return tuple(row[label] for label in
                 self.labels_related_to_primary_key(row, case_sensitive))

def primary_key_cells(self, row: Row, case_sensitive: bool) -> Tuple[Any, ...]:
    """Create a tuple containing all cells related to primary_key

    Parameters:
        row: row the cells are read from, indexed by label
        case_sensitive: forwarded to `labels_related_to_primary_key`
            to select the labels

    Returns:
        one cell per selected label, in the order the labels are given
    """
    # NOTE(review): the caller in the row stream catches KeyError around
    # this call, presumably raised by `row[label]` for an absent label
    pk_labels = self.labels_related_to_primary_key(row, case_sensitive)
    return tuple(row[label] for label in pk_labels)

def labels_related_to_primary_key(
self,
row: Row,
Expand All @@ -430,11 +426,10 @@ def labels_related_to_primary_key(
if case_sensitive:
labels_primary_key = self.schema.primary_key
else:
lower_primary_key = [
pk.lower() for pk in self.schema.primary_key
lower_primary_key = [pk.lower() for pk in self.schema.primary_key]
labels_primary_key = [
label for label in row.field_names if label.lower() in lower_primary_key
]
labels_primary_key = [label for label in row.field_names
if label.lower() in lower_primary_key]
return labels_primary_key

# Read
Expand Down
2 changes: 1 addition & 1 deletion tests/table/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_missing_primary_key_label_with_shema_sync_issue_1633():
# Ignore header_case
schema_descriptor = {
"$schema": "https://frictionlessdata.io/schemas/table-schema.json",
"fields": [{"name": "A", "constraints": {"required": True}}],
"fields": [{"name": "A"}],
"primaryKey": ["A"],
}

Expand Down

0 comments on commit 9b4eb90

Please sign in to comment.