diff --git a/frictionless/detector/detector.py b/frictionless/detector/detector.py index 1938913b9c..dbf4fde8a6 100644 --- a/frictionless/detector/detector.py +++ b/frictionless/detector/detector.py @@ -298,6 +298,7 @@ def detect_schema( labels: Optional[List[str]] = None, schema: Optional[Schema] = None, field_candidates: List[Dict[str, Any]] = settings.DEFAULT_FIELD_CANDIDATES, + **options: Any, ) -> Schema: """Detect schema from fragment @@ -419,9 +420,11 @@ def detect_schema( for _, field in mapping.items(): if field and field.required and field.name not in labels: schema.add_field(field) - # For primary field that are missing - if field and not field.required and field.name in schema.primary_key and field.name not in labels: - schema.add_field(field) + # For primary key fields that are missing; names are matched + # case-sensitively unless the caller passes header_case=False + self.add_missing_primary_key_to_schema_fields( + field, schema, labels, options.get("header_case", True) # type: ignore + ) # Patch schema if self.schema_patch: @@ -436,3 +439,31 @@ def detect_schema( schema = Schema.from_descriptor(descriptor) return schema # type: ignore + + @staticmethod + def add_missing_primary_key_to_schema_fields( + field: Optional[Field], + schema: Schema, + labels: List[str], + case_sensitive: bool, + ): + """Add a non-required primary key field that is missing from labels + + Parameters: + field: candidate schema field; a falsy value is ignored + schema: schema to extend; its primary key is consulted + labels: labels to look the field name up in + case_sensitive: compare names as-is when True, lowercased otherwise + """ + # Required fields are already added by the caller's own check + if not field or field.required: + return + name = field.name + primary_key = schema.primary_key + if not case_sensitive: + # Case is ignored: compare every name in lowercase + name = name.lower() + primary_key = [pk.lower() for pk in primary_key] + labels = [label.lower() for label in labels] + if name in primary_key and name not in labels: + schema.add_field(field) diff --git a/frictionless/resources/table.py b/frictionless/resources/table.py index 8a8882bf95..0b547a3a09 100644 --- a/frictionless/resources/table.py +++ b/frictionless/resources/table.py @@ -204,6 +204,7 @@ def __open_schema(self): labels=self.labels, schema=self.schema, field_candidates=system.detect_field_candidates(), +
header_case=self.dialect.header_case, ) self.stats.fields = len(self.schema.fields) @@ -303,7 +304,6 @@ def __open_row_stream(self): enumerated_content_stream = self.dialect.read_enumerated_content_stream( self.cell_stream ) - # Create row stream def row_stream(): @@ -333,8 +333,7 @@ def row_stream(): # Primary Key Error if is_integrity and self.schema.primary_key: try: - cells = self.primary_key_cells(row, - self.dialect.header_case) + cells = self.primary_key_cells(row, self.dialect.header_case) except KeyError: # Row does not have primary_key as key # There should already be a missing-label error in @@ -409,16 +408,13 @@ def row_stream(): # # Create row stream self.__row_stream = row_stream() - def primary_key_cells( - self, - row: Row, - case_sensitive: bool - ) -> Tuple[str, Any]: - """Create a tuple containg all cells related to primary_key - """ - return tuple(row[label] for label in - self.labels_related_to_primary_key(row, case_sensitive)) - + def primary_key_cells(self, row: Row, case_sensitive: bool) -> Tuple[Any, ...]: + """Create a tuple containing all cells related to primary_key""" + return tuple( + row[label] + for label in self.labels_related_to_primary_key(row, case_sensitive) + ) + def labels_related_to_primary_key( self, row: Row, @@ -430,11 +426,10 @@ def labels_related_to_primary_key( if case_sensitive: labels_primary_key = self.schema.primary_key else: - lower_primary_key = [ - pk.lower() for pk in self.schema.primary_key + lower_primary_key = [pk.lower() for pk in self.schema.primary_key] + labels_primary_key = [ + label for label in row.field_names if label.lower() in lower_primary_key ] - labels_primary_key = [label for label in row.field_names - if label.lower() in lower_primary_key] return labels_primary_key # Read diff --git a/tests/table/test_header.py b/tests/table/test_header.py index 7caad5b424..7583e84517 100644 --- a/tests/table/test_header.py +++ b/tests/table/test_header.py @@ -78,7 +78,7 @@ def
test_missing_primary_key_label_with_shema_sync_issue_1633(): # Ignore header_case schema_descriptor = { "$schema": "https://frictionlessdata.io/schemas/table-schema.json", - "fields": [{"name": "A", "constraints": {"required": True}}], + "fields": [{"name": "A"}], "primaryKey": ["A"], }