From 003aca5ceb03c5222fae352637f0f8a9d4f91336 Mon Sep 17 00:00:00 2001
From: Alessandro Pasotti
Date: Mon, 16 Dec 2024 11:31:32 +0100
Subject: [PATCH] [sqlite] OGR_SCHEMA implementation - RFC 103

---
 autotest/ogr/ogr_sqlite.py                    | 274 ++++++++++++++++++
 doc/source/drivers/vector/sqlite.rst          |   9 +
 ogr/ogrsf_frmts/sqlite/ogrsqlitebase.h        |   2 +
 .../sqlite/ogrsqlitedatasource.cpp            | 136 ++++++++-
 ogr/ogrsf_frmts/sqlite/ogrsqlitedriver.cpp    |   5 +
 ogr/ogrsf_frmts/sqlite/ogrsqlitelayer.cpp     |  15 +-
 6 files changed, 435 insertions(+), 6 deletions(-)

diff --git a/autotest/ogr/ogr_sqlite.py b/autotest/ogr/ogr_sqlite.py
index 9bbd8382f69c..a7094a1658c0 100755
--- a/autotest/ogr/ogr_sqlite.py
+++ b/autotest/ogr/ogr_sqlite.py
@@ -13,6 +13,7 @@
 # SPDX-License-Identifier: MIT
 ###############################################################################
 
+import json
 import os
 import shutil
 
@@ -4262,3 +4263,276 @@ def test_ogr_sqlite_run_deferred_actions_before_start_transaction():
     lyr.ResetReading()
     f = lyr.GetNextFeature()
     assert f.GetFID() == 1
+
+
+######################################################################
+# Test schema override open option with SQLite driver
+#
+@pytest.mark.parametrize(
+    "open_options, expected_field_types, expected_field_names, expected_warning",
+    [
+        (
+            [],
+            [
+                ogr.OFTString,
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                (ogr.OFTString, ogr.OFSTNone),  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Override string field with integer
+        (
+            [
+                r'OGR_SCHEMA={"layers": [{"name": "test_point", "fields": [{ "name": "str", "type": "Integer" }]}]}'
+            ],
+            [
+                ogr.OFTInteger,  # <-- overridden
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                ogr.OFTString,  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Override full schema and JSON/UUID subtype
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "schemaType": "Full", "fields": [{ "name": "json_str", "subType": "JSON", "new_name": "json_str" }, {"name": "uuid_str", "subType": "UUID" }]}]}'
+            ],
+            [
+                (ogr.OFTString, ogr.OFSTJSON),  # json subType
+                (ogr.OFTString, ogr.OFSTUUID),  # uuid subType
+            ],
+            ["json_str"],
+            None,
+        ),
+        # Test width and precision override
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "real", "width": 7, "precision": 3 }]}]}'
+            ],
+            [
+                ogr.OFTString,
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                (ogr.OFTString, ogr.OFSTNone),  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Test boolean and short integer subtype
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "int", "subType": "Boolean" }, { "name": "real", "type": "Integer", "subType": "Int16" }]}]}'
+            ],
+            [
+                ogr.OFTString,
+                (ogr.OFTInteger, ogr.OFSTBoolean),  # bool overridden subType
+                (ogr.OFTInteger, ogr.OFSTInt16),  # int16 overridden subType
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTString,  # int string
+                ogr.OFTString,  # real string
+                ogr.OFTString,  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Test real and int str override
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "int_str", "type": "Integer" }, { "name": "real_str", "type": "Real" }]}]}'
+            ],
+            [
+                ogr.OFTString,
+                ogr.OFTInteger,
+                ogr.OFTReal,
+                ogr.OFTInteger,  # bool subType
+                ogr.OFTInteger,  # int string
+                ogr.OFTReal,  # real string
+                ogr.OFTString,  # json subType
+                ogr.OFTString,  # uuid subType
+            ],
+            [],
+            None,
+        ),
+        # Test invalid schema
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "str", "type": "xxxxx" }]}]}'
+            ],
+            [],
+            [],
+            "Unsupported field type: xxxxx for field str",
+        ),
+        # Test invalid field name
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "test_point", "fields": [{ "name": "xxxxx", "type": "String", "new_name": "new_str" }]}]}'
+            ],
+            [],
+            [],
+            "Field xxxxx not found",
+        ),
+        # Test invalid layer name
+        (
+            [
+                r'OGR_SCHEMA={ "layers": [{"name": "xxxxx", "fields": [{ "name": "str", "type": "String" }]}]}'
+            ],
+            [],
+            [],
+            "Layer xxxxx not found",
+        ),
+    ],
+)
+def test_ogr_sqlite_schema_override(
+    tmp_path, open_options, expected_field_types, expected_field_names, expected_warning
+):
+    """Open a SQLite DB with OGR_SCHEMA and check the fields or the expected error."""
+    # Create SQLite database
+    sqlite_db = tmp_path / "test_ogr_sqlite_schema_override.db"
+    ds = ogr.GetDriverByName("SQLite").CreateDataSource(str(sqlite_db))
+    lyr = ds.CreateLayer("test_point")
+    lyr.CreateField(ogr.FieldDefn("str", ogr.OFTString))
+    lyr.CreateField(ogr.FieldDefn("int", ogr.OFTInteger))
+    lyr.CreateField(ogr.FieldDefn("real", ogr.OFTReal))
+    lyr.CreateField(ogr.FieldDefn("bool", ogr.OFTInteger))
+    lyr.CreateField(ogr.FieldDefn("int_str", ogr.OFTString))
+    lyr.CreateField(ogr.FieldDefn("real_str", ogr.OFTString))
+    lyr.CreateField(ogr.FieldDefn("json_str", ogr.OFTString))
+    lyr.CreateField(ogr.FieldDefn("uuid_str", ogr.OFTString))
+
+    # Insert some data
+    feat = ogr.Feature(lyr.GetLayerDefn())
+    feat.SetField("str", "1")
+    feat.SetField("int", 2)
+    feat.SetField("real", 3.4)
+    feat.SetField("bool", 1)
+    feat.SetField("int_str", "2")
+    feat.SetField("real_str", "3.4")
+    feat.SetField("json_str", '{"key": "foo"}')
+    feat.SetField("uuid_str", "123e4567-e89b-12d3-a456-426614174000")
+    lyr.CreateFeature(feat)
+    feat = None
+
+    gdal.ErrorReset()
+
+    try:
+        schema = open_options[0].split("=", 1)[1]
+        open_options = open_options[1:]
+    except IndexError:
+        schema = None
+
+    with gdal.quiet_errors():
+
+        if schema:
+            open_options.append("OGR_SCHEMA=" + schema)
+        else:
+            open_options = []
+
+        # Validate the JSON schema
+        if not expected_warning and schema:
+            schema = json.loads(schema)
+            gdaltest.validate_json(schema, "ogr_fields_override.schema.json")
+
+        # Check error if expected_field_types is empty
+        if not expected_field_types:
+            with gdaltest.disable_exceptions():
+                ds = gdal.OpenEx(
+                    sqlite_db,
+                    gdal.OF_VECTOR | gdal.OF_READONLY,
+                    open_options=open_options,
+                    allowed_drivers=["SQLite"],
+                )
+                assert (
+                    gdal.GetLastErrorMsg().find(expected_warning) != -1
+                ), f"Warning {expected_warning} not found, got {gdal.GetLastErrorMsg()} instead"
+                assert ds is None
+        else:
+
+            ds = gdal.OpenEx(
+                sqlite_db,
+                gdal.OF_VECTOR | gdal.OF_READONLY,
+                open_options=open_options,
+                allowed_drivers=["SQLite"],
+            )
+
+            assert ds is not None
+
+            lyr = ds.GetLayer(0)
+
+            assert lyr.GetFeatureCount() == 1
+
+            lyr_defn = lyr.GetLayerDefn()
+
+            assert lyr_defn.GetFieldCount() == len(expected_field_types)
+
+            if len(expected_field_names) == 0:
+                expected_field_names = [
+                    "str",
+                    "int",
+                    "real",
+                    "bool",
+                    "int_str",
+                    "real_str",
+                    "json_str",
+                    "uuid_str",
+                ]
+
+            feat = lyr.GetNextFeature()
+
+            # Check field types
+            for i in range(len(expected_field_names)):
+                try:
+                    expected_type, expected_subtype = expected_field_types[i]
+                    assert feat.GetFieldDefnRef(i).GetType() == expected_type
+                    assert feat.GetFieldDefnRef(i).GetSubType() == expected_subtype
+                except TypeError:
+                    expected_type = expected_field_types[i]
+                    assert feat.GetFieldDefnRef(i).GetType() == expected_type
+                assert feat.GetFieldDefnRef(i).GetName() == expected_field_names[i]
+
+            # Test width and precision override
+            if len(open_options) > 0 and "precision" in open_options[0]:
+                assert feat.GetFieldDefnRef(2).GetWidth() == 7
+                assert feat.GetFieldDefnRef(2).GetPrecision() == 3
+
+            # Check feature content
+            if len(expected_field_names) > 0:
+                if "int" in expected_field_names:
+                    int_sub_type = feat.GetFieldDefnRef("int").GetSubType()
+                    # The conditional must pick the expected value: written as
+                    # "x == 1 if cond else 2" it parses as "(x == 1) if cond else 2"
+                    # and asserts the truthy constant 2 for non-boolean fields.
+                    expected_int = 1 if int_sub_type == ogr.OFSTBoolean else 2
+                    assert feat.GetFieldAsInteger("int") == expected_int
+                if "str" in expected_field_names:
+                    assert feat.GetFieldAsString("str") == "1"
+                if "new_str" in expected_field_names:
+                    assert feat.GetFieldAsString("new_str") == "1"
+                if "real_str" in expected_field_names:
+                    assert feat.GetFieldAsDouble("real_str") == 3.4
+                if "int_str" in expected_field_names:
+                    assert feat.GetFieldAsInteger("int_str") == 2
+            else:
+                assert feat.GetFieldAsInteger("int") == 2
+                assert feat.GetFieldAsString("str") == "1"
+
+            if expected_warning:
+                assert (
+                    gdal.GetLastErrorMsg().find(expected_warning) != -1
+                ), f"Warning {expected_warning} not found, got {gdal.GetLastErrorMsg()} instead"
diff --git a/doc/source/drivers/vector/sqlite.rst b/doc/source/drivers/vector/sqlite.rst
index 7242910c69c5..f863e4e93bb0 100644
--- a/doc/source/drivers/vector/sqlite.rst
+++ b/doc/source/drivers/vector/sqlite.rst
@@ -270,6 +270,15 @@ The following open options are supported:
       The other database must be of a type recognized by this driver, so
       its geometry blobs are properly recognized (so typically not a GeoPackage one)
 
+-  .. oo:: OGR_SCHEMA
+      :choices: <filename>|<json string>
+      :since: 3.11.0
+
+      Partially or totally overrides the auto-detected schema to use for creating the layer.
+      The overrides are defined as a JSON list of field definitions.
+      This can be a filename, a URL or JSON string conformant with the `ogr_fields_override.schema.json schema <https://raw.githubusercontent.com/OSGeo/gdal/refs/heads/master/ogr/data/ogr_fields_override.schema.json>`_
+
+
 Database creation options
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/ogr/ogrsf_frmts/sqlite/ogrsqlitebase.h b/ogr/ogrsf_frmts/sqlite/ogrsqlitebase.h
index 8c070416db32..c06babada8f0 100644
--- a/ogr/ogrsf_frmts/sqlite/ogrsqlitebase.h
+++ b/ogr/ogrsf_frmts/sqlite/ogrsqlitebase.h
@@ -148,6 +148,8 @@ class OGRSQLiteBaseDataSource CPL_NON_FINAL : public GDALPamDataset
 
     OGRErr DoTransactionCommand(const char *pszCommand);
 
+    bool DealWithOgrSchemaOpenOption(CSLConstList papszOpenOptionsIn);
+
     CPL_DISALLOW_COPY_ASSIGN(OGRSQLiteBaseDataSource)
 
   public:
diff --git a/ogr/ogrsf_frmts/sqlite/ogrsqlitedatasource.cpp b/ogr/ogrsf_frmts/sqlite/ogrsqlitedatasource.cpp
index c0a1bd99b3dd..48f0d6bf1c5a 100644
--- a/ogr/ogrsf_frmts/sqlite/ogrsqlitedatasource.cpp
+++ b/ogr/ogrsf_frmts/sqlite/ogrsqlitedatasource.cpp
@@ -51,6 +51,7 @@
 #include "ogr_feature.h"
 #include "ogr_geometry.h"
 #include "ogr_spatialref.h"
+#include "ogr_schema_override.h"
 #include "ogrsf_frmts.h"
 #include "sqlite3.h"
 
@@ -246,6 +247,142 @@ void OGRSQLiteDriverUnload(GDALDriver *)
 #endif
 }
 
+/************************************************************************/
+/*                    DealWithOgrSchemaOpenOption()                     */
+/************************************************************************/
+/**
+ * Apply the OGR_SCHEMA open option, if present, to the layers it names.
+ *
+ * For each layer override, matching field definitions are patched in place
+ * (type, width, precision, subtype, name). When the override is a full
+ * override, fields that it does not list are removed from the layer
+ * definition.
+ *
+ * NOTE(review): OpenTable() calls this after appending every layer, so all
+ * layer overrides are re-evaluated for each opened table; confirm that
+ * re-applying a rename override on a later call is safe.
+ *
+ * @param papszOpenOptionsIn Open options searched for the OGR_SCHEMA key.
+ * @return true if OGR_SCHEMA is absent or was applied; false if the dataset
+ *         is in update mode, the schema fails to load or validate, or a
+ *         named layer or field is not found.
+ */
+bool OGRSQLiteBaseDataSource::DealWithOgrSchemaOpenOption(
+    CSLConstList papszOpenOptionsIn)
+{
+    std::string osFieldsSchemaOverrideParam =
+        CSLFetchNameValueDef(papszOpenOptionsIn, "OGR_SCHEMA", "");
+
+    if (!osFieldsSchemaOverrideParam.empty())
+    {
+        if (GetUpdate())
+        {
+            CPLError(CE_Failure, CPLE_NotSupported,
+                     "OGR_SCHEMA open option is not supported in update mode.");
+            return false;
+        }
+
+        OGRSchemaOverride osSchemaOverride;
+        if (!osSchemaOverride.LoadFromJSON(osFieldsSchemaOverrideParam) ||
+            !osSchemaOverride.IsValid())
+        {
+            return false;
+        }
+
+        const auto &oLayerOverrides = osSchemaOverride.GetLayerOverrides();
+        for (const auto &oLayer : oLayerOverrides)
+        {
+            const auto &oLayerName = oLayer.first;
+            const auto &oLayerFieldOverride = oLayer.second;
+            const bool bIsFullOverride{oLayerFieldOverride.IsFullOverride()};
+            // Mutable copy: matched entries are erased below so that any
+            // leftover identifies an override for a non-existent field.
+            auto oFieldOverrides = oLayerFieldOverride.GetFieldOverrides();
+            // Field definitions kept when applying a full override.
+            std::vector<OGRFieldDefn *> aoFields;
+
+            CPLDebug("SQLite", "Applying schema override for layer %s",
+                     oLayerName.c_str());
+
+            // Fail if the layer name does not exist
+            auto poLayer = GetLayerByName(oLayerName.c_str());
+            if (poLayer == nullptr)
+            {
+                CPLError(CE_Failure, CPLE_AppDefined,
+                         "Layer %s not found in SQLite DB", oLayerName.c_str());
+                return false;
+            }
+
+            // Patch field definitions
+            auto poLayerDefn = poLayer->GetLayerDefn();
+            for (int i = 0; i < poLayerDefn->GetFieldCount(); i++)
+            {
+                auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
+                auto oFieldOverride =
+                    oFieldOverrides.find(poFieldDefn->GetNameRef());
+                if (oFieldOverride != oFieldOverrides.cend())
+                {
+                    if (oFieldOverride->second.GetFieldType().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetType(
+                                oFieldOverride->second.GetFieldType().value());
+                    if (oFieldOverride->second.GetFieldWidth().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetWidth(
+                                oFieldOverride->second.GetFieldWidth().value());
+                    if (oFieldOverride->second.GetFieldPrecision().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetPrecision(
+                                oFieldOverride->second.GetFieldPrecision()
+                                    .value());
+                    if (oFieldOverride->second.GetFieldSubType().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetSubType(
+                                oFieldOverride->second.GetFieldSubType()
+                                    .value());
+                    if (oFieldOverride->second.GetFieldName().has_value())
+                        whileUnsealing(poFieldDefn)
+                            ->SetName(oFieldOverride->second.GetFieldName()
+                                          .value()
+                                          .c_str());
+
+                    if (bIsFullOverride)
+                    {
+                        aoFields.push_back(poFieldDefn);
+                    }
+                    oFieldOverrides.erase(oFieldOverride);
+                }
+            }
+
+            // Error if any field override is not found
+            if (!oFieldOverrides.empty())
+            {
+                CPLError(CE_Failure, CPLE_AppDefined,
+                         "Field %s not found in layer %s",
+                         oFieldOverrides.cbegin()->first.c_str(),
+                         oLayerName.c_str());
+                return false;
+            }
+
+            // Remove fields not in the override
+            if (bIsFullOverride)
+            {
+                // Walk backwards so deletions do not shift pending indices.
+                for (int i = poLayerDefn->GetFieldCount() - 1; i >= 0; i--)
+                {
+                    auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
+                    if (std::find(aoFields.begin(), aoFields.end(),
+                                  poFieldDefn) == aoFields.end())
+                    {
+                        whileUnsealing(poLayerDefn)->DeleteFieldDefn(i);
+                    }
+                }
+            }
+        }
+    }
+    return true;
+}
+
 /************************************************************************/
 /*                     GetSpatialiteVersionNumber()                     */
 /************************************************************************/
@@ -2458,8 +2595,17 @@ bool OGRSQLiteDataSource::Open(GDALOpenInfo *poOpenInfo)
                 continue;
 
             if (GDALDataset::GetLayerByName(pszTableName) == nullptr)
-                OpenTable(pszTableName, true, false,
-                          /* bMayEmitError = */ true);
+            {
+                const bool bRet = OpenTable(pszTableName, true, false,
+                                            /* bMayEmitError = */ true);
+                if (!bRet)
+                {
+                    CPLDebug("SQLITE", "Failed to open layer %s", pszTableName);
+                    sqlite3_free_table(papszResult);
+                    CPLHashSetDestroy(hSet);
+                    return false;
+                }
+            }
 
             if (bListAllTables)
                 CPLHashSetInsert(hSet, CPLStrdup(pszTableName));
@@ -2912,6 +3058,13 @@ bool OGRSQLiteDataSource::OpenTable(const char *pszTableName, bool bIsTable,
     /* -------------------------------------------------------------------- */
     m_apoLayers.push_back(std::move(poLayer));
 
+    // Remove in case of error in the schema processing
+    if (!DealWithOgrSchemaOpenOption(papszOpenOptions))
+    {
+        m_apoLayers.pop_back();
+        return false;
+    }
+
     return true;
 }
 
diff --git a/ogr/ogrsf_frmts/sqlite/ogrsqlitedriver.cpp b/ogr/ogrsf_frmts/sqlite/ogrsqlitedriver.cpp
index 24ae73489ea7..1ee09e64de5d 100644
--- a/ogr/ogrsf_frmts/sqlite/ogrsqlitedriver.cpp
+++ b/ogr/ogrsf_frmts/sqlite/ogrsqlitedriver.cpp
@@ -317,6 +317,11 @@ void RegisterOGRSQLite()
         "description='Whether to promote 1-bit monochrome raster as 8-bit, so "
         "as to have higher quality overviews' default='YES'/>"
#endif + "