From d859ca757508de7e951676c01fef9cb74d456950 Mon Sep 17 00:00:00 2001 From: duonganhthu43 Date: Wed, 3 Apr 2024 11:06:54 +0700 Subject: [PATCH] chore: add description in FieldDefinition for LLM semantics --- dozer-ingestion/aerospike/src/connector.rs | 1 + dozer-ingestion/ethereum/src/log/helper.rs | 13 +++++++++++++ dozer-ingestion/ethereum/src/trace/helper.rs | 8 ++++++++ dozer-ingestion/javascript/src/lib.rs | 1 + dozer-ingestion/kafka/src/debezium/schema.rs | 3 +++ .../kafka/src/debezium/schema_registry.rs | 1 + .../kafka/src/no_schema_registry_basic.rs | 2 ++ dozer-ingestion/kafka/src/schema_registry_basic.rs | 1 + dozer-ingestion/mongodb/src/lib.rs | 2 ++ dozer-ingestion/mysql/src/connector.rs | 1 + dozer-ingestion/object-store/src/schema_helper.rs | 1 + dozer-ingestion/oracle/src/connector/mapping.rs | 1 + dozer-ingestion/postgres/src/helper.rs | 1 + dozer-ingestion/snowflake/src/connection/client.rs | 1 + dozer-sink-clickhouse/src/metadata.rs | 2 ++ dozer-sink-oracle/src/lib.rs | 6 ++++++ dozer-types/src/arrow_types/from_arrow.rs | 1 + dozer-types/src/types/mod.rs | 2 ++ 18 files changed, 48 insertions(+) diff --git a/dozer-ingestion/aerospike/src/connector.rs b/dozer-ingestion/aerospike/src/connector.rs index ad6b728e43..663c5fdbcb 100644 --- a/dozer-ingestion/aerospike/src/connector.rs +++ b/dozer-ingestion/aerospike/src/connector.rs @@ -525,6 +525,7 @@ impl Connector for AerospikeConnector { }, nullable: name != "PK", source: Default::default(), + description: None, }) .collect(), primary_index, diff --git a/dozer-ingestion/ethereum/src/log/helper.rs b/dozer-ingestion/ethereum/src/log/helper.rs index 9afecb8113..609d49df6c 100644 --- a/dozer-ingestion/ethereum/src/log/helper.rs +++ b/dozer-ingestion/ethereum/src/log/helper.rs @@ -38,6 +38,7 @@ pub fn get_contract_event_schemas( }, nullable: false, source: SourceDefinition::Dynamic, + description: None, }); } @@ -231,72 +232,84 @@ pub fn get_eth_schema() -> Schema { typ: FieldType::UInt, nullable: false, 
source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "address".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "topics".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "data".to_string(), typ: FieldType::Binary, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "block_hash".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "block_number".to_string(), typ: FieldType::UInt, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "transaction_hash".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "transaction_index".to_string(), typ: FieldType::Int, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "log_index".to_string(), typ: FieldType::Int, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "transaction_log_index".to_string(), typ: FieldType::Int, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "log_type".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "removed".to_string(), typ: FieldType::Boolean, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], diff --git a/dozer-ingestion/ethereum/src/trace/helper.rs b/dozer-ingestion/ethereum/src/trace/helper.rs index 1ae76db81d..b69fbf67bd 100644 --- a/dozer-ingestion/ethereum/src/trace/helper.rs +++ b/dozer-ingestion/ethereum/src/trace/helper.rs @@ -93,48 +93,56 @@ pub fn get_trace_schema() -> Schema 
{ typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "from".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "to".to_string(), typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "value".to_string(), typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "gas".to_string(), typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "gas_used".to_string(), typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "input".to_string(), typ: FieldType::Text, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "output".to_string(), typ: FieldType::Text, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![], diff --git a/dozer-ingestion/javascript/src/lib.rs b/dozer-ingestion/javascript/src/lib.rs index 5f857b47db..ba9fef603a 100644 --- a/dozer-ingestion/javascript/src/lib.rs +++ b/dozer-ingestion/javascript/src/lib.rs @@ -67,6 +67,7 @@ impl Connector for JavaScriptConnector { typ: FieldType::Json, nullable: false, source: SourceDefinition::Dynamic, + description: None, }], primary_index: vec![], }, diff --git a/dozer-ingestion/kafka/src/debezium/schema.rs b/dozer-ingestion/kafka/src/debezium/schema.rs index 7a801921ab..76ee9bb5a0 100644 --- a/dozer-ingestion/kafka/src/debezium/schema.rs +++ b/dozer-ingestion/kafka/src/debezium/schema.rs @@ -82,6 +82,7 @@ pub fn map_schema( typ, nullable: f.optional.map_or(false, |o| o), source: SourceDefinition::Dynamic, + description: None, }) }) .collect(), @@ -196,12 +197,14 @@ mod tests { typ: FieldType::Int, nullable: 
false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "name".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], diff --git a/dozer-ingestion/kafka/src/debezium/schema_registry.rs b/dozer-ingestion/kafka/src/debezium/schema_registry.rs index 9f4f154869..d88644ca46 100644 --- a/dozer-ingestion/kafka/src/debezium/schema_registry.rs +++ b/dozer-ingestion/kafka/src/debezium/schema_registry.rs @@ -133,6 +133,7 @@ impl SchemaRegistry { typ, nullable, source: SourceDefinition::Dynamic, + description: None, }) }) .collect(); diff --git a/dozer-ingestion/kafka/src/no_schema_registry_basic.rs b/dozer-ingestion/kafka/src/no_schema_registry_basic.rs index 890020a4f2..20f93a7b35 100644 --- a/dozer-ingestion/kafka/src/no_schema_registry_basic.rs +++ b/dozer-ingestion/kafka/src/no_schema_registry_basic.rs @@ -16,12 +16,14 @@ impl NoSchemaRegistryBasic { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, FieldDefinition { name: "message".to_string(), typ: FieldType::String, nullable: true, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], diff --git a/dozer-ingestion/kafka/src/schema_registry_basic.rs b/dozer-ingestion/kafka/src/schema_registry_basic.rs index d07243c549..634ca1f030 100644 --- a/dozer-ingestion/kafka/src/schema_registry_basic.rs +++ b/dozer-ingestion/kafka/src/schema_registry_basic.rs @@ -51,6 +51,7 @@ impl SchemaRegistryBasic { typ, nullable, source: SourceDefinition::Dynamic, + description: None, }) }) .collect(); diff --git a/dozer-ingestion/mongodb/src/lib.rs b/dozer-ingestion/mongodb/src/lib.rs index 1e878c0296..190be41267 100644 --- a/dozer-ingestion/mongodb/src/lib.rs +++ b/dozer-ingestion/mongodb/src/lib.rs @@ -501,12 +501,14 @@ impl Connector for MongodbConnector { typ: FieldType::Json, nullable: false, source: SourceDefinition::Dynamic, + description: 
None, }, FieldDefinition { name: "data".to_owned(), typ: FieldType::Json, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, ], primary_index: vec![0], diff --git a/dozer-ingestion/mysql/src/connector.rs b/dozer-ingestion/mysql/src/connector.rs index 44c99decc4..5fea4c8a50 100644 --- a/dozer-ingestion/mysql/src/connector.rs +++ b/dozer-ingestion/mysql/src/connector.rs @@ -177,6 +177,7 @@ impl Connector for MySQLConnector { typ, nullable, source: SourceDefinition::Dynamic, + description: None, } }, ) diff --git a/dozer-ingestion/object-store/src/schema_helper.rs b/dozer-ingestion/object-store/src/schema_helper.rs index 56735f80ae..2f18df30cc 100644 --- a/dozer-ingestion/object-store/src/schema_helper.rs +++ b/dozer-ingestion/object-store/src/schema_helper.rs @@ -52,6 +52,7 @@ pub fn map_schema_to_dozer<'a, I: Iterator>>( typ: mapped_field_type, nullable: field.is_nullable(), source: SourceDefinition::Dynamic, + description: None, }) }) .collect() diff --git a/dozer-ingestion/oracle/src/connector/mapping.rs b/dozer-ingestion/oracle/src/connector/mapping.rs index 0e8a545bb9..718dce3349 100644 --- a/dozer-ingestion/oracle/src/connector/mapping.rs +++ b/dozer-ingestion/oracle/src/connector/mapping.rs @@ -215,6 +215,7 @@ pub fn decide_schema( connection: connection.to_string(), name: table_name.clone(), }, + description: None, }), Err(err) => return Err(Error::DataType(err.clone())), } diff --git a/dozer-ingestion/postgres/src/helper.rs b/dozer-ingestion/postgres/src/helper.rs index 4a90722da1..9d2fb3f4c5 100644 --- a/dozer-ingestion/postgres/src/helper.rs +++ b/dozer-ingestion/postgres/src/helper.rs @@ -362,6 +362,7 @@ pub fn convert_column_to_field(column: &Column) -> Result Client<'env> { typ, nullable: *nullable, source: SourceDefinition::Dynamic, + description: None, }); } } diff --git a/dozer-sink-clickhouse/src/metadata.rs b/dozer-sink-clickhouse/src/metadata.rs index 4d3b409e02..d5c6075c80 100644 --- 
a/dozer-sink-clickhouse/src/metadata.rs +++ b/dozer-sink-clickhouse/src/metadata.rs @@ -29,6 +29,7 @@ impl ReplicationMetadata { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ) @@ -38,6 +39,7 @@ impl ReplicationMetadata { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, false, ) diff --git a/dozer-sink-oracle/src/lib.rs b/dozer-sink-oracle/src/lib.rs index 66970af5ae..77efb4480e 100644 --- a/dozer-sink-oracle/src/lib.rs +++ b/dozer-sink-oracle/src/lib.rs @@ -516,6 +516,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::UInt, nullable: true, source: dozer_types::types::SourceDefinition::Dynamic, + description: None, }, false, ); @@ -525,6 +526,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::UInt, nullable: true, source: dozer_types::types::SourceDefinition::Dynamic, + description: None, }, false, ); @@ -558,6 +560,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ) @@ -567,6 +570,7 @@ impl SinkFactory for OracleSinkFactory { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, false, ), @@ -944,6 +948,7 @@ mod tests { typ: FieldType::UInt, nullable: false, source: SourceDefinition::Dynamic, + description: None, }, true, ); @@ -976,6 +981,7 @@ mod tests { typ: FieldType::String, nullable: false, source: SourceDefinition::Dynamic, + description: None, } } } diff --git a/dozer-types/src/arrow_types/from_arrow.rs b/dozer-types/src/arrow_types/from_arrow.rs index 6117d4fd5e..b76943a5a9 100644 --- a/dozer-types/src/arrow_types/from_arrow.rs +++ b/dozer-types/src/arrow_types/from_arrow.rs @@ -189,6 +189,7 @@ fn handle_with_dozer_schema( typ, nullable: field.is_nullable(), source: SourceDefinition::Dynamic, + description: None, }); } diff --git a/dozer-types/src/types/mod.rs 
b/dozer-types/src/types/mod.rs index 507d226f03..31d111d289 100644 --- a/dozer-types/src/types/mod.rs +++ b/dozer-types/src/types/mod.rs @@ -53,6 +53,7 @@ pub struct FieldDefinition { pub nullable: bool, #[serde(default)] pub source: SourceDefinition, + pub description: Option<String>, } impl FieldDefinition { @@ -62,6 +63,7 @@ impl FieldDefinition { typ, nullable, source, + description: None, } }