From b0a58ea6fc4da532665cdb54d82124659b11c7c4 Mon Sep 17 00:00:00 2001 From: Maud Ehrmann Date: Fri, 29 Nov 2024 10:32:18 +0100 Subject: [PATCH] add markdown documentation for doc embedding schema --- ...s-backup-properties-the-embedder-schema.md | 9 ++ ...s-the-embedding-schema-the-items-schema.md | 9 ++ ...-backup-properties-the-embedding-schema.md | 3 + ...gs-docs-backup-properties-the-id-schema.md | 19 +++ ...ocs-backup-properties-the-length-schema.md | 9 ++ ...gs-docs-backup-properties-the-ts-schema.md | 19 +++ docs/embeddings-docs-backup.md | 147 ++++++++++++++++++ docs/embeddings-docs-properties-ci_id.md | 9 ++ docs/embeddings-docs-properties-ci_type.md | 3 + ...docs-properties-embedding-oneof-0-items.md | 9 ++ ...dings-docs-properties-embedding-oneof-0.md | 3 + ...roperties-embedding-oneof-1-items-items.md | 9 ++ ...docs-properties-embedding-oneof-1-items.md | 3 + ...dings-docs-properties-embedding-oneof-1.md | 3 + docs/embeddings-docs-properties-embedding.md | 9 ++ docs/embeddings-docs-properties-model_id.md | 3 + docs/embeddings-docs-properties-size.md | 3 + docs/embeddings-docs-properties-ts.md | 7 + docs/embeddings-docs.md | 138 ++++++++++++++++ 19 files changed, 414 insertions(+) create mode 100644 docs/embeddings-docs-backup-properties-the-embedder-schema.md create mode 100644 docs/embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md create mode 100644 docs/embeddings-docs-backup-properties-the-embedding-schema.md create mode 100644 docs/embeddings-docs-backup-properties-the-id-schema.md create mode 100644 docs/embeddings-docs-backup-properties-the-length-schema.md create mode 100644 docs/embeddings-docs-backup-properties-the-ts-schema.md create mode 100644 docs/embeddings-docs-backup.md create mode 100644 docs/embeddings-docs-properties-ci_id.md create mode 100644 docs/embeddings-docs-properties-ci_type.md create mode 100644 docs/embeddings-docs-properties-embedding-oneof-0-items.md create mode 100644 
docs/embeddings-docs-properties-embedding-oneof-0.md create mode 100644 docs/embeddings-docs-properties-embedding-oneof-1-items-items.md create mode 100644 docs/embeddings-docs-properties-embedding-oneof-1-items.md create mode 100644 docs/embeddings-docs-properties-embedding-oneof-1.md create mode 100644 docs/embeddings-docs-properties-embedding.md create mode 100644 docs/embeddings-docs-properties-model_id.md create mode 100644 docs/embeddings-docs-properties-size.md create mode 100644 docs/embeddings-docs-properties-ts.md create mode 100644 docs/embeddings-docs.md diff --git a/docs/embeddings-docs-backup-properties-the-embedder-schema.md b/docs/embeddings-docs-backup-properties-the-embedder-schema.md new file mode 100644 index 0000000..3505566 --- /dev/null +++ b/docs/embeddings-docs-backup-properties-the-embedder-schema.md @@ -0,0 +1,9 @@ +## embedder Type + +`string` ([The Embedder Schema](embeddings-docs-backup-properties-the-embedder-schema.md)) + +## embedder Examples + +```json +"Alibaba-NLP/gte-multilingual-base@f7d567e" +``` diff --git a/docs/embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md b/docs/embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md new file mode 100644 index 0000000..f02aff9 --- /dev/null +++ b/docs/embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md @@ -0,0 +1,9 @@ +## items Type + +`number` ([The Items Schema](embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md)) + +## items Examples + +```json +-0.11429 +``` diff --git a/docs/embeddings-docs-backup-properties-the-embedding-schema.md b/docs/embeddings-docs-backup-properties-the-embedding-schema.md new file mode 100644 index 0000000..61334a6 --- /dev/null +++ b/docs/embeddings-docs-backup-properties-the-embedding-schema.md @@ -0,0 +1,3 @@ +## embedding Type + +`number[]` ([The Items Schema](embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md)) diff --git 
a/docs/embeddings-docs-backup-properties-the-id-schema.md b/docs/embeddings-docs-backup-properties-the-id-schema.md new file mode 100644 index 0000000..4d28270 --- /dev/null +++ b/docs/embeddings-docs-backup-properties-the-id-schema.md @@ -0,0 +1,19 @@ +## id Type + +`string` ([The Id Schema](embeddings-docs-backup-properties-the-id-schema.md)) + +## id Constraints + +**pattern**: the string must match the following regular expression: + +```regexp +^(.*)$ +``` + +[try pattern](https://regexr.com/?expression=%5E\(.*\)%24 "try regular expression with regexr.com") + +## id Examples + +```json +"actionfem-1940-01-08-a-i0001" +``` diff --git a/docs/embeddings-docs-backup-properties-the-length-schema.md b/docs/embeddings-docs-backup-properties-the-length-schema.md new file mode 100644 index 0000000..d5116b3 --- /dev/null +++ b/docs/embeddings-docs-backup-properties-the-length-schema.md @@ -0,0 +1,9 @@ +## len Type + +`integer` ([The Length Schema](embeddings-docs-backup-properties-the-length-schema.md)) + +## len Examples + +```json +2976 +``` diff --git a/docs/embeddings-docs-backup-properties-the-ts-schema.md b/docs/embeddings-docs-backup-properties-the-ts-schema.md new file mode 100644 index 0000000..ab083eb --- /dev/null +++ b/docs/embeddings-docs-backup-properties-the-ts-schema.md @@ -0,0 +1,19 @@ +## ts Type + +`string` ([The Ts Schema](embeddings-docs-backup-properties-the-ts-schema.md)) + +## ts Constraints + +**pattern**: the string must match the following regular expression: + +```regexp +^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\+00:00|Z)$ +``` + +[try pattern](https://regexr.com/?expression=%5E%5B0-9%5D%7B4%7D-%5B0-9%5D%7B2%7D-%5B0-9%5D%7B2%7DT%5B0-9%5D%7B2%7D%3A%5B0-9%5D%7B2%7D%3A%5B0-9%5D%7B2%7D\(%5C%2B00%3A00%7CZ\)%24 "try regular expression with regexr.com") + +## ts Examples + +```json +"2024-08-29T06:42:53+00:00" +``` diff --git a/docs/embeddings-docs-backup.md b/docs/embeddings-docs-backup.md new file mode 100644 index 0000000..97241d2 
--- /dev/null +++ b/docs/embeddings-docs-backup.md @@ -0,0 +1,147 @@ +## Document Embeddings JSON Schema Type + +`object` ([Document Embeddings JSON Schema](embeddings-docs-backup.md)) + +# Document Embeddings JSON Schema Properties + +| Property | Type | Required | Nullable | Defined by | +| :---------------------- | :-------- | :------- | :------------- | :------------------------------------------------------------------------------------------------------------------------------------------ | +| [id](#id) | `string` | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-id-schema.md "#/properties/id#/properties/id") | +| [ts](#ts) | `string` | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-ts-schema.md "#/properties/ts#/properties/ts") | +| [embedder](#embedder) | `string` | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-embedder-schema.md "#/properties/embedder#/properties/embedder") | +| [len](#len) | `integer` | Optional | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-length-schema.md "#/properties/len#/properties/len") | +| [embedding](#embedding) | `array` | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-embedding-schema.md "#/properties/embedding#/properties/embedding") | + +## id + +The unique identifier for a content item, cf. 
+ +`id` + +* is required + +* Type: `string` ([The Id Schema](embeddings-docs-backup-properties-the-id-schema.md)) + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-id-schema.md "#/properties/id#/properties/id") + +### id Type + +`string` ([The Id Schema](embeddings-docs-backup-properties-the-id-schema.md)) + +### id Constraints + +**pattern**: the string must match the following regular expression: + +```regexp +^(.*)$ +``` + +[try pattern](https://regexr.com/?expression=%5E\(.*\)%24 "try regular expression with regexr.com") + +### id Examples + +```json +"actionfem-1940-01-08-a-i0001" +``` + +## ts + +The timestamp when the embeddings were created + +`ts` + +* is required + +* Type: `string` ([The Ts Schema](embeddings-docs-backup-properties-the-ts-schema.md)) + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-ts-schema.md "#/properties/ts#/properties/ts") + +### ts Type + +`string` ([The Ts Schema](embeddings-docs-backup-properties-the-ts-schema.md)) + +### ts Constraints + +**pattern**: the string must match the following regular expression: + +```regexp +^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\+00:00|Z)$ +``` + +[try pattern](https://regexr.com/?expression=%5E%5B0-9%5D%7B4%7D-%5B0-9%5D%7B2%7D-%5B0-9%5D%7B2%7DT%5B0-9%5D%7B2%7D%3A%5B0-9%5D%7B2%7D%3A%5B0-9%5D%7B2%7D\(%5C%2B00%3A00%7CZ\)%24 "try regular expression with regexr.com") + +### ts Examples + +```json +"2024-08-29T06:42:53+00:00" +``` + +## embedder + +The model or tool used to generate the embeddings + +`embedder` + +* is required + +* Type: `string` ([The Embedder Schema](embeddings-docs-backup-properties-the-embedder-schema.md)) + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-embedder-schema.md "#/properties/embedder#/properties/embedder") + +### embedder Type + +`string` ([The Embedder 
Schema](embeddings-docs-backup-properties-the-embedder-schema.md)) + +### embedder Examples + +```json +"Alibaba-NLP/gte-multilingual-base@f7d567e" +``` + +## len + +The length of the document in characters. + +`len` + +* is optional + +* Type: `integer` ([The Length Schema](embeddings-docs-backup-properties-the-length-schema.md)) + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-length-schema.md "#/properties/len#/properties/len") + +### len Type + +`integer` ([The Length Schema](embeddings-docs-backup-properties-the-length-schema.md)) + +### len Examples + +```json +2976 +``` + +## embedding + +The vector embeddings of the document + +`embedding` + +* is required + +* Type: `number[]` ([The Items Schema](embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md)) + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-backup-properties-the-embedding-schema.md "#/properties/embedding#/properties/embedding") + +### embedding Type + +`number[]` ([The Items Schema](embeddings-docs-backup-properties-the-embedding-schema-the-items-schema.md)) diff --git a/docs/embeddings-docs-properties-ci_id.md b/docs/embeddings-docs-properties-ci_id.md new file mode 100644 index 0000000..ded5308 --- /dev/null +++ b/docs/embeddings-docs-properties-ci_id.md @@ -0,0 +1,9 @@ +## ci\_id Type + +`string` + +## ci\_id Examples + +```json +"actionfem-1940-01-08-a-i0001" +``` diff --git a/docs/embeddings-docs-properties-ci_type.md b/docs/embeddings-docs-properties-ci_type.md new file mode 100644 index 0000000..f2d9efa --- /dev/null +++ b/docs/embeddings-docs-properties-ci_type.md @@ -0,0 +1,3 @@ +## ci\_type Type + +`string` diff --git a/docs/embeddings-docs-properties-embedding-oneof-0-items.md b/docs/embeddings-docs-properties-embedding-oneof-0-items.md new file mode 100644 index 0000000..9ff1ad5 --- /dev/null +++ b/docs/embeddings-docs-properties-embedding-oneof-0-items.md @@ -0,0 
+1,9 @@ +## items Type + +`number` + +## items Examples + +```json +-0.11429 +``` diff --git a/docs/embeddings-docs-properties-embedding-oneof-0.md b/docs/embeddings-docs-properties-embedding-oneof-0.md new file mode 100644 index 0000000..1a247e3 --- /dev/null +++ b/docs/embeddings-docs-properties-embedding-oneof-0.md @@ -0,0 +1,3 @@ +## 0 Type + +`number[]` diff --git a/docs/embeddings-docs-properties-embedding-oneof-1-items-items.md b/docs/embeddings-docs-properties-embedding-oneof-1-items-items.md new file mode 100644 index 0000000..9ff1ad5 --- /dev/null +++ b/docs/embeddings-docs-properties-embedding-oneof-1-items-items.md @@ -0,0 +1,9 @@ +## items Type + +`number` + +## items Examples + +```json +-0.11429 +``` diff --git a/docs/embeddings-docs-properties-embedding-oneof-1-items.md b/docs/embeddings-docs-properties-embedding-oneof-1-items.md new file mode 100644 index 0000000..a86278e --- /dev/null +++ b/docs/embeddings-docs-properties-embedding-oneof-1-items.md @@ -0,0 +1,3 @@ +## items Type + +`number[]` diff --git a/docs/embeddings-docs-properties-embedding-oneof-1.md b/docs/embeddings-docs-properties-embedding-oneof-1.md new file mode 100644 index 0000000..2441c7b --- /dev/null +++ b/docs/embeddings-docs-properties-embedding-oneof-1.md @@ -0,0 +1,3 @@ +## 1 Type + +`number[][]` diff --git a/docs/embeddings-docs-properties-embedding.md b/docs/embeddings-docs-properties-embedding.md new file mode 100644 index 0000000..0d7aad5 --- /dev/null +++ b/docs/embeddings-docs-properties-embedding.md @@ -0,0 +1,9 @@ +## embedding Type + +merged type ([Details](embeddings-docs-properties-embedding.md)) + +one (and only one) of + +* [Untitled array in Document Embeddings JSON Schema](embeddings-docs-properties-embedding-oneof-0.md "check type definition") + +* [Untitled array in Document Embeddings JSON Schema](embeddings-docs-properties-embedding-oneof-1.md "check type definition") diff --git a/docs/embeddings-docs-properties-model_id.md 
b/docs/embeddings-docs-properties-model_id.md new file mode 100644 index 0000000..33559ba --- /dev/null +++ b/docs/embeddings-docs-properties-model_id.md @@ -0,0 +1,3 @@ +## model\_id Type + +`string` diff --git a/docs/embeddings-docs-properties-size.md b/docs/embeddings-docs-properties-size.md new file mode 100644 index 0000000..9286eaa --- /dev/null +++ b/docs/embeddings-docs-properties-size.md @@ -0,0 +1,3 @@ +## size Type + +`integer` diff --git a/docs/embeddings-docs-properties-ts.md b/docs/embeddings-docs-properties-ts.md new file mode 100644 index 0000000..669f217 --- /dev/null +++ b/docs/embeddings-docs-properties-ts.md @@ -0,0 +1,7 @@ +## ts Type + +`string` + +## ts Constraints + +**date time**: the string must be a date time string, according to [RFC 3339, section 5.6](https://tools.ietf.org/html/rfc3339 "check the specification") diff --git a/docs/embeddings-docs.md b/docs/embeddings-docs.md new file mode 100644 index 0000000..d6712fb --- /dev/null +++ b/docs/embeddings-docs.md @@ -0,0 +1,138 @@ +## Document Embeddings JSON Schema Type + +`object` ([Document Embeddings JSON Schema](embeddings-docs.md)) + +# Document Embeddings JSON Schema Properties + +| Property | Type | Required | Nullable | Defined by | +| :---------------------- | :-------- | :------- | :------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [ci\_id](#ci_id) | `string` | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-properties-ci_id.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/ci_id") | +| [ci\_type](#ci_type) | `string` | Optional | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-properties-ci_type.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/ci_type") | +| 
[model\_id](#model_id) | `string` | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-properties-model_id.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/model_id") | +| [embedding](#embedding) | Merged | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-properties-embedding.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/embedding") | +| [size](#size) | `integer` | Required | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-properties-size.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/size") | +| [ts](#ts) | `string` | Optional | cannot be null | [Document Embeddings JSON Schema](embeddings-docs-properties-ts.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/ts") | + +## ci\_id + +Unique identifier for the content item. + +`ci_id` + +* is required + +* Type: `string` + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-properties-ci_id.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/ci_id") + +### ci\_id Type + +`string` + +### ci\_id Examples + +```json +"actionfem-1940-01-08-a-i0001" +``` + +## ci\_type + +Type of content item as present in the rebuilt + +`ci_type` + +* is optional + +* Type: `string` + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-properties-ci_type.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/ci_type") + +### ci\_type Type + +`string` + +## model\_id + +The ID of the model that produced this output, composed as specified in these slides: . 
+ +`model_id` + +* is required + +* Type: `string` + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-properties-model_id.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/model_id") + +### model\_id Type + +`string` + +## embedding + +The embedding of the content item: either a single vector (`number[]`) or a list of vectors (`number[][]`). + +`embedding` + +* is required + +* Type: merged type ([Details](embeddings-docs-properties-embedding.md)) + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-properties-embedding.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/embedding") + +### embedding Type + +merged type ([Details](embeddings-docs-properties-embedding.md)) + +one (and only one) of + +* [Untitled array in Document Embeddings JSON Schema](embeddings-docs-properties-embedding-oneof-0.md "check type definition") + +* [Untitled array in Document Embeddings JSON Schema](embeddings-docs-properties-embedding-oneof-1.md "check type definition") + +## size + +The size of the embedding vectors. + +`size` + +* is required + +* Type: `integer` + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-properties-size.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/size") + +### size Type + +`integer` + +## ts + +Timestamp indicating when the embedding was created (e.g., '2024-10-09T09:29:02Z'). + +`ts` + +* is optional + +* Type: `string` + +* cannot be null + +* defined in: [Document Embeddings JSON Schema](embeddings-docs-properties-ts.md "https://impresso.github.io/impresso-schemas/json/embeddings/embeddings-docs.schema.json#/properties/ts") + +### ts Type + +`string` + +### ts Constraints + +**date time**: the string must be a date time string, according to [RFC 3339, section 5.6](https://tools.ietf.org/html/rfc3339 "check the specification")