From cd6c35879737cb93e4a2f96ef9f18252020ebb34 Mon Sep 17 00:00:00 2001
From: ingvord
Date: Thu, 19 Sep 2024 17:12:59 +0200
Subject: [PATCH] Add metadataTypes endpoint - readonly; available for everyone

---
Review notes (text between "---" and the first "diff --git" is ignored by
git am):

- Controller: adds GET /datasets/metadataTypes, which delegates to
  DatasetsService.getScientificMetadataTypes(). The PoliciesGuard /
  CheckPolicies decorators are left commented out on this route, so no
  method-level policy check is applied to it (class-level guards, if any,
  are not visible in this patch - confirm).
- Service: a single aggregation over datasetModel that expands
  scientificMetadata into key/value pairs, groups by key, collects the
  $type of each entry's `value` field, and emits
  { metadataKey, metadataType }. metadataType is "mixed" when a key was
  seen with more than one type.
- The pipeline has no $match stage, so it runs over the whole dataset
  collection on every request; see the TODO about performance at scale.
- metadataTypes() is declared as Promise<unknown[]>: a bare `Promise`
  return type does not compile, and the element shape is not declared as a
  type anywhere in this patch.

 src/datasets/datasets.controller.ts | 23 ++++++++++++++++++
 src/datasets/datasets.service.ts    | 37 +++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/src/datasets/datasets.controller.ts b/src/datasets/datasets.controller.ts
index 0d7706ee1..33d798bc4 100644
--- a/src/datasets/datasets.controller.ts
+++ b/src/datasets/datasets.controller.ts
@@ -888,6 +888,29 @@ export class DatasetsController {
     return this.datasetsService.metadataKeys(parsedFilters);
   }
 
+  // GET /datasets/metadataTypes
+  // @UseGuards(PoliciesGuard)
+  // @CheckPolicies((ability: AppAbility) =>
+  //   ability.can(Action.DatasetRead, DatasetClass),
+  // )
+  @Get("/metadataTypes")
+  @ApiOperation({
+    summary: "Retrieve all available scientific metadata field types.",
+    description:
+      "This endpoint returns the field names and their corresponding data types from the `scientificMetadata` fields of all datasets in the system. It aggregates all the unique field names and their associated types from the dataset collection.",
+  })
+  @ApiResponse({
+    status: 200,
+    type: Array,
+    description:
+      "Returns an array of objects where each object contains a metadata field name and its associated type.",
+  })
+  async metadataTypes(): Promise<unknown[]> {
+    const result = this.datasetsService.getScientificMetadataTypes();
+
+    return result;
+  }
+
   // GET /datasets/findOne
   @UseGuards(PoliciesGuard)
   @CheckPolicies("datasets", (ability: AppAbility) =>
diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index c78974c1e..b49de4269 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -476,6 +476,43 @@ export class DatasetsService {
     }
   }
 
+  async getScientificMetadataTypes() {
+    // TODO performance research required, say 1K records, 100K and 1M
+    return this.datasetModel.aggregate([
+      {
+        // Step 1: Convert the scientificMetadata object into an array of key-value pairs
+        $project: {
+          scientificMetadataArray: { $objectToArray: "$scientificMetadata" },
+        },
+      },
+      {
+        // Step 2: Unwind the array to treat each field individually
+        $unwind: "$scientificMetadataArray",
+      },
+      {
+        // Step 3: Group by the field name and accumulate types
+        $group: {
+          _id: "$scientificMetadataArray.k", // Group by field name (key)
+          types: { $addToSet: { $type: "$scientificMetadataArray.v.value" } }, // Add the data type of the value
+        },
+      },
+      {
+        // Step 4: Convert the array of types into a string for each field name
+        $project: {
+          _id: 0, // Don't include the default _id
+          metadataKey: "$_id",
+          metadataType: {
+            $cond: {
+              if: { $eq: [{ $size: "$types" }, 1] }, // If only one type is present
+              then: { $arrayElemAt: ["$types", 0] }, // Use the type
+              else: "mixed", // If there are multiple types, mark it as 'mixed'
+            },
+          },
+        },
+      },
+    ]);
+  }
+
   async isElasticSearchDBEmpty() {
     if (!this.ESClient) return;
     const count = await this.ESClient.getCount();