-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(gravsearch): Prevent duplicate results #1626
Changes from 46 commits
3dbb59c
b6d3cf2
49f4214
9171746
15c3be5
cae954d
d6c8fc0
7f1d91f
623630c
5d1b28e
e86b20b
c25808e
d8d1c38
da23fc6
baeff8a
144863d
f3ed00c
5de2b47
ba61578
8a0c8c6
ce602dc
9499f85
62d843c
81ec5c9
06a2c41
d56811c
01e1eef
85541cd
40fa184
926c942
b7a132e
6bf6d43
d6a0f3a
3934254
717da58
b46c4f0
5232359
8617571
8e81c67
c3b19cd
9794698
8993daa
3db3a82
c7b9703
9f38418
8d4fdfb
06be757
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,13 +146,13 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
// _ = println(searchSparql) | ||
|
||
prequeryResponseNotMerged: SparqlSelectResponse <- (storeManager ? SparqlSelectRequest(searchSparql)).mapTo[SparqlSelectResponse] | ||
// _ = println(prequeryResponseNotMerged) | ||
|
||
mainResourceVar = QueryVariable("resource") | ||
|
||
// Merge rows with the same resource IRI. | ||
prequeryResponse = mergePrequeryResults(prequeryResponseNotMerged, mainResourceVar) | ||
|
||
// _ = println(prequeryResponse) | ||
|
||
// a sequence of resource IRIs that match the search criteria | ||
// attention: no permission checking has been done so far | ||
resourceIris: Seq[IRI] = prequeryResponse.results.bindings.map { | ||
|
@@ -240,6 +240,7 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
apiResponse: ReadResourcesSequenceV2 <- ConstructResponseUtilV2.createApiResponse( | ||
mainResourcesAndValueRdfData = mainResourcesAndValueRdfData, | ||
orderByResourceIri = resourceIris, | ||
pageSizeBeforeFiltering = resourceIris.size, | ||
mappings = mappingsAsMap, | ||
queryStandoff = queryStandoff, | ||
calculateMayHaveMoreResults = true, | ||
|
@@ -281,7 +282,8 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
|
||
// Create a Select prequery | ||
|
||
nonTriplestoreSpecificConstructToSelectTransformer: NonTriplestoreSpecificGravsearchToCountPrequeryGenerator = new NonTriplestoreSpecificGravsearchToCountPrequeryGenerator( | ||
nonTriplestoreSpecificConstructToSelectTransformer: NonTriplestoreSpecificGravsearchToCountPrequeryTransformer = new NonTriplestoreSpecificGravsearchToCountPrequeryTransformer( | ||
constructClause = inputQuery.constructClause, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume you need to know about the Construct clause because you only want to return the IRIs of the resources and values that the client asks for. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. |
||
typeInspectionResult = typeInspectionResult, | ||
querySchema = inputQuery.querySchema.getOrElse(throw AssertionException(s"WhereClause has no querySchema")) | ||
) | ||
|
@@ -339,7 +341,6 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
targetSchema: ApiV2Schema, | ||
schemaOptions: Set[SchemaOption], | ||
requestingUser: UserADM): Future[ReadResourcesSequenceV2] = { | ||
import org.knora.webapi.responders.v2.search.MainQueryResultProcessor | ||
import org.knora.webapi.responders.v2.search.gravsearch.mainquery.GravsearchMainQueryGenerator | ||
|
||
for { | ||
|
@@ -356,7 +357,8 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
|
||
// Create a Select prequery | ||
|
||
nonTriplestoreSpecificConstructToSelectTransformer: NonTriplestoreSpecificGravsearchToPrequeryGenerator = new NonTriplestoreSpecificGravsearchToPrequeryGenerator( | ||
nonTriplestoreSpecificConstructToSelectTransformer: NonTriplestoreSpecificGravsearchToPrequeryTransformer = new NonTriplestoreSpecificGravsearchToPrequeryTransformer( | ||
constructClause = inputQuery.constructClause, | ||
typeInspectionResult = typeInspectionResult, | ||
querySchema = inputQuery.querySchema.getOrElse(throw AssertionException(s"WhereClause has no querySchema")), | ||
settings = settings | ||
|
@@ -366,11 +368,14 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
// TODO: if the ORDER BY criterion is a property whose occurrence is not 1, then the logic does not work correctly | ||
// TODO: the ORDER BY criterion has to be included in a GROUP BY statement, returning more than one row if property occurs more than once | ||
|
||
nonTriplestoreSpecficPrequery: SelectQuery = QueryTraverser.transformConstructToSelect( | ||
nonTriplestoreSpecificPrequery: SelectQuery = QueryTraverser.transformConstructToSelect( | ||
inputQuery = inputQuery.copy(whereClause = whereClauseWithoutAnnotations), | ||
transformer = nonTriplestoreSpecificConstructToSelectTransformer | ||
) | ||
|
||
// variable representing the main resources | ||
mainResourceVar: QueryVariable = nonTriplestoreSpecificConstructToSelectTransformer.mainResourceVariable | ||
|
||
// Convert the non-triplestore-specific query to a triplestore-specific one. | ||
triplestoreSpecificQueryPatternTransformerSelect: SelectToSelectTransformer = { | ||
if (settings.triplestoreType.startsWith("graphdb")) { | ||
|
@@ -384,17 +389,16 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
|
||
// Convert the preprocessed query to a non-triplestore-specific query. | ||
triplestoreSpecificPrequery = QueryTraverser.transformSelectToSelect( | ||
inputQuery = nonTriplestoreSpecficPrequery, | ||
inputQuery = nonTriplestoreSpecificPrequery, | ||
transformer = triplestoreSpecificQueryPatternTransformerSelect | ||
) | ||
|
||
// _ = println(triplestoreSpecificPrequery.toSparql) | ||
|
||
prequeryResponseNotMerged: SparqlSelectResponse <- (storeManager ? SparqlSelectRequest(triplestoreSpecificPrequery.toSparql)).mapTo[SparqlSelectResponse] | ||
pageSizeBeforeFiltering: Int = prequeryResponseNotMerged.results.bindings.size | ||
|
||
// variable representing the main resources | ||
mainResourceVar: QueryVariable = nonTriplestoreSpecificConstructToSelectTransformer.getMainResourceVariable | ||
|
||
// Merge rows with the same main resource IRI. This could happen if there are unbound variables in a UNION. | ||
prequeryResponse = mergePrequeryResults(prequeryResponseNotMerged = prequeryResponseNotMerged, mainResourceVar = mainResourceVar) | ||
|
||
// a sequence of resource IRIs that match the search criteria | ||
|
@@ -404,15 +408,21 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
resultRow.rowMap(mainResourceVar.variableName) | ||
} | ||
|
||
queryResultsSeparatedWithFullGraphPattern: ConstructResponseUtilV2.MainResourcesAndValueRdfData <- if (mainResourceIris.nonEmpty) { | ||
mainQueryResults: ConstructResponseUtilV2.MainResourcesAndValueRdfData <- if (mainResourceIris.nonEmpty) { | ||
// at least one resource matched the prequery | ||
|
||
// get all the IRIs for variables representing dependent resources per main resource | ||
val dependentResourceIrisPerMainResource: MainQueryResultProcessor.DependentResourcesPerMainResource = MainQueryResultProcessor.getDependentResourceIrisPerMainResource(prequeryResponse, nonTriplestoreSpecificConstructToSelectTransformer, mainResourceVar) | ||
val dependentResourceIrisPerMainResource: GravsearchMainQueryGenerator.DependentResourcesPerMainResource = | ||
GravsearchMainQueryGenerator.getDependentResourceIrisPerMainResource( | ||
prequeryResponse = prequeryResponse, | ||
transformer = nonTriplestoreSpecificConstructToSelectTransformer, | ||
mainResourceVar = mainResourceVar | ||
) | ||
|
||
// collect all variables representing resources | ||
val allResourceVariablesFromTypeInspection: Set[QueryVariable] = typeInspectionResult.entities.collect { | ||
case (queryVar: TypeableVariable, nonPropTypeInfo: NonPropertyTypeInfo) if OntologyConstants.KnoraApi.isKnoraApiV2Resource(nonPropTypeInfo.typeIri) => QueryVariable(queryVar.variableName) | ||
case (queryVar: TypeableVariable, nonPropTypeInfo: NonPropertyTypeInfo) if OntologyConstants.KnoraApi.isKnoraApiV2Resource(nonPropTypeInfo.typeIri) => | ||
QueryVariable(queryVar.variableName) | ||
}.toSet | ||
|
||
// the user may have defined IRIs of dependent resources in the input query (type annotations) | ||
|
@@ -426,7 +436,11 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
val allDependentResourceIris: Set[IRI] = dependentResourceIrisPerMainResource.dependentResourcesPerMainResource.values.flatten.toSet ++ dependentResourceIrisFromTypeInspection | ||
|
||
// for each main resource, create a Map of value object variables and their Iris | ||
val valueObjectVarsAndIrisPerMainResource: MainQueryResultProcessor.ValueObjectVariablesAndValueObjectIris = MainQueryResultProcessor.getValueObjectVarsAndIrisPerMainResource(prequeryResponse, nonTriplestoreSpecificConstructToSelectTransformer, mainResourceVar) | ||
val valueObjectVarsAndIrisPerMainResource: GravsearchMainQueryGenerator.ValueObjectVariablesAndValueObjectIris = GravsearchMainQueryGenerator.getValueObjectVarsAndIrisPerMainResource( | ||
prequeryResponse = prequeryResponse, | ||
transformer = nonTriplestoreSpecificConstructToSelectTransformer, | ||
mainResourceVar = mainResourceVar | ||
) | ||
|
||
// collect all value objects IRIs (for all main resources and for all value object variables) | ||
val allValueObjectIris: Set[IRI] = valueObjectVarsAndIrisPerMainResource.valueObjectVariablesAndValueObjectIris.values.foldLeft(Set.empty[IRI]) { | ||
|
@@ -485,7 +499,6 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
typeInspectionResult, | ||
inputQuery | ||
) | ||
|
||
} yield queryResultsFilteredForPermissions.copy( | ||
resources = queryResWithFullGraphPatternOnlyRequestedValues | ||
) | ||
|
@@ -501,14 +514,15 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
|
||
// If we're querying standoff, get XML-to standoff mappings. | ||
mappingsAsMap: Map[IRI, MappingAndXSLTransformation] <- if (queryStandoff) { | ||
getMappingsFromQueryResultsSeparated(queryResultsSeparatedWithFullGraphPattern.resources, requestingUser) | ||
getMappingsFromQueryResultsSeparated(mainQueryResults.resources, requestingUser) | ||
} else { | ||
FastFuture.successful(Map.empty[IRI, MappingAndXSLTransformation]) | ||
} | ||
|
||
apiResponse: ReadResourcesSequenceV2 <- ConstructResponseUtilV2.createApiResponse( | ||
mainResourcesAndValueRdfData = queryResultsSeparatedWithFullGraphPattern, | ||
mainResourcesAndValueRdfData = mainQueryResults, | ||
orderByResourceIri = mainResourceIris, | ||
pageSizeBeforeFiltering = pageSizeBeforeFiltering, | ||
mappings = mappingsAsMap, | ||
queryStandoff = queryStandoff, | ||
versionDate = None, | ||
|
@@ -642,6 +656,7 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
readResourcesSequence: ReadResourcesSequenceV2 <- ConstructResponseUtilV2.createApiResponse( | ||
mainResourcesAndValueRdfData = mainResourcesAndValueRdfData, | ||
orderByResourceIri = mainResourceIris, | ||
pageSizeBeforeFiltering = mainResourceIris.size, | ||
mappings = mappings, | ||
queryStandoff = maybeStandoffMinStartIndex.nonEmpty, | ||
versionDate = None, | ||
|
@@ -765,6 +780,7 @@ class SearchResponderV2(responderData: ResponderData) extends ResponderWithStand | |
apiResponse: ReadResourcesSequenceV2 <- ConstructResponseUtilV2.createApiResponse( | ||
mainResourcesAndValueRdfData = mainResourcesAndValueRdfData, | ||
orderByResourceIri = mainResourceIris.toSeq.sorted, | ||
pageSizeBeforeFiltering = mainResourceIris.size, | ||
queryStandoff = false, | ||
versionDate = None, | ||
calculateMayHaveMoreResults = true, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am unaware of the construct
(GROUP_CONCAT(DISTINCT(IF(BOUND(?book), STR(?book), "")); SEPARATOR='') AS ?book__Concat)
(82f8a55). I presume this means that if the variable is bound, an IRI is returned, otherwise an empty string.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. This is explained in the paragraph starting on line 211: