Skip to content

Commit

Permalink
H-3375: Overhaul logic to detect inheritance depths for data types (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
TimDiekmann authored Sep 29, 2024
1 parent edeb9d6 commit bf74d01
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 140 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,13 @@ where
})
.collect::<Vec<_>>();

let schema_metadata = ontology_type_resolver
.resolve_data_type_metadata(data_types.iter().map(Arc::clone))
.change_context(InsertionError)?;
for data_type in &data_types {
let schema_metadata = ontology_type_resolver
.resolve_data_type_metadata(&data_type.id)
.change_context(InsertionError)?;

for (schema_metadata, data_type) in schema_metadata.iter().zip(&data_types) {
postgres_client
.insert_data_type_references(DataTypeId::from_url(&data_type.id), schema_metadata)
.insert_data_type_references(DataTypeId::from_url(&data_type.id), &schema_metadata)
.await?;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,17 @@ where
ontology_type_resolver.add_open(Arc::new(data_type.schema));
});

let schema_metadata = ontology_type_resolver
.resolve_data_type_metadata(inserted_data_types.iter().map(Arc::clone))
.change_context(InsertionError)?;
for inserted_data_type in &inserted_data_types {
ontology_type_resolver.add_open(Arc::clone(inserted_data_type));
}
let schema_metadata = inserted_data_types
.iter()
.map(|inserted_data_type| {
ontology_type_resolver
.resolve_data_type_metadata(&inserted_data_type.id)
.change_context(InsertionError)
})
.collect::<Result<Vec<_>, _>>()?;

let data_type_validator = DataTypeValidator;
for data_type in &inserted_data_types {
Expand Down Expand Up @@ -829,11 +837,11 @@ where
ontology_type_resolver.add_open(Arc::new(data_type.schema));
});

let [metadata] = ontology_type_resolver
.resolve_data_type_metadata([Arc::new(schema.clone().into_inner())])
.change_context(UpdateError)?
.try_into()
.expect("Expected exactly one closed data type metadata");
ontology_type_resolver.add_open(Arc::new(schema.clone().into_inner()));
let metadata = ontology_type_resolver
.resolve_data_type_metadata(&schema.id)
.change_context(UpdateError)?;

let closed_schema = data_type_validator
.validate(
ontology_type_resolver
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
use alloc::sync::Arc;
use std::collections::HashMap;
use core::cmp;
use std::collections::{HashMap, hash_map::RawEntryMut};

use serde::{Deserialize, Serialize};

use crate::{Valid, schema::DataType, url::VersionedUrl};
use crate::{
Valid,
schema::{DataType, data_type::DataTypeEdge},
url::VersionedUrl,
};

#[derive(Debug, Clone, Serialize, Deserialize)]
// #[cfg_attr(target_arch = "wasm32", derive(tsify::Tsify))]
Expand All @@ -26,3 +31,20 @@ impl ClosedDataType {
pub struct ClosedDataTypeMetadata {
pub inheritance_depths: HashMap<VersionedUrl, u32>,
}

impl ClosedDataTypeMetadata {
    /// Records an edge to `target` discovered at inheritance `depth`.
    ///
    /// For inheritance edges the smallest depth wins: a `target` that is
    /// already recorded with a larger depth is overwritten with `depth`,
    /// otherwise the existing (smaller or equal) depth is kept.
    pub fn add_edge(&mut self, edge: DataTypeEdge, target: &VersionedUrl, depth: u32) {
        match edge {
            DataTypeEdge::Inheritance => {
                // `raw_entry_mut` looks the key up by reference, so `target` is
                // only cloned when a new entry actually has to be inserted.
                match self.inheritance_depths.raw_entry_mut().from_key(target) {
                    RawEntryMut::Occupied(mut occupied) => {
                        let recorded = *occupied.get();
                        if depth < recorded {
                            *occupied.get_mut() = depth;
                        }
                    }
                    RawEntryMut::Vacant(vacant) => {
                        vacant.insert(target.clone(), depth);
                    }
                }
            }
        }
    }
}
211 changes: 86 additions & 125 deletions libs/@blockprotocol/type-system/rust/src/schema/data_type/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ mod reference;
mod validation;

use alloc::sync::Arc;
use core::{cmp, fmt, mem};
use core::{fmt, mem};
use std::collections::{HashMap, HashSet, hash_map::RawEntryMut};

use error_stack::{Report, bail};
Expand Down Expand Up @@ -476,7 +476,7 @@ impl OntologyTypeResolver {
) -> Option<Arc<ClosedDataTypeMetadata>> {
self.data_types
.get_mut(data_type_id)
.and_then(|entry| entry.metadata.replace(metadata))
.map(|entry| Arc::clone(entry.metadata.insert(metadata)))
}

fn get(&self, id: &VersionedUrl) -> Option<&DataTypeCacheEntry> {
Expand All @@ -493,144 +493,105 @@ impl OntologyTypeResolver {
/// Returns an error if the metadata for any of the data types could not be resolved.
pub fn resolve_data_type_metadata(
&mut self,
data_types: impl IntoIterator<Item = Arc<DataType>>,
) -> Result<Vec<Arc<ClosedDataTypeMetadata>>, Report<DataTypeResolveError>> {
data_type_id: &VersionedUrl,
) -> Result<Arc<ClosedDataTypeMetadata>, Report<DataTypeResolveError>> {
let Some(data_type_entry) = self.get(data_type_id) else {
bail!(DataTypeResolveError::MissingSchemas {
schemas: HashSet::from([data_type_id.clone()]),
});
};

if let Some(metadata) = &data_type_entry.metadata {
// If the metadata is already resolved, we can return it immediately.
return Ok(Arc::clone(metadata));
}

// We add all requested types to the cache to ensure that we can resolve all types. The
// cache will be updated with the resolved metadata. We extract the IDs so that we can
// resolve the metadata in the correct order.
let data_types_to_resolve = data_types
.into_iter()
.map(|data_type| {
let data_type_id = data_type.id.clone();
self.add_open(data_type);
data_type_id
})
.collect::<Vec<_>>();
// Double buffering is used to avoid unnecessary allocations.
let mut data_types_to_resolve = Vec::new();
let mut next_data_types_to_resolve = vec![Arc::clone(&data_type_entry.data_type)];

// We keep a list of all schemas that are missing from the cache. If we encounter a schema
// that is not in the cache, we add it to this list. If we are unable to resolve all
// schemas, we return an error with this list.
let mut missing_schemas = HashSet::new();
let mut processed_schemas = HashSet::new();

let resolved_types = data_types_to_resolve
.into_iter()
.filter_map(|current_data_type_id| {
// To avoid infinite loops, we keep track of all schemas that we already processed.
processed_schemas.insert(current_data_type_id.clone());

let Some(cache_entry) = self.get(&current_data_type_id) else {
// This should never happen as we previously inserted the data type
missing_schemas.insert(current_data_type_id);
return None;
};

// If the metadata is already resolved, we can return it immediately.
if let Some(metadata) = &cache_entry.metadata {
return Some(Arc::clone(metadata));
}

// We create a list of all types that we need to find in order to resolve the
// current data type.
let mut data_types_to_find = cache_entry
.data_type
.data_type_references()
.filter(|(data_type_ref, _edge)| {
// To prevent infinite loops, we only add the parent if it is not the same
// as the current data type.
data_type_ref.url != cache_entry.data_type.id
})
.map(|(data_type_ref, edge)| (data_type_ref.clone(), edge))
.collect::<Vec<_>>();

let mut current_depth = 0;
// We keep track of the inheritance depth of each data type in the inheritance
// chain. We start with the current data type at depth 0. It's worth noting that the
// type itself is not included in the inheritance chain, even if it is referenced in
// the `allOf` field.
let mut inheritance_depths = data_types_to_find
.iter()
.filter(|(_, edge)| *edge == DataTypeEdge::Inheritance)
.map(|(data_type_ref, _)| (data_type_ref.url.clone(), current_depth))
.collect::<HashMap<_, _>>();

while !data_types_to_find.is_empty() {
// We extend `data_types_to_find` with the parents recursively until we either
// find all types or encounter a schema we already resolved. For this reason, we
// don't consume the vector here but use `mem::take` to move the vector out of
// the loop.
for (data_type_ref, edge) in mem::take(&mut data_types_to_find) {
let Some(entry) = self.get(&data_type_ref.url) else {
// We ignore any missing schemas here and continue to resolve to find
// all missing schemas.
missing_schemas.insert(data_type_ref.url.clone());
continue;
};

if let Some(metadata) = &entry.metadata {
// If we already resolved the metadata for this schema, we update the
// inheritance depth of the current data type.
for (data_type_ref, depth) in &metadata.inheritance_depths {
if current_data_type_id != *data_type_ref {
match inheritance_depths.raw_entry_mut().from_key(data_type_ref)
{
RawEntryMut::Occupied(mut entry) => {
*entry.get_mut() =
cmp::min(*depth + current_depth + 1, *entry.get());
}
RawEntryMut::Vacant(entry) => {
entry.insert(
data_type_ref.clone(),
*depth + current_depth + 1,
);
}
}
}
}
} else {
if current_data_type_id != entry.data_type.id
&& edge == DataTypeEdge::Inheritance
{
inheritance_depths
.insert(entry.data_type.id.clone(), current_depth);
// We also keep a list of all schemas that we already processed. This is used to prevent
// infinite loops in the inheritance chain. New values are added to this list as we add new
// schemas to resolve.
let mut processed_schemas = HashSet::from([data_type_id.clone()]);

// The closed-schema metadata currently being resolved; the inheritance depths
// collected here are stored in the cache once resolution completes.
let mut in_progress_schema = ClosedDataTypeMetadata {
inheritance_depths: HashMap::new(),
};

let mut current_depth = 0;
while !next_data_types_to_resolve.is_empty() {
mem::swap(&mut data_types_to_resolve, &mut next_data_types_to_resolve);
#[expect(
clippy::iter_with_drain,
reason = "False positive, we re-use the iterator to avoid unnecessary allocations.\
See https://github.com/rust-lang/rust-clippy/issues/8539"
)]
for data_type in data_types_to_resolve.drain(..) {
for (data_type_reference, edge) in data_type.data_type_references() {
if processed_schemas.contains(&data_type_reference.url) {
// We ignore the already processed schemas to prevent infinite loops.
continue;
}

in_progress_schema.add_edge(edge, &data_type_reference.url, current_depth);
processed_schemas.insert(data_type_reference.url.clone());

let Some(data_type_entry) = self.data_types.get(&data_type_reference.url)
else {
// If the data type is not in the cache, we add it to the list of missing
// schemas.
missing_schemas.insert(data_type_reference.url.clone());
continue;
};

if let Some(metadata) = &data_type_entry.metadata {
// If the metadata is already resolved, we can reuse it.
for (data_type_ref, depth) in &metadata.inheritance_depths {
if data_type.id != *data_type_ref {
in_progress_schema.add_edge(
edge,
data_type_ref,
*depth + current_depth + 1,
);
}
// We encountered a schema that we haven't resolved yet. We add it to
// the list of schemas to find and update the inheritance depth of the
// current type.
data_types_to_find.extend(
entry
.data_type
.data_type_references()
.filter(|(data_type_ref, _)| {
// To prevent infinite loops, we only add references that were
// not already processed.
!processed_schemas.contains(&data_type_ref.url)
})
.map(|(data_type_ref, edge)| (data_type_ref.clone(), edge)),
);
}
} else {
// We encountered a schema that we haven't resolved yet. We add it to the
// list of schemas to find and update the inheritance depth of the current
// type.
next_data_types_to_resolve.push(Arc::clone(&data_type_entry.data_type));
}
// As we resolve all parents in the current depth, we increment the depth for
// the next iteration.
current_depth += 1;
}
}

// We create the resolved metadata for the current data type and update the cache
// so that we don't need to resolve it again.
let resolved = Arc::new(ClosedDataTypeMetadata { inheritance_depths });
self.update_metadata(&current_data_type_id, Arc::clone(&resolved));
Some(resolved)
})
.collect();
current_depth += 1;
}

missing_schemas
.is_empty()
.then_some(resolved_types)
.ok_or_else(|| {
Report::from(DataTypeResolveError::MissingSchemas {
schemas: missing_schemas,
})
})
if missing_schemas.is_empty() {
// We create the resolved metadata for the current data type and update the cache so
// that we don't need to resolve it again.
Ok(self
.update_metadata(data_type_id, Arc::new(in_progress_schema))
.unwrap_or_else(|| {
unreachable!(
"The data type was removed from the cache while resolving the metadata"
)
}))
} else {
Err(Report::from(DataTypeResolveError::MissingSchemas {
schemas: missing_schemas,
}))
}
}

/// Returns the closed data type for the given data type.
Expand Down

0 comments on commit bf74d01

Please sign in to comment.