From 5933fc139c2aabdae05721b07eadf9f12b0cefe3 Mon Sep 17 00:00:00 2001 From: Dalvany <9901407+Dalvany@users.noreply.github.com> Date: Mon, 22 May 2023 22:07:56 +0200 Subject: [PATCH] Fix and handle publish (cherry picked from commit 3054040c8b7521c6cfe9c509c7037abfa1a3d9f1) --- .gitignore | 1 + crates/alexandrie/src/api/crates/publish.rs | 29 +++++++++++++++---- crates/alexandrie/src/fts/document.rs | 32 +++++++++++++++++++++ crates/alexandrie/src/fts/index.rs | 30 +++++++------------ crates/alexandrie/src/main.rs | 6 ++-- 5 files changed, 71 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 10d8bc34..281fbff1 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ # Rust compilation folder /target +crates/*/target # hidden folders from code editors /.vscode diff --git a/crates/alexandrie/src/api/crates/publish.rs b/crates/alexandrie/src/api/crates/publish.rs index c4607fc1..b6250db0 100644 --- a/crates/alexandrie/src/api/crates/publish.rs +++ b/crates/alexandrie/src/api/crates/publish.rs @@ -4,12 +4,12 @@ use std::path::PathBuf; use std::pin::pin; use async_std::io::prelude::*; - use byteorder::{LittleEndian, ReadBytesExt}; use chrono::Utc; use diesel::dsl as sql; use diesel::prelude::*; use flate2::read::GzDecoder; +use log::warn; use ring::digest as hasher; use semver::{Version, VersionReq}; use serde::{Deserialize, Serialize}; @@ -26,6 +26,7 @@ use crate::db::schema::*; use crate::db::Connection; use crate::db::DATETIME_FORMAT; use crate::error::{AlexError, Error}; +use crate::fts::TantivyDocument; use crate::utils; use crate::State; @@ -70,7 +71,7 @@ struct CrateMetaDependency { fn link_keywords( conn: &mut Connection, crate_id: i64, - keywords: Option>, + keywords: &Option>, ) -> Result<(), Error> { diesel::delete(crate_keywords::table.filter(crate_keywords::crate_id.eq(crate_id))) .execute(conn)?; @@ -116,7 +117,7 @@ fn link_keywords( fn link_categories( conn: &mut Connection, crate_id: i64, - categories: Option>, + categories: &Option>, ) -> Result<(), Error> { diesel::delete(crate_categories::table.filter(crate_categories::crate_id.eq(crate_id))) .execute(conn)?; @@ -354,10 +355,10 @@ pub(crate) async fn put(mut req: Request) -> tide::Result { }; //? Update keywords. - link_keywords(conn, krate.id, metadata.keywords)?; + link_keywords(conn, krate.id, &metadata.keywords)?; //? Update categories. - link_categories(conn, krate.id, metadata.categories)?; + link_categories(conn, krate.id, &metadata.categories)?; //? Update badges. link_badges(conn, krate.id, metadata.badges)?; @@ -396,6 +397,24 @@ pub(crate) async fn put(mut req: Request) -> tide::Result { .storage .store_crate(&crate_desc.name, crate_desc.vers.clone(), crate_bytes)?; + let id = krate.id; + let name = krate.name.clone(); + + // Index into full text index + let mut document: TantivyDocument = krate.into(); + if let Some(keywords) = metadata.keywords { + document.add_all_keywords(keywords); + } + if let Some(categories) = metadata.categories { + document.add_all_categories(categories); + } + + if let Err(error) = state.search.create_or_update(document) { + warn!("Can't convert crate '{id}' ({name}) into Tantivy document : {error}"); + } else { + state.search.commit()?; + } + //? Store the crate's readme. if let Some(rendered) = rendered_readme { state diff --git a/crates/alexandrie/src/fts/document.rs b/crates/alexandrie/src/fts/document.rs index 0d52bf97..9b4149b1 100644 --- a/crates/alexandrie/src/fts/document.rs +++ b/crates/alexandrie/src/fts/document.rs @@ -1,5 +1,6 @@ use std::fmt::Formatter; +use crate::db::models::Crate; use tantivy::schema::Schema; use tantivy::Document; @@ -47,6 +48,19 @@ impl std::fmt::Display for TantivyDocument { } } +impl From for TantivyDocument { + fn from(value: Crate) -> Self { + Self { + id: value.id, + name: value.name, + description: value.description, + readme: None, + keywords: vec![], + categories: vec![], + } + } +} + impl TantivyDocument { pub fn new(id: i64, name: String) -> Self { Self { @@ -124,6 +138,10 @@ impl TantivyDocument { Ok(document) } + pub fn id(&self) -> i64 { + self.id + } + /// Set crate's description pub fn set_description(&mut self, description: String) { self.description = Some(description); @@ -139,8 +157,22 @@ impl TantivyDocument { self.keywords.push(keyword); } + /// Add all keywords + pub fn add_all_keywords(&mut self, keywords: Vec) { + for keyword in keywords { + self.add_keyword(keyword); + } + } + /// Add new crate's category pub fn add_category(&mut self, category: String) { self.categories.push(category); } + + /// Add all keywords + pub fn add_all_categories(&mut self, categories: Vec) { + for category in categories { + self.add_category(category); + } + } } diff --git a/crates/alexandrie/src/fts/index.rs b/crates/alexandrie/src/fts/index.rs index 864001b4..b9b204e3 100644 --- a/crates/alexandrie/src/fts/index.rs +++ b/crates/alexandrie/src/fts/index.rs @@ -162,7 +162,8 @@ impl Tantivy { /// Method that create or update a document in Tantivy index. As there is no update, we need /// to first delete the document then create a new document. - pub fn create_or_update(&self, id: i64, document: TantivyDocument) -> Result<(), Error> { + pub fn create_or_update(&self, document: TantivyDocument) -> Result<(), Error> { + let id = document.id(); let document = document.try_into(&self.schema)?; if let Some(field) = self.schema.get_field(super::ID_FIELD_NAME) { let term = Term::from_field_i64(field, id); @@ -384,29 +385,22 @@ impl Tantivy { for krate in krates.into_iter() { debug!("crate {:?}", krate); // Create a document with database ID and crate name - let mut doc: TantivyDocument = - TantivyDocument::new(krate.id, krate.name.clone()); + let id = krate.id; + let name = krate.name.clone(); - // If there is some description, then set it - if let Some(description) = krate.description.as_ref() { - doc.set_description(description.clone()); - } + let mut doc: TantivyDocument = krate.into(); // Skip keywords that might be orphan and add keywords that match ids - while current_keyword.is_some() - && current_keyword.as_ref().unwrap().0 <= krate.id - { - if current_keyword.as_ref().unwrap().0 == krate.id { + while current_keyword.is_some() && current_keyword.as_ref().unwrap().0 <= id { + if current_keyword.as_ref().unwrap().0 == id { doc.add_keyword(current_keyword.unwrap().1); } current_keyword = keywords_iterator.next(); } // Skip keywords that might be orphan and add keywords that match ids - while current_category.is_some() - && current_category.as_ref().unwrap().0 <= krate.id - { - if current_category.as_ref().unwrap().0 == krate.id { + while current_category.is_some() && current_category.as_ref().unwrap().0 <= id { + if current_category.as_ref().unwrap().0 == id { doc.add_keyword(current_category.unwrap().1); } current_category = categories_iterator.next(); @@ -414,11 +408,9 @@ impl Tantivy { // TODO get README - if let Err(error) = self.create_or_update(krate.id, doc) { + if let Err(error) = self.create_or_update(doc) { warn!( - "Can't convert crate '{}' ({}) into Tantivy document : {error}", - krate.id, - krate.name.clone() + "Can't convert crate '{id}' ({name}) into Tantivy document : {error}" ); } count_crate += 1; diff --git a/crates/alexandrie/src/main.rs b/crates/alexandrie/src/main.rs index 4a71d3b2..4da89d0d 100644 --- a/crates/alexandrie/src/main.rs +++ b/crates/alexandrie/src/main.rs @@ -244,9 +244,6 @@ async fn run() -> Result<(), Error> { let state: config::State = config.try_into()?; - let database = &state.db; - state.search.index_all(database).await?; - let state = Arc::new(state); log::info!("starting Alexandrie (version: {})", build::short()); @@ -256,6 +253,9 @@ async fn run() -> Result<(), Error> { state.db.run(|conn| conn.run_pending_migrations(db::MIGRATIONS).map(|_| ())).await .expect("migration execution error"); + let database = &state.db; + state.search.index_all(database).await?; + let mut app = tide::with_state(Arc::clone(&state)); log::info!("setting up request logger middleware");