From 33b49de620fdcbb78487581191776e8a4a70e3a3 Mon Sep 17 00:00:00 2001 From: withub-TanmayAgrawal Date: Tue, 26 Sep 2023 00:04:03 +0530 Subject: [PATCH] refresh function corrected, deleting document working with opensearch (#7664) --- CHANGELOG.md | 5 +++++ llama_index/indices/base.py | 12 +++++++----- llama_index/indices/vector_store/base.py | 5 ++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1909b15f7284..6d37f38e1c85b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # ChangeLog +## Unreleased + +### Bug Fixes / Nits +- Fixed `refresh_ref_docs()` bug with order of operations (#7664) + ## [0.8.33] - 2023-09-25 ### New Features diff --git a/llama_index/indices/base.py b/llama_index/indices/base.py index 9f3b1a3f5e2a2..599fd1913cb26 100644 --- a/llama_index/indices/base.py +++ b/llama_index/indices/base.py @@ -275,7 +275,9 @@ def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None: """ with self._service_context.callback_manager.as_trace("update"): self.delete_ref_doc( - document.get_doc_id(), **update_kwargs.pop("delete_kwargs", {}) + document.get_doc_id(), + delete_from_docstore=True, + **update_kwargs.pop("delete_kwargs", {}), ) self.insert(document, **update_kwargs.pop("insert_kwargs", {})) @@ -309,14 +311,14 @@ def refresh_ref_docs( existing_doc_hash = self._docstore.get_document_hash( document.get_doc_id() ) - if existing_doc_hash != document.hash: + if existing_doc_hash is None: + self.insert(document, **update_kwargs.pop("insert_kwargs", {})) + refreshed_documents[i] = True + elif existing_doc_hash != document.hash: self.update_ref_doc( document, **update_kwargs.pop("update_kwargs", {}) ) refreshed_documents[i] = True - elif existing_doc_hash is None: - self.insert(document, **update_kwargs.pop("insert_kwargs", {})) - refreshed_documents[i] = True return refreshed_documents diff --git a/llama_index/indices/vector_store/base.py b/llama_index/indices/vector_store/base.py index 316231270ecce..f64c1c5cad751 100644 --- a/llama_index/indices/vector_store/base.py +++ b/llama_index/indices/vector_store/base.py @@ -3,7 +3,7 @@ An index that that is built on top of an existing vector store. """ - +import logging from typing import Any, Dict, List, Optional, Sequence from llama_index.async_utils import run_async_tasks @@ -17,6 +17,8 @@ from llama_index.storage.storage_context import StorageContext from llama_index.vector_stores.types import VectorStore +logger = logging.getLogger(__name__) + class VectorStoreIndex(BaseIndex[IndexDict]): """Vector Store Index. @@ -281,6 +283,7 @@ def delete_ref_doc( if ref_doc_info is not None: for node_id in ref_doc_info.node_ids: self._index_struct.delete(node_id) + self._vector_store.delete(node_id) # delete from docstore only if needed if (