diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/CHANGELOG.md b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/CHANGELOG.md index 36bff877abcbe..566e1af84fe12 100644 --- a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/CHANGELOG.md +++ b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG +## [0.1.4] - 2024-03-26 + +- Make OneDriveReader serializable + ## [0.1.2] - 2024-02-13 - Add maintainers and keywords from library.json (llamahub) diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py index 9f770e967b598..0369976e4f0b1 100644 --- a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py +++ b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/llama_index/readers/microsoft_onedrive/base.py @@ -8,7 +8,8 @@ import requests from llama_index.core.readers import SimpleDirectoryReader -from llama_index.core.readers.base import BaseReader +from llama_index.core.bridge.pydantic import PrivateAttr +from llama_index.core.readers.base import BasePydanticReader from llama_index.core.schema import Document logger = logging.getLogger(__name__) @@ -18,32 +19,48 @@ CLIENTCREDENTIALSCOPES = ["https://graph.microsoft.com/.default"] -class OneDriveReader(BaseReader): - """Microsoft OneDrive reader.""" +class OneDriveReader(BasePydanticReader): + """ + Microsoft OneDrive reader. + + Initializes a new instance of the OneDriveReader. + + :param client_id: The Application (client) ID for the app registered in the Azure Entra (formerly Azure Active directory) portal with MS Graph permission "Files.Read.All". 
+ :param tenant_id: The Directory (tenant) ID of the Azure Active Directory (AAD) tenant the app is registered with. + Defaults to "consumers" for multi-tenant applications and OneDrive personal. + :param client_secret: The Application Secret for the app registered in the Azure portal. + If provided, the MSAL client credential flow will be used for authentication (ConfidentialClientApplication). + If not provided, interactive authentication will be used (Not recommended for CI/CD or scenarios where manual interaction for authentication is not feasible). + + For interactive authentication to work, a browser is used to authenticate, hence the registered application should have a redirect URI set to 'https://localhost' + for mobile and native applications. + """ + + client_id: str = None + client_secret: Optional[str] = None + tenant_id: Optional[str] = None + + _is_interactive_auth: bool = PrivateAttr(False) def __init__( self, client_id: str, client_secret: Optional[str] = None, - tenant_id: str = "consumers", + tenant_id: Optional[str] = "consumers", + **kwargs: Any, ) -> None: - """ - Initializes a new instance of the OneDriveReader. - :param client_id: The Application (client) ID for the app registered in the Azure Entra (formerly Azure Active directory) portal with MS Graph permission "Files.Read.All". - :param tenant_id: The Directory (tenant) ID of the Azure Active Directory (AAD) tenant the app is registered with. - Defaults to "consumers" for multi-tenant applications and onderive personal. - :param client_secret: The Application Secret for the app registered in the Azure portal. - If provided, the MSAL client credential flow will be used for authentication (ConfidentialClientApplication). - If not provided, interactive authentication will be used (Not recommended for CI/CD or scenarios where manual interaction for authentication is not feasible). 
+ super().__init__( + client_id=client_id, + client_secret=client_secret, + tenant_id=tenant_id, + **kwargs, + ) + self._is_interactive_auth = not client_secret  # assign after super().__init__(), which resets private attrs to their defaults - For interactive authentication to work, a browser is used to authenticate, hence the registered application should have a redirect URI set to 'https://localhost' - for mobile and native applications. - """ - self.client_id = client_id - self.tenant_id = tenant_id - self.client_secret = client_secret - self._is_interactive_auth = not self.client_secret + @classmethod + def class_name(cls) -> str: + return "OneDriveReader" def _authenticate_with_msal(self) -> Any: """Authenticate with MSAL. @@ -473,7 +490,7 @@ def load_data( recursive: bool = True, userprincipalname: Optional[str] = None, ) -> List[Document]: - """Load data from the folder id / file ids, f both are not provided download from the root. + """Load data from the folder id / file ids, if both are not provided download from the root. Args: folder_id: folder id of the folder in OneDrive. diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/pyproject.toml index eef8fe4c7e2cd..95a8618ebda2f 100644 --- a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/pyproject.toml +++ b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/pyproject.toml @@ -29,7 +29,7 @@ license = "MIT" maintainers = ["godwin3737"] name = "llama-index-readers-microsoft-onedrive" readme = "README.md" -version = "0.1.3" +version = "0.1.4" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" diff --git a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/tests/test_readers_microsoft_onedrive.py b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/tests/test_readers_microsoft_onedrive.py index 600e7708e0589..da26f2e040911 100644 --- 
a/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/tests/test_readers_microsoft_onedrive.py +++ b/llama-index-integrations/readers/llama-index-readers-microsoft-onedrive/tests/test_readers_microsoft_onedrive.py @@ -1,7 +1,28 @@ from llama_index.core.readers.base import BaseReader from llama_index.readers.microsoft_onedrive import OneDriveReader +test_client_id = "test_client_id" +test_tenant_id = "test_tenant_id" + def test_class(): names_of_base_classes = [b.__name__ for b in OneDriveReader.__mro__] assert BaseReader.__name__ in names_of_base_classes + + +def test_serialize(): + reader = OneDriveReader( + client_id=test_client_id, + tenant_id=test_tenant_id, + ) + + schema = reader.schema() + assert schema is not None + assert len(schema) > 0 + assert "client_id" in schema["properties"] + + json = reader.json(exclude_unset=True) + + new_reader = OneDriveReader.parse_raw(json) + assert new_reader.client_id == reader.client_id + assert new_reader.tenant_id == reader.tenant_id