Skip to content

Commit

Permalink
Make OneDriveReader serializable (run-llama#12342)
Browse files Browse the repository at this point in the history
  • Loading branch information
nfiacco authored Mar 27, 2024
1 parent a0a3f24 commit e2396a7
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# CHANGELOG

## [0.1.4] - 2024-03-26

- Make OneDriveReader serializable

## [0.1.2] - 2024-02-13

- Add maintainers and keywords from library.json (llamahub)
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

import requests
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.readers.base import BaseReader
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.readers.base import BasePydanticReader
from llama_index.core.schema import Document

logger = logging.getLogger(__name__)
Expand All @@ -18,32 +19,48 @@
CLIENTCREDENTIALSCOPES = ["https://graph.microsoft.com/.default"]


class OneDriveReader(BaseReader):
"""Microsoft OneDrive reader."""
class OneDriveReader(BasePydanticReader):
"""
Microsoft OneDrive reader.
Initializes a new instance of the OneDriveReader.
:param client_id: The Application (client) ID for the app registered in the Azure Entra (formerly Azure Active directory) portal with MS Graph permission "Files.Read.All".
:param tenant_id: The Directory (tenant) ID of the Azure Active Directory (AAD) tenant the app is registered with.
Defaults to "consumers" for multi-tenant applications and OneDrive personal.
:param client_secret: The Application Secret for the app registered in the Azure portal.
If provided, the MSAL client credential flow will be used for authentication (ConfidentialClientApplication).
If not provided, interactive authentication will be used (Not recommended for CI/CD or scenarios where manual interaction for authentication is not feasible).
For interactive authentication to work, a browser is used to authenticate, hence the registered application should have a redirect URI set to 'https://localhost'
for mobile and native applications.
"""

client_id: str = None
client_secret: Optional[str] = None
tenant_id: Optional[str] = None

_is_interactive_auth: bool = PrivateAttr(False)

def __init__(
    self,
    client_id: str,
    client_secret: Optional[str] = None,
    tenant_id: Optional[str] = "consumers",
    **kwargs: Any,
) -> None:
    """
    Initialize the reader's public fields and record which auth flow applies.

    :param client_id: The Application (client) ID for the app registered in the Azure portal
        with MS Graph permission "Files.Read.All".
    :param client_secret: The Application Secret for the registered app. When it is empty or
        not provided, the reader is flagged as using interactive authentication.
    :param tenant_id: The Directory (tenant) ID the app is registered with.
        Defaults to "consumers".
    :param kwargs: Extra keyword arguments forwarded unchanged to the base initializer.
    """
    super().__init__(
        client_id=client_id,
        client_secret=client_secret,
        tenant_id=tenant_id,
        **kwargs,
    )
    # The private attribute must be assigned AFTER the pydantic initializer runs:
    # the base __init__ sets up private-attribute storage and applies the
    # PrivateAttr default, so a value assigned before it would be discarded
    # (or rejected) and interactive auth would never be selected.
    self._is_interactive_auth = not client_secret
@classmethod
def class_name(cls) -> str:
    """Return the fixed name string for this reader class."""
    reader_name = "OneDriveReader"
    return reader_name

def _authenticate_with_msal(self) -> Any:
"""Authenticate with MSAL.
Expand Down Expand Up @@ -473,7 +490,7 @@ def load_data(
recursive: bool = True,
userprincipalname: Optional[str] = None,
) -> List[Document]:
"""Load data from the folder id / file ids, f both are not provided download from the root.
"""Load data from the folder id / file ids, if both are not provided download from the root.
Args:
folder_id: folder id of the folder in OneDrive.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ license = "MIT"
maintainers = ["godwin3737"]
name = "llama-index-readers-microsoft-onedrive"
readme = "README.md"
version = "0.1.3"
version = "0.1.4"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
from llama_index.core.readers.base import BaseReader
from llama_index.readers.microsoft_onedrive import OneDriveReader

# Placeholder identifiers used to construct the reader in test_serialize.
test_client_id = "test_client_id"
test_tenant_id = "test_tenant_id"


def test_class():
    """Check that a class named like BaseReader appears in OneDriveReader's MRO."""
    mro_names = {klass.__name__ for klass in OneDriveReader.__mro__}
    assert BaseReader.__name__ in mro_names


def test_serialize():
    """Round-trip a reader through its JSON form and compare the public fields."""
    original = OneDriveReader(client_id=test_client_id, tenant_id=test_tenant_id)

    # The generated schema must exist, be non-empty and expose ``client_id``.
    model_schema = original.schema()
    assert model_schema is not None
    assert len(model_schema) > 0
    assert "client_id" in model_schema["properties"]

    # Serialize only the explicitly-set fields, then rebuild a reader from them.
    payload = original.json(exclude_unset=True)
    restored = OneDriveReader.parse_raw(payload)

    for field_name in ("client_id", "tenant_id"):
        assert getattr(restored, field_name) == getattr(original, field_name)

0 comments on commit e2396a7

Please sign in to comment.