From e58292fab20b4a14faae0023774be365ba870554 Mon Sep 17 00:00:00 2001 From: Henri Lemoine Date: Fri, 8 Sep 2023 18:02:41 -0400 Subject: [PATCH] nits --- align_data/common/alignment_dataset.py | 1 - align_data/embeddings/pinecone/pinecone_db_handler.py | 1 - align_data/embeddings/pinecone/update_pinecone.py | 2 -- align_data/sources/articles/articles.py | 1 - align_data/sources/articles/google_cloud.py | 2 -- align_data/sources/articles/html.py | 1 - align_data/sources/articles/parsers.py | 1 - align_data/sources/articles/updater.py | 2 ++ align_data/sources/youtube/youtube.py | 1 - 9 files changed, 2 insertions(+), 10 deletions(-) diff --git a/align_data/common/alignment_dataset.py b/align_data/common/alignment_dataset.py index f77c21bc..5e35d9fd 100644 --- a/align_data/common/alignment_dataset.py +++ b/align_data/common/alignment_dataset.py @@ -20,7 +20,6 @@ from align_data.settings import ARTICLE_MAIN_KEYS from align_data.sources.utils import merge_dicts - logger = logging.getLogger(__name__) diff --git a/align_data/embeddings/pinecone/pinecone_db_handler.py b/align_data/embeddings/pinecone/pinecone_db_handler.py index b0b09b9f..3cf32112 100644 --- a/align_data/embeddings/pinecone/pinecone_db_handler.py +++ b/align_data/embeddings/pinecone/pinecone_db_handler.py @@ -19,7 +19,6 @@ PINECONE_NAMESPACE, ) - logger = logging.getLogger(__name__) diff --git a/align_data/embeddings/pinecone/update_pinecone.py b/align_data/embeddings/pinecone/update_pinecone.py index 321fe6b1..1f0ef775 100644 --- a/align_data/embeddings/pinecone/update_pinecone.py +++ b/align_data/embeddings/pinecone/update_pinecone.py @@ -22,10 +22,8 @@ ) from align_data.embeddings.text_splitter import ParagraphSentenceUnitTextSplitter - logger = logging.getLogger(__name__) - # Define type aliases for the Callables LengthFunctionType = Callable[[str], int] TruncateFunctionType = Callable[[str, int], str] diff --git a/align_data/sources/articles/articles.py b/align_data/sources/articles/articles.py index a6a343f6..4d497bb1 100644 --- a/align_data/sources/articles/articles.py +++ b/align_data/sources/articles/articles.py @@ -23,7 +23,6 @@ logger = logging.getLogger(__name__) - # Careful changing these - the sheets assume this ordering REQUIRED_FIELDS = ["url", "source_url", "title", "source_type", "date_published"] OPTIONAL_FIELDS = ["authors", "summary"] diff --git a/align_data/sources/articles/google_cloud.py b/align_data/sources/articles/google_cloud.py index ee6f2ddc..3a9f096c 100644 --- a/align_data/sources/articles/google_cloud.py +++ b/align_data/sources/articles/google_cloud.py @@ -22,13 +22,11 @@ logger = logging.getLogger(__name__) - SCOPES = [ "https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive", ] - OK = "ok" OUTPUT_SPREADSHEET_ID = "1bg-6vL-I82CBRkxvWQs1-Ao0nTvHyfn4yns5MdlbCmY" # TODO: remove this sheet_name = "Sheet1" # TODO: remove this diff --git a/align_data/sources/articles/html.py b/align_data/sources/articles/html.py index 9c1c15a1..fb280c37 100644 --- a/align_data/sources/articles/html.py +++ b/align_data/sources/articles/html.py @@ -8,7 +8,6 @@ logger = logging.getLogger(__name__) - DEFAULT_HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0", } diff --git a/align_data/sources/articles/parsers.py b/align_data/sources/articles/parsers.py index 5dc1da3d..d5fe2578 100644 --- a/align_data/sources/articles/parsers.py +++ b/align_data/sources/articles/parsers.py @@ -10,7 +10,6 @@ from align_data.sources.arxiv_papers import fetch_arxiv from align_data.common.html_dataset import HTMLDataset - logger = logging.getLogger(__name__) ParserFunc = Callable[[str], Dict[str, Any]] diff --git a/align_data/sources/articles/updater.py b/align_data/sources/articles/updater.py index 5a5e56c9..d9dccaef 100644 --- a/align_data/sources/articles/updater.py +++ b/align_data/sources/articles/updater.py @@ -13,10 +13,12 @@ logger = logging.getLogger(__name__) + class Item(NamedTuple): updates: NamedTuple article: Article + @dataclass class ReplacerDataset(AlignmentDataset): csv_path: str | Path diff --git a/align_data/sources/youtube/youtube.py b/align_data/sources/youtube/youtube.py index 60d40c5a..608cd96d 100644 --- a/align_data/sources/youtube/youtube.py +++ b/align_data/sources/youtube/youtube.py @@ -13,7 +13,6 @@ from align_data.settings import YOUTUBE_API_KEY from align_data.common.alignment_dataset import AlignmentDataset - logger = logging.getLogger(__name__)