diff --git a/align_data/common/alignment_dataset.py b/align_data/common/alignment_dataset.py index f77c21b..5e35d9f 100644 --- a/align_data/common/alignment_dataset.py +++ b/align_data/common/alignment_dataset.py @@ -20,7 +20,6 @@ from align_data.settings import ARTICLE_MAIN_KEYS from align_data.sources.utils import merge_dicts - logger = logging.getLogger(__name__) diff --git a/align_data/embeddings/pinecone/pinecone_db_handler.py b/align_data/embeddings/pinecone/pinecone_db_handler.py index b0b09b9..3cf3211 100644 --- a/align_data/embeddings/pinecone/pinecone_db_handler.py +++ b/align_data/embeddings/pinecone/pinecone_db_handler.py @@ -19,7 +19,6 @@ PINECONE_NAMESPACE, ) - logger = logging.getLogger(__name__) diff --git a/align_data/embeddings/pinecone/update_pinecone.py b/align_data/embeddings/pinecone/update_pinecone.py index 321fe6b..1f0ef77 100644 --- a/align_data/embeddings/pinecone/update_pinecone.py +++ b/align_data/embeddings/pinecone/update_pinecone.py @@ -22,10 +22,8 @@ ) from align_data.embeddings.text_splitter import ParagraphSentenceUnitTextSplitter - logger = logging.getLogger(__name__) - # Define type aliases for the Callables LengthFunctionType = Callable[[str], int] TruncateFunctionType = Callable[[str, int], str] diff --git a/align_data/sources/articles/articles.py b/align_data/sources/articles/articles.py index a6a343f..4d497bb 100644 --- a/align_data/sources/articles/articles.py +++ b/align_data/sources/articles/articles.py @@ -23,7 +23,6 @@ logger = logging.getLogger(__name__) - # Careful changing these - the sheets assume this ordering REQUIRED_FIELDS = ["url", "source_url", "title", "source_type", "date_published"] OPTIONAL_FIELDS = ["authors", "summary"] diff --git a/align_data/sources/articles/google_cloud.py b/align_data/sources/articles/google_cloud.py index ee6f2dd..3a9f096 100644 --- a/align_data/sources/articles/google_cloud.py +++ b/align_data/sources/articles/google_cloud.py @@ -22,13 +22,11 @@ logger = logging.getLogger(__name__) - SCOPES = [ "https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive", ] - OK = "ok" OUTPUT_SPREADSHEET_ID = "1bg-6vL-I82CBRkxvWQs1-Ao0nTvHyfn4yns5MdlbCmY" # TODO: remove this sheet_name = "Sheet1" # TODO: remove this diff --git a/align_data/sources/articles/html.py b/align_data/sources/articles/html.py index 9c1c15a..fb280c3 100644 --- a/align_data/sources/articles/html.py +++ b/align_data/sources/articles/html.py @@ -8,7 +8,6 @@ logger = logging.getLogger(__name__) - DEFAULT_HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/113.0", } diff --git a/align_data/sources/articles/parsers.py b/align_data/sources/articles/parsers.py index 5dc1da3..d5fe257 100644 --- a/align_data/sources/articles/parsers.py +++ b/align_data/sources/articles/parsers.py @@ -10,7 +10,6 @@ from align_data.sources.arxiv_papers import fetch_arxiv from align_data.common.html_dataset import HTMLDataset - logger = logging.getLogger(__name__) ParserFunc = Callable[[str], Dict[str, Any]] diff --git a/align_data/sources/articles/updater.py b/align_data/sources/articles/updater.py index 5a5e56c..d9dccae 100644 --- a/align_data/sources/articles/updater.py +++ b/align_data/sources/articles/updater.py @@ -13,10 +13,12 @@ logger = logging.getLogger(__name__) + class Item(NamedTuple): updates: NamedTuple article: Article + @dataclass class ReplacerDataset(AlignmentDataset): csv_path: str | Path diff --git a/align_data/sources/youtube/youtube.py b/align_data/sources/youtube/youtube.py index 60d40c5..608cd96 100644 --- a/align_data/sources/youtube/youtube.py +++ b/align_data/sources/youtube/youtube.py @@ -13,7 +13,6 @@ from align_data.settings import YOUTUBE_API_KEY from align_data.common.alignment_dataset import AlignmentDataset - logger = logging.getLogger(__name__)