diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f0d30cf..4b3d11d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -175,7 +175,7 @@ jobs: uses: actions/checkout@v4 - name: Log in to the Container registry - uses: docker/login-action@v3.2.0 + uses: docker/login-action@v3.3.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} diff --git a/google_sheets/app.py b/google_sheets/app.py index 2971840..134d843 100644 --- a/google_sheets/app.py +++ b/google_sheets/app.py @@ -48,9 +48,7 @@ async def is_authenticated_for_ads(user_id: int) -> bool: async with get_db_connection() as db: data = await db.gauth.find_unique(where={"user_id": user_id}) - if not data: - return False - return True + return bool(data) # Route 1: Redirect to Google OAuth @@ -299,24 +297,26 @@ async def get_all_sheet_titles( return sheets -NEW_CAMPAIGN_MANDATORY_COLUMNS = ["Country", "Station From", "Station To"] +NEW_CAMPAIGN_MANDATORY_COLUMNS = [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", +] MANDATORY_AD_TEMPLATE_COLUMNS = [ - "Campaign", - "Ad Group", "Headline 1", "Headline 2", "Headline 3", "Description Line 1", "Description Line 2", - "Final Url", ] MANDATORY_KEYWORD_TEMPLATE_COLUMNS = [ - "Campaign", - "Ad Group", "Keyword", - "Criterion Type", - "Max CPC", + "Keyword Match Type", + "Level", + "Negative", ] @@ -328,40 +328,12 @@ def _validate_target_resource(target_resource: Optional[str]) -> None: ) -@app.post( - "/process-data", - description="Process data to generate new ads or keywords based on the template", -) async def process_data( - template_sheet_values: Annotated[ - Optional[GoogleSheetValues], - Body( - embed=True, - description="Template values to be used for generating new ads or keywords", - ), - ] = None, - new_campaign_sheet_values: Annotated[ - Optional[GoogleSheetValues], - Body( - embed=True, - description="New campaign values to be used for generating new ads or 
keywords", - ), - ] = None, - target_resource: Annotated[ - Optional[str], - Query( - description="The target resource to be updated. This can be 'ad' or 'keyword'" - ), - ] = None, + template_sheet_values: GoogleSheetValues, + new_campaign_sheet_values: GoogleSheetValues, + merged_campaigns_ad_groups_df: pd.DataFrame, + target_resource: str, ) -> GoogleSheetValues: - _check_parameters_are_not_none( - { - "template_sheet_values": template_sheet_values, - "new_campaign_sheet_values": new_campaign_sheet_values, - "target_resource": target_resource, - } - ) - _validate_target_resource(target_resource) if ( len(template_sheet_values.values) < 2 # type: ignore or len(new_campaign_sheet_values.values) < 2 # type: ignore @@ -408,7 +380,12 @@ async def process_data( status_code=status.HTTP_400_BAD_REQUEST, detail=validation_error_msg ) - processed_df = process_data_f(template_df, new_campaign_df) + processed_df = process_data_f( + merged_campaigns_ad_groups_df, + template_df, + new_campaign_df, + target_resource=target_resource, + ) validated_df = validate_output_data( processed_df, @@ -420,6 +397,43 @@ async def process_data( return GoogleSheetValues(values=values) +async def process_campaigns_and_ad_groups( + campaign_template_values: GoogleSheetValues, + ad_group_template_values: GoogleSheetValues, +) -> pd.DataFrame: + _check_parameters_are_not_none( + { + "campaign_template_values": campaign_template_values, + "ad_group_template_values": ad_group_template_values, + } + ) + if ( + len(campaign_template_values.values) < 2 # type: ignore + or len(ad_group_template_values.values) < 2 # type: ignore + ): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Both template campaigns and ad groups data should have at least two rows (header and data).", + ) + + try: + campaign_template_df = pd.DataFrame( + campaign_template_values.values[1:], # type: ignore + columns=campaign_template_values.values[0], # type: ignore + ) + ad_group_template_df = 
pd.DataFrame( + ad_group_template_values.values[1:], # type: ignore + columns=ad_group_template_values.values[0], # type: ignore + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid data format. Please provide data in the correct format: {e}", + ) from e + + return pd.merge(campaign_template_df, ad_group_template_df, how="cross") + + @app.post( "/process-spreadsheet", description="Process data to generate new ads or keywords based on the template", @@ -432,10 +446,6 @@ async def process_spreadsheet( Optional[str], Query(description="ID of the Google Sheet with the template data"), ] = None, - template_sheet_title: Annotated[ - Optional[str], - Query(description="The title of the sheet with the template data"), - ] = None, new_campaign_spreadsheet_id: Annotated[ Optional[str], Query(description="ID of the Google Sheet with the new campaign data"), @@ -444,66 +454,95 @@ async def process_spreadsheet( Optional[str], Query(description="The title of the sheet with the new campaign data"), ] = None, - target_resource: Annotated[ - Optional[str], - Query( - description="The target resource to be updated, options: 'ad' or 'keyword'" - ), - ] = None, ) -> str: _check_parameters_are_not_none( { "template_spreadsheet_id": template_spreadsheet_id, - "template_sheet_title": template_sheet_title, "new_campaign_spreadsheet_id": new_campaign_spreadsheet_id, "new_campaign_sheet_title": new_campaign_sheet_title, - "target_resource": target_resource, } ) - _validate_target_resource(target_resource) - template_values = await get_sheet( - user_id=user_id, - spreadsheet_id=template_spreadsheet_id, - title=template_sheet_title, - ) new_campaign_values = await get_sheet( user_id=user_id, spreadsheet_id=new_campaign_spreadsheet_id, title=new_campaign_sheet_title, ) + try: + ads_template_values = await get_sheet( + user_id=user_id, + spreadsheet_id=template_spreadsheet_id, + title="Ads", + ) + keywords_template_values = await 
get_sheet( + user_id=user_id, + spreadsheet_id=template_spreadsheet_id, + title="Keywords", + ) + campaign_template_values = await get_sheet( + user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Campaigns" + ) + ad_group_template_values = await get_sheet( + user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Ad Groups" + ) + if not isinstance( + campaign_template_values, GoogleSheetValues + ) or not isinstance(ad_group_template_values, GoogleSheetValues): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"""Please provide Campaigns, Ad Groups, Ads and Keywords tables in the template spreadsheet with id '{template_spreadsheet_id}'""", + ) + + merged_campaigns_ad_groups_df = await process_campaigns_and_ad_groups( + campaign_template_values=campaign_template_values, + ad_group_template_values=ad_group_template_values, + ) + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"""Make sure tables 'Campaigns', 'Ad Groups', 'Ads' and 'Keywords' are present in the template spreadsheet with id '{template_spreadsheet_id}'.""", + ) from e - if not isinstance(template_values, GoogleSheetValues) or not isinstance( - new_campaign_values, GoogleSheetValues + if ( + not isinstance(ads_template_values, GoogleSheetValues) + or not isinstance(keywords_template_values, GoogleSheetValues) + or not isinstance(new_campaign_values, GoogleSheetValues) ): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"""Invalid data format. 
-template_values: {template_values} +ads_template_values: {ads_template_values} + +keywords_template_values: {keywords_template_values} new_campaign_values: {new_campaign_values} Please provide data in the correct format.""", ) - processed_values = await process_data( - template_sheet_values=template_values, - new_campaign_sheet_values=new_campaign_values, - target_resource=target_resource, - ) + response = "" + for template_values, target_resource in zip( + [ads_template_values, keywords_template_values], ["ad", "keyword"] + ): + processed_values = await process_data( + template_sheet_values=template_values, + new_campaign_sheet_values=new_campaign_values, + merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df, + target_resource=target_resource, + ) - title = ( - f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}" # type: ignore - ) - await create_sheet( - user_id=user_id, - spreadsheet_id=new_campaign_spreadsheet_id, - title=title, - ) - await update_sheet( - user_id=user_id, - spreadsheet_id=new_campaign_spreadsheet_id, - title=title, - sheet_values=processed_values, - ) + title = f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}" # type: ignore + await create_sheet( + user_id=user_id, + spreadsheet_id=new_campaign_spreadsheet_id, + title=title, + ) + await update_sheet( + user_id=user_id, + spreadsheet_id=new_campaign_spreadsheet_id, + title=title, + sheet_values=processed_values, + ) + response += f"Sheet with the name '{title}' has been created successfully.\n" - return f"Sheet with the name 'Captn - {target_resource.capitalize()}s' has been created successfully." 
# type: ignore + return response diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py index fe50755..4c97ac0 100644 --- a/google_sheets/data_processing/processing.py +++ b/google_sheets/data_processing/processing.py @@ -1,4 +1,4 @@ -from typing import List, Literal +from typing import List, Literal, Optional import pandas as pd @@ -24,15 +24,20 @@ def validate_input_data( INSERT_STATION_FROM = "INSERT_STATION_FROM" INSERT_STATION_TO = "INSERT_STATION_TO" +INSERT_COUNTRY = "INSERT_COUNTRY" +INSERT_CRITERION_TYPE = "INSERT_CRITERION_TYPE" def process_data_f( - template_df: pd.DataFrame, new_campaign_df: pd.DataFrame + merged_campaigns_ad_groups_df: pd.DataFrame, + template_df: pd.DataFrame, + new_campaign_df: pd.DataFrame, + target_resource: Optional[str] = None, ) -> pd.DataFrame: + template_df = pd.merge(merged_campaigns_ad_groups_df, template_df, how="cross") final_df = pd.DataFrame(columns=template_df.columns) for _, template_row in template_df.iterrows(): for _, new_campaign_row in new_campaign_df.iterrows(): - campaign = f"{new_campaign_row['Country']} - {new_campaign_row['Station From']} - {new_campaign_row['Station To']}" stations = [ { "Station From": new_campaign_row["Station From"], @@ -44,23 +49,58 @@ def process_data_f( "Station To": new_campaign_row["Station From"], }, ] + if target_resource == "ad": + stations[0]["Final Url"] = new_campaign_row["Final Url From"] + stations[1]["Final Url"] = new_campaign_row["Final Url To"] + for station in stations: new_row = template_row.copy() - new_row["Campaign"] = campaign - new_row["Ad Group"] = ( - f"{station['Station From']} - {station['Station To']}" + new_row["Campaign Name"] = new_row["Campaign Name"].replace( + INSERT_COUNTRY, new_campaign_row["Country"] + ) + new_row["Campaign Name"] = new_row["Campaign Name"].replace( + INSERT_STATION_FROM, new_campaign_row["Station From"] + ) + new_row["Campaign Name"] = new_row["Campaign Name"].replace( + 
INSERT_STATION_TO, new_campaign_row["Station To"] + ) + + new_row["Ad Group Name"] = new_row["Ad Group Name"].replace( + INSERT_CRITERION_TYPE, new_row["Match Type"] ) - # Replace the placeholders in all columns with the actual station names INSERT_STATION_FROM + new_row = new_row.str.replace( + INSERT_COUNTRY, new_campaign_row["Country"] + ) new_row = new_row.str.replace( INSERT_STATION_FROM, station["Station From"] ) new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"]) + if target_resource == "ad": + new_row["Final URL"] = station["Final Url"] + elif ( + target_resource == "keyword" + and new_row["Negative"] + and new_row["Negative"].lower() == "true" + ): + new_row["Match Type"] = new_row["Keyword Match Type"] + + if new_row["Level"] == "Campaign": + new_row["Ad Group Name"] = None + final_df = pd.concat( [final_df, pd.DataFrame([new_row])], ignore_index=True ) + if target_resource == "keyword": + final_df = final_df.drop(columns=["Keyword Match Type"]) + final_df = final_df.drop_duplicates(ignore_index=True) + + final_df = final_df.sort_values( + by=["Campaign Name", "Ad Group Name"], ignore_index=True + ) + return final_df @@ -72,6 +112,8 @@ def process_data_f( MAX_HEADLINE_LENGTH = 30 MAX_DESCRIPTION_LENGTH = 90 +MAX_PATH_LENGTH = 15 + def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901 df["Issues"] = "" @@ -125,9 +167,14 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901 f"Description length should be less than {MAX_DESCRIPTION_LENGTH} characters, found {len(description)} in column {description_column}.\n" ) - # TODO: Check for the final URL - # if not row["Final URL"]: - # df.loc[index, "Issues"] += "Final URL is missing.\n" + for path in ["Path 1", "Path 2"]: + if row[path] and len(row[path]) > MAX_PATH_LENGTH: + df.loc[index, "Issues"] += ( + f"{path} length should be less than {MAX_PATH_LENGTH} characters, found {len(row[path])}.\n" + ) + + if not row["Final URL"]: + df.loc[index, 
"Issues"] += "Final URL is missing.\n" if not df["Issues"].any(): df = df.drop(columns=["Issues"]) diff --git a/google_sheets/google_api/service.py b/google_sheets/google_api/service.py index 35ccc5e..bf045a7 100644 --- a/google_sheets/google_api/service.py +++ b/google_sheets/google_api/service.py @@ -71,8 +71,14 @@ def get_files_f(service: Any) -> List[Dict[str, str]]: def get_sheet_f(service: Any, spreadsheet_id: str, range: str) -> Any: # Call the Sheets API sheet = service.spreadsheets() - result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range).execute() - values = result.get("values", []) + try: + result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range).execute() + values = result.get("values", []) + except Exception as e: + raise HTTPException( + status_code=404, + detail=f"Unable to read from spreadsheet with id '{spreadsheet_id}', and range '{range}'", + ) from e return values diff --git a/pyproject.toml b/pyproject.toml index 4ff95d2..f3a05d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "pydantic>=2.3,<3", "fastapi>=0.110.2", "prisma==0.13.1", - "google-api-python-client==2.133.0", + "google-api-python-client==2.137.0", "asyncify==0.10.0", "pandas==2.2.2" ] @@ -56,7 +56,7 @@ dependencies = [ # public distributions server = [ - "uvicorn==0.30.1", + "uvicorn==0.30.3", ] # dev dependencies @@ -66,16 +66,16 @@ lint = [ "types-ujson", "types-Pygments", "types-docutils", - "mypy==1.10.0", - "ruff==0.4.10", + "mypy==1.11.0", + "ruff==0.5.4", "pyupgrade-directories==0.3.0", "bandit==1.7.9", - "semgrep==1.77.0", + "semgrep==1.80.0", "pytest-mypy-plugins==3.1.2", ] test-core = [ - "coverage[toml]==7.5.4", + "coverage[toml]==7.6.0", "pytest==8.2.2", "pytest-asyncio==0.23.7", ] diff --git a/tests/app/test_app.py b/tests/app/test_app.py index f60b806..a3401e9 100644 --- a/tests/app/test_app.py +++ b/tests/app/test_app.py @@ -1,12 +1,13 @@ from typing import Any, Dict, Optional, Union from 
unittest.mock import MagicMock, patch +import pandas as pd import pytest from fastapi import HTTPException from fastapi.testclient import TestClient from googleapiclient.errors import HttpError -from google_sheets.app import _check_parameters_are_not_none, app +from google_sheets.app import _check_parameters_are_not_none, app, process_data from google_sheets.model import GoogleSheetValues client = TestClient(app) @@ -167,107 +168,165 @@ def test_get_all_file_names(self) -> None: class TestProcessData: @pytest.mark.parametrize( - ("template_sheet_values", "new_campaign_sheet_values", "status_code", "detail"), + ("template_sheet_values", "new_campaign_sheet_values", "detail"), [ ( GoogleSheetValues( values=[ - ["Campaign", "Ad Group", "Keyword"], + ["Keyword"], ] ), GoogleSheetValues( values=[ - ["Country", "Station From", "Station To"], - ["India", "Delhi", "Mumbai"], + [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", + ], + [ + "India", + "Delhi", + "Mumbai", + "https://www.example.com/from", + "https://www.example.com/to", + ], ] ), - 400, "Both template and new campaign data should have at least two rows", ), ( GoogleSheetValues( values=[ - ["Campaign", "Ad Group", "Keyword"], - ["Campaign A", "Ad group A", "Keyword A"], + ["Fake column"], + ["fake"], ] ), GoogleSheetValues( values=[ - ["Country", "Station From", "Station To"], - ["India", "Delhi", "Mumbai"], + [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", + ], + [ + "India", + "Delhi", + "Mumbai", + "https://www.example.com/from", + "https://www.example.com/to", + ], ] ), - 400, "Mandatory columns missing in the keyword template data.", ), ( GoogleSheetValues( values=[ [ - "Campaign", - "Ad Group", "Keyword", - "Criterion Type", - "Max CPC", + "Keyword Match Type", + "Level", + "Negative", ], - ["Campaign A", "Ad group A", "Keyword A", "Exact", "1"], + ["Keyword A", "Exact", None, "False"], + ["Keyword N", "Broad", "Campaign", 
"True"], ] ), GoogleSheetValues( values=[ - ["Country", "Station From", "Station To"], - ["India", "Delhi", "Mumbai"], + [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", + ], + [ + "India", + "Delhi", + "Mumbai", + "https://www.example.com/from", + "https://www.example.com/to", + ], ] ), - 200, GoogleSheetValues( values=[ [ - "Campaign", - "Ad Group", + "Campaign Name", + "Ad Group Name", + "Match Type", "Keyword", - "Criterion Type", - "Max CPC", + "Level", + "Negative", ], [ "India - Delhi - Mumbai", "Delhi - Mumbai", - "Keyword A", "Exact", - "1", + "Keyword A", + None, + "False", ], [ "India - Delhi - Mumbai", "Mumbai - Delhi", - "Keyword A", "Exact", - "1", + "Keyword A", + None, + "False", + ], + [ + "India - Delhi - Mumbai", + None, + "Broad", + "Keyword N", + "Campaign", + "True", ], ], ), ), ], ) - def test_process_data( + @pytest.mark.asyncio() + async def test_process_data( self, template_sheet_values: GoogleSheetValues, new_campaign_sheet_values: GoogleSheetValues, - status_code: int, detail: Union[str, GoogleSheetValues], ) -> None: - response = client.post( - "/process-data?target_resource=keyword", - json={ - "template_sheet_values": template_sheet_values.model_dump(), - "new_campaign_sheet_values": new_campaign_sheet_values.model_dump(), - }, + merged_campaigns_ad_groups_df = pd.DataFrame( + { + "Campaign Name": [ + "INSERT_COUNTRY - INSERT_STATION_FROM - INSERT_STATION_TO" + ], + "Ad Group Name": ["INSERT_STATION_FROM - INSERT_STATION_TO"], + "Match Type": ["Exact"], + } ) - - assert response.status_code == status_code if isinstance(detail, GoogleSheetValues): - assert response.json() == detail.model_dump() + processed_data = await process_data( + template_sheet_values=template_sheet_values, + new_campaign_sheet_values=new_campaign_sheet_values, + merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df, + target_resource="keyword", + ) + assert processed_data.model_dump() == detail.model_dump() + else: - 
assert detail in response.json()["detail"] + with pytest.raises(HTTPException) as exc: + await process_data( + template_sheet_values=template_sheet_values, + new_campaign_sheet_values=new_campaign_sheet_values, + merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df, + target_resource="keyword", + ) + assert detail in exc.value.detail class TestOpenAPIJSON: @@ -284,7 +343,6 @@ def test_openapi(self) -> None: "/create-sheet", "/get-all-file-names", "/get-all-sheet-titles", - "/process-data", "/process-spreadsheet", ] diff --git a/tests/data_processing/test_processing.py b/tests/data_processing/test_processing.py index 089a397..f1cc9e6 100644 --- a/tests/data_processing/test_processing.py +++ b/tests/data_processing/test_processing.py @@ -51,13 +51,20 @@ def test_validate_input_data(df: pd.DataFrame, expected: str) -> None: @pytest.mark.parametrize( - ("template_df", "new_campaign_df", "expected"), + ("merged_campaigns_ad_groups_df", "template_df", "new_campaign_df", "expected"), [ ( pd.DataFrame( { - "Campaign": ["", ""], - "Ad Group": ["", ""], + "Campaign Name": [ + "INSERT_COUNTRY - INSERT_STATION_FROM - INSERT_STATION_TO" + ], + "Ad Group Name": ["INSERT_STATION_FROM - INSERT_STATION_TO"], + "Match Type": ["Exact"], + } + ), + pd.DataFrame( + { "Keyword": ["k1", "k2"], "Max CPC": ["", ""], } @@ -99,8 +106,15 @@ def test_validate_input_data(df: pd.DataFrame, expected: str) -> None: ( pd.DataFrame( { - "Campaign": ["", ""], - "Ad Group": ["", ""], + "Campaign Name": [ + "INSERT_COUNTRY - INSERT_STATION_FROM - INSERT_STATION_TO" + ], + "Ad Group Name": ["INSERT_STATION_FROM - INSERT_STATION_TO"], + "Match Type": ["Exact"], + } + ), + pd.DataFrame( + { "Keyword": ["k1 INSERT_STATION_FROM", "k2"], "Max CPC": ["", ""], } @@ -142,9 +156,14 @@ def test_validate_input_data(df: pd.DataFrame, expected: str) -> None: ], ) def test_process_data_f( - template_df: pd.DataFrame, new_campaign_df: pd.DataFrame, expected: List[List[str]] + merged_campaigns_ad_groups_df: 
pd.DataFrame, + template_df: pd.DataFrame, + new_campaign_df: pd.DataFrame, + expected: List[List[str]], ) -> None: - process_data_f(template_df, new_campaign_df).equals(expected) + process_data_f(merged_campaigns_ad_groups_df, template_df, new_campaign_df).equals( + expected + ) @pytest.mark.parametrize( @@ -167,6 +186,9 @@ def test_validate_output_data(issues_column: Optional[List[str]]) -> None: "Headline 3": ["H3", "H3", "H3", ""], "Description 1": ["D1", "D1", "D2", "D3"], "Description 2": ["D1", "D1", "D3", ""], + "Path 1": ["P1", "P1", "P1", "P1"], + "Path 2": ["P2", "P2", "P2", "P2"], + "Final URL": ["URL", "URL", "URL", "URL"], } ) result = validate_output_data(df, "ad")