From a85f2f141387a9fa6fc4610064fc1395f52d693a Mon Sep 17 00:00:00 2001 From: Tamara Janina Fingerlin <90063506+TJaniF@users.noreply.github.com> Date: Fri, 13 Oct 2023 14:33:34 +0200 Subject: [PATCH] Docs update: retries & note about Dagbag error (#592) Add retries of 2 to code samples Add a note about the config variable to fix DagBag timeout errors --- README.rst | 2 +- dev/dags/basic_cosmos_dag.py | 1 + dev/dags/basic_cosmos_task_group.py | 1 + dev/dags/cosmos_manifest_example.py | 1 + dev/dags/cosmos_profile_mapping.py | 1 + dev/dags/cosmos_seed_dag.py | 4 ++-- dev/dags/dbt_docs.py | 1 + dev/dags/example_cosmos_python_models.py | 1 + dev/dags/example_model_version.py | 1 + dev/dags/example_virtualenv.py | 1 + dev/dags/user_defined_profile.py | 1 + docs/configuration/scheduling.rst | 2 +- docs/getting_started/astro.rst | 4 ++++ docs/getting_started/gcc.rst | 6 ++++++ docs/getting_started/kubernetes.rst | 2 +- docs/getting_started/mwaa.rst | 6 ++++++ docs/getting_started/open-source.rst | 6 ++++++ docs/index.rst | 3 ++- 18 files changed, 38 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index d75c4eec9..e4f69af63 100644 --- a/README.rst +++ b/README.rst @@ -61,7 +61,7 @@ You can render an Airflow Task Group using the ``DbtTaskGroup`` class. 
Here's an example: with DAG( dag_id="extract_dag", start_date=datetime(2022, 11, 27), - schedule="@daily", + schedule_interval="@daily", ): e1 = EmptyOperator(task_id="pre_dbt") diff --git a/dev/dags/basic_cosmos_dag.py b/dev/dags/basic_cosmos_dag.py index e7973e970..8bd49b0b3 100644 --- a/dev/dags/basic_cosmos_dag.py +++ b/dev/dags/basic_cosmos_dag.py @@ -37,5 +37,6 @@ start_date=datetime(2023, 1, 1), catchup=False, dag_id="basic_cosmos_dag", + default_args={"retries": 2}, ) # [END local_example] diff --git a/dev/dags/basic_cosmos_task_group.py b/dev/dags/basic_cosmos_task_group.py index 4685d6275..2f875b8c3 100644 --- a/dev/dags/basic_cosmos_task_group.py +++ b/dev/dags/basic_cosmos_task_group.py @@ -42,6 +42,7 @@ def basic_cosmos_task_group() -> None: ), operator_args={"install_deps": True}, profile_config=profile_config, + default_args={"retries": 2}, ) post_dbt = EmptyOperator(task_id="post_dbt") diff --git a/dev/dags/cosmos_manifest_example.py b/dev/dags/cosmos_manifest_example.py index ddf5f1b10..14ce42606 100644 --- a/dev/dags/cosmos_manifest_example.py +++ b/dev/dags/cosmos_manifest_example.py @@ -36,5 +36,6 @@ start_date=datetime(2023, 1, 1), catchup=False, dag_id="cosmos_manifest_example", + default_args={"retries": 2}, ) # [END local_example] diff --git a/dev/dags/cosmos_profile_mapping.py b/dev/dags/cosmos_profile_mapping.py index b3f16c9e6..33619a39d 100644 --- a/dev/dags/cosmos_profile_mapping.py +++ b/dev/dags/cosmos_profile_mapping.py @@ -41,6 +41,7 @@ def cosmos_profile_mapping() -> None: ), ), operator_args={"install_deps": True}, + default_args={"retries": 2}, ) post_dbt = EmptyOperator(task_id="post_dbt") diff --git a/dev/dags/cosmos_seed_dag.py b/dev/dags/cosmos_seed_dag.py index afdf56c72..cef84dd66 100644 --- a/dev/dags/cosmos_seed_dag.py +++ b/dev/dags/cosmos_seed_dag.py @@ -36,11 +36,11 @@ with DAG( dag_id="extract_dag", start_date=datetime(2022, 11, 27), - schedule="@daily", + schedule_interval="@daily", doc_md=__doc__, catchup=False, 
max_active_runs=1, - default_args={"owner": "01-EXTRACT"}, + default_args={"owner": "01-EXTRACT", "retries": 2}, ) as dag: with TaskGroup(group_id="drop_seeds_if_exist") as drop_seeds: for seed in ["raw_customers", "raw_payments", "raw_orders"]: diff --git a/dev/dags/dbt_docs.py b/dev/dags/dbt_docs.py index 1bb5fb1c5..1fcd1c341 100644 --- a/dev/dags/dbt_docs.py +++ b/dev/dags/dbt_docs.py @@ -66,6 +66,7 @@ def which_upload(): schedule_interval="@daily", doc_md=__doc__, catchup=False, + default_args={"retries": 2}, ) as dag: generate_dbt_docs_aws = DbtDocsS3Operator( task_id="generate_dbt_docs_aws", diff --git a/dev/dags/example_cosmos_python_models.py b/dev/dags/example_cosmos_python_models.py index 92012fe62..7d9a61465 100644 --- a/dev/dags/example_cosmos_python_models.py +++ b/dev/dags/example_cosmos_python_models.py @@ -48,5 +48,6 @@ start_date=datetime(2023, 1, 1), catchup=False, dag_id="example_cosmos_python_models", + default_args={"retries": 2}, ) # [END example_cosmos_python_models] diff --git a/dev/dags/example_model_version.py b/dev/dags/example_model_version.py index a4226a611..78f38647d 100644 --- a/dev/dags/example_model_version.py +++ b/dev/dags/example_model_version.py @@ -34,5 +34,6 @@ start_date=datetime(2023, 1, 1), catchup=False, dag_id="example_model_version", + default_args={"retries": 2}, ) # [END local_example] diff --git a/dev/dags/example_virtualenv.py b/dev/dags/example_virtualenv.py index e91c881cf..7b1368f8c 100644 --- a/dev/dags/example_virtualenv.py +++ b/dev/dags/example_virtualenv.py @@ -41,5 +41,6 @@ start_date=datetime(2023, 1, 1), catchup=False, dag_id="example_virtualenv", + default_args={"retries": 2}, ) # [END virtualenv_example] diff --git a/dev/dags/user_defined_profile.py b/dev/dags/user_defined_profile.py index 2624ebb4f..ab30cdb2f 100644 --- a/dev/dags/user_defined_profile.py +++ b/dev/dags/user_defined_profile.py @@ -36,6 +36,7 @@ def user_defined_profile() -> None: profiles_yml_filepath=PROFILES_FILE_PATH, ), 
operator_args={"append_env": True, "install_deps": True}, + default_args={"retries": 2}, ) post_dbt = EmptyOperator(task_id="post_dbt") diff --git a/docs/configuration/scheduling.rst b/docs/configuration/scheduling.rst index 738031848..de21f8495 100644 --- a/docs/configuration/scheduling.rst +++ b/docs/configuration/scheduling.rst @@ -50,7 +50,7 @@ Then, you can use Airflow's data-aware scheduling capabilities to schedule ``my_ project_two = DbtDag( # ... - schedule=[get_dbt_dataset("my_conn", "project_one", "my_model")], + schedule_interval=[get_dbt_dataset("my_conn", "project_one", "my_model")], dbt_project_name="project_two", ) diff --git a/docs/getting_started/astro.rst b/docs/getting_started/astro.rst index a3fa14577..c0bedc7e6 100644 --- a/docs/getting_started/astro.rst +++ b/docs/getting_started/astro.rst @@ -106,8 +106,12 @@ In your ``my_cosmos_dag.py`` file, import the ``DbtDag`` class from Cosmos and c start_date=datetime(2023, 1, 1), catchup=False, dag_id="my_cosmos_dag", + default_args={"retries": 2}, ) +.. note:: + In some cases, especially in larger dbt projects, you might run into a ``DagBag import timeout`` error. + This error can be resolved by increasing the value of the Airflow configuration `core.dagbag_import_timeout <https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#dagbag-import-timeout>`_. Start your project ~~~~~~~~~~~~~~~~~~ diff --git a/docs/getting_started/gcc.rst b/docs/getting_started/gcc.rst index 00fa503a0..1ec056e84 100644 --- a/docs/getting_started/gcc.rst +++ b/docs/getting_started/gcc.rst @@ -75,4 +75,10 @@ Make sure to rename the ```` value below to your adapter's Python start_date=datetime(2023, 1, 1), catchup=False, dag_id="my_cosmos_dag", + default_args={"retries": 2}, ) + + +.. note:: + In some cases, especially in larger dbt projects, you might run into a ``DagBag import timeout`` error. + This error can be resolved by increasing the value of the Airflow configuration `core.dagbag_import_timeout <https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#dagbag-import-timeout>`_. 
diff --git a/docs/getting_started/kubernetes.rst b/docs/getting_started/kubernetes.rst index 6d2368997..1ae918a9a 100644 --- a/docs/getting_started/kubernetes.rst +++ b/docs/getting_started/kubernetes.rst @@ -28,7 +28,7 @@ Additional KubernetesPodOperator parameters can be added on the operator_args pa For instance, -.. code-block:: text +.. code-block:: python run_models = DbtTaskGroup( profile_config=ProfileConfig( diff --git a/docs/getting_started/mwaa.rst b/docs/getting_started/mwaa.rst index 0a3fa400e..f7a569302 100644 --- a/docs/getting_started/mwaa.rst +++ b/docs/getting_started/mwaa.rst @@ -109,4 +109,10 @@ In your ``my_cosmos_dag.py`` file, import the ``DbtDag`` class from Cosmos and c start_date=datetime(2023, 1, 1), catchup=False, dag_id="my_cosmos_dag", + default_args={"retries": 2}, ) + + +.. note:: + In some cases, especially in larger dbt projects, you might run into a ``DagBag import timeout`` error. + This error can be resolved by increasing the value of the Airflow configuration `core.dagbag_import_timeout <https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#dagbag-import-timeout>`_. diff --git a/docs/getting_started/open-source.rst b/docs/getting_started/open-source.rst index 2c44be4a6..ba9bbdb15 100644 --- a/docs/getting_started/open-source.rst +++ b/docs/getting_started/open-source.rst @@ -61,4 +61,10 @@ For example, if you wanted to put your dbt project in the ``/usr/local/airflow/d start_date=datetime(2023, 1, 1), catchup=False, dag_id="my_cosmos_dag", + default_args={"retries": 2}, ) + + +.. note:: + In some cases, especially in larger dbt projects, you might run into a ``DagBag import timeout`` error. + This error can be resolved by increasing the value of the Airflow configuration `core.dagbag_import_timeout <https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#dagbag-import-timeout>`_. diff --git a/docs/index.rst b/docs/index.rst index b9768f621..3c61b645d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -63,13 +63,14 @@ You can render an Airflow Task Group using the ``DbtTaskGroup`` class. 
Here's an example: with DAG( dag_id="extract_dag", start_date=datetime(2022, 11, 27), - schedule="@daily", + schedule_interval="@daily", ): e1 = EmptyOperator(task_id="pre_dbt") dbt_tg = DbtTaskGroup( project_config=ProjectConfig("jaffle_shop"), profile_config=profile_config, + default_args={"retries": 2}, ) e2 = EmptyOperator(task_id="post_dbt")