diff --git a/.dockerignore b/.dockerignore index 75d9be8960a3..dba7378a3b77 100644 --- a/.dockerignore +++ b/.dockerignore @@ -54,7 +54,6 @@ !Dockerfile !hatch_build.py !prod_image_installed_providers.txt -!airflow_pre_installed_providers.txt # This folder is for you if you want to add any packages to the docker context when you build your own # docker image. most of other files and any new folder you add will be excluded by default diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4fe8be8dc0dd..e657f871271f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -432,26 +432,26 @@ repos: additional_dependencies: ['setuptools', 'rich>=12.4.4', 'pyyaml', 'tomli'] - id: check-extra-packages-references name: Checks setup extra packages - description: Checks if all the extras defined in pyproject.toml are listed in extra-packages-ref.rst file + description: Checks if all the extras defined in hatch_build.py are listed in extra-packages-ref.rst file language: python - files: ^docs/apache-airflow/extra-packages-ref\.rst$|^pyproject.toml + files: ^docs/apache-airflow/extra-packages-ref\.rst$|^hatch_build.py pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py - additional_dependencies: ['rich>=12.4.4', 'tomli', 'tabulate'] - - id: check-pyproject-toml-order - name: Check order of dependencies in pyproject.toml + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.4', 'tabulate'] + - id: check-hatch-build-order + name: Check order of dependencies in hatch_build.py language: python - files: ^pyproject\.toml$ + files: ^hatch_build.py$ pass_filenames: false - entry: ./scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py - additional_dependencies: ['rich>=12.4.4'] + entry: ./scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.4'] - id: update-extras name: Update extras in documentation entry: ./scripts/ci/pre_commit/pre_commit_insert_extras.py language: python files: ^contributing-docs/12_airflow_dependencies_and_extras.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$|^Dockerfile.* pass_filenames: false - additional_dependencies: ['rich>=12.4.4', 'tomli'] + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.4'] - id: check-extras-order name: Check order of extras in Dockerfile entry: ./scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py diff --git a/Dockerfile b/Dockerfile index 10fc939093f6..1f72714477b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -455,13 +455,17 @@ function install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi + local TEMP_AIRFLOW_DIR + TEMP_AIRFLOW_DIR=$(mktemp -d) # Install latest set of dependencies - without constraints. This is to download a "base" set of # dependencies that we can cache and reuse when installing airflow using constraints and latest # pyproject.toml in the next step (when we install regular airflow). 
set -x
-    ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \
-        ${ADDITIONAL_PIP_INSTALL_FLAGS} \
-        "apache-airflow[${AIRFLOW_EXTRAS}] @ https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz"
+    curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \
+        tar xvz -C "${TEMP_AIRFLOW_DIR}" --strip 1
+    # Make sure editable dependencies are calculated when devel-ci dependencies are installed
+    ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \
+        --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]"
    set +x
    common::install_packaging_tools
    set -x
@@ -477,6 +481,7 @@ function install_airflow_dependencies_from_branch_tip() {
    set +x
    ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow
    set -x
+    rm -rvf "${TEMP_AIRFLOW_DIR}"
    # If you want to make sure dependency is removed from cache in your PR when you removed it from
    # pyproject.toml - please add your dependency here as a list of strings
    # for example:
diff --git a/Dockerfile.ci b/Dockerfile.ci
index d52e8909468c..03c5bbb4e736 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -402,13 +402,17 @@ function install_airflow_dependencies_from_branch_tip() {
    if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then
        AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,}
    fi
+    local TEMP_AIRFLOW_DIR
+    TEMP_AIRFLOW_DIR=$(mktemp -d)
    # Install latest set of dependencies - without constraints. This is to download a "base" set of
    # dependencies that we can cache and reuse when installing airflow using constraints and latest
    # pyproject.toml in the next step (when we install regular airflow).
    set -x
-    ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \
-        ${ADDITIONAL_PIP_INSTALL_FLAGS} \
-        "apache-airflow[${AIRFLOW_EXTRAS}] @ https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz"
+    curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \
+        tar xvz -C "${TEMP_AIRFLOW_DIR}" --strip 1
+    # Make sure editable dependencies are calculated when devel-ci dependencies are installed
+    ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \
+        --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]"
    set +x
    common::install_packaging_tools
    set -x
@@ -424,6 +428,7 @@ function install_airflow_dependencies_from_branch_tip() {
    set +x
    ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow
    set -x
+    rm -rvf "${TEMP_AIRFLOW_DIR}"
    # If you want to make sure dependency is removed from cache in your PR when you removed it from
    # pyproject.toml - please add your dependency here as a list of strings
    # for example:
@@ -1309,7 +1314,6 @@ COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/
COPY generated/* ${AIRFLOW_SOURCES}/generated/
COPY constraints/* ${AIRFLOW_SOURCES}/constraints/
COPY LICENSE ${AIRFLOW_SOURCES}/LICENSE
-COPY airflow_pre_installed_providers.txt ${AIRFLOW_SOURCES}/
COPY hatch_build.py ${AIRFLOW_SOURCES}/
COPY --from=scripts install_airflow.sh /scripts/docker/
diff --git a/INSTALL b/INSTALL
index a1e2034b6993..38434d9192d4 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,6 +1,7 @@
-# INSTALL / BUILD instructions for Apache Airflow
+INSTALL / BUILD instructions for Apache Airflow

-## Basic installation of Airflow from sources and development environment setup
+Basic installation of Airflow from sources and development environment setup
+============================================================================

This is a generic installation method that requires minimal standard tools to develop Airflow and test it in
a local virtual environment (using a standard CPython installation and `pip`).
@@ -23,7 +24,18 @@ MacOS (Mojave/Catalina) you might need to to install XCode command line tools an

    brew install sqlite mysql postgresql

-## Downloading and installing Airflow from sources
+`pip` is one of the build packaging front-ends that might be used to install Airflow. It's the one
+that we recommend (see below) for reproducible installation of specific versions of Airflow.
+
+As of version 2.8 Airflow follows PEP 517/518 and uses the `pyproject.toml` file to define build dependencies
+and the build process, and it requires relatively modern versions of packaging tools to build Airflow from
+local sources or sdist packages, as PEP 517 compliant build hooks are used to determine dynamic build
+dependencies. For `pip` this means that at least version 22.1.0 is needed (released at the beginning of
+2022) to build or install Airflow from sources. This does not affect the ability to install Airflow from
+released wheel packages.
+
+Downloading and installing Airflow from sources
+-----------------------------------------------

While you can get Airflow sources in various ways (including cloning https://github.com/apache/airflow/), the
canonical way to download it is to fetch the tarball published at https://downloads.apache.org where you can
@@ -95,7 +107,8 @@ Airflow project contains some pre-defined virtualenv definitions in ``pyproject.
easily used by hatch to create your local venvs. This is not necessary for you to develop and test
Airflow, but it is a convenient way to manage your local Python versions and virtualenvs.

-## Installing Hatch
+Installing Hatch
+----------------

You can install hatch in various ways (including GUI installers).

@@ -128,19 +141,21 @@ You can see the list of available envs with:

This is what it shows currently:

-┏━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ Name        ┃ Type    ┃ Features ┃ Description                                                   ┃
-┡━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ default     │ virtual │ devel    │ Default environment with Python 3.8 for maximum compatibility │
-├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤
-│ airflow-38  │ virtual │          │ Environment with Python 3.8. No devel installed.              │
-├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤
-│ airflow-39  │ virtual │          │ Environment with Python 3.9. No devel installed.              │
-├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤
-│ airflow-310 │ virtual │          │ Environment with Python 3.10. No devel installed.             │
-├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤
-│ airflow-311 │ virtual │          │ Environment with Python 3.11. No devel installed              │
-└─────────────┴─────────┴──────────┴───────────────────────────────────────────────────────────────┘
+┏━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Name        ┃ Type    ┃ Description                                                   ┃
+┡━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ default     │ virtual │ Default environment with Python 3.8 for maximum compatibility │
+├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤
+│ airflow-38  │ virtual │ Environment with Python 3.8. No devel installed.              │
+├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤
+│ airflow-39  │ virtual │ Environment with Python 3.9. No devel installed.              │
+├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤
+│ airflow-310 │ virtual │ Environment with Python 3.10. No devel installed.             │
+├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤
+│ airflow-311 │ virtual │ Environment with Python 3.11. No devel installed.             │
+├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤
+│ airflow-312 │ virtual │ Environment with Python 3.12. No devel installed.             │
+└─────────────┴─────────┴───────────────────────────────────────────────────────────────┘

The default env (if you have not used one explicitly) is `default` and it is a Python 3.8
virtualenv for maximum compatibility with `devel` extra installed - this devel extra contains the minimum set
@@ -229,7 +244,8 @@ and install to latest supported ones by pure airflow core.

    pip install -e ".[devel]" \
      --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt"

-## All airflow extras
+Airflow extras
+==============

Airflow has a number of extras that you can install to get additional dependencies. They sometimes install
providers, sometimes enable other features where packages are not installed by default.

@@ -239,36 +255,69 @@ https://airflow.apache.org/docs/apache-airflow/stable/extra-packages-ref.html

The list of available extras is below.

-Regular extras that are available for users in the Airflow package.
+Core extras
+-----------
+
+Those extras are available as regular core Airflow extras - they install optional features of Airflow.
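+
+For example (an illustrative sketch - any extra from the list below is installed the same way,
+here shown with the `graphviz` core extra):
+
+    pip install "apache-airflow[graphviz]"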
+ +# START CORE EXTRAS HERE + +aiobotocore, apache-atlas, apache-webhdfs, async, cgroups, deprecated-api, github-enterprise, +google-auth, graphviz, kerberos, ldap, leveldb, otel, pandas, password, pydantic, rabbitmq, s3fs, +saml, sentry, statsd, uv, virtualenv + +# END CORE EXTRAS HERE -# START REGULAR EXTRAS HERE +Provider extras +--------------- -aiobotocore, airbyte, alibaba, all, all-core, all-dbs, amazon, apache-atlas, apache-beam, apache- -cassandra, apache-drill, apache-druid, apache-flink, apache-hdfs, apache-hive, apache-impala, -apache-kafka, apache-kylin, apache-livy, apache-pig, apache-pinot, apache-spark, apache-webhdfs, -apprise, arangodb, asana, async, atlas, atlassian-jira, aws, azure, cassandra, celery, cgroups, -cloudant, cncf-kubernetes, cohere, common-io, common-sql, crypto, databricks, datadog, dbt-cloud, -deprecated-api, dingding, discord, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp, -gcp_api, github, github-enterprise, google, google-auth, graphviz, grpc, hashicorp, hdfs, hive, -http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft-azure, -microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, odbc, openai, -openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, -pgvector, pinecone, pinot, postgres, presto, pydantic, qdrant, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, teradata, trino, uv, vertica, virtualenv, -weaviate, webhdfs, winrm, yandex, zendesk +Those extras are available as regular Airflow extras, they install provider packages in standard builds +or dependencies that are necessary to enable the feature in editable build. -# END REGULAR EXTRAS HERE +# START PROVIDER EXTRAS HERE -Devel extras - used to install development-related tools. Only available during editable install. +airbyte, alibaba, amazon, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.flink, +apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, +apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant, +cncf.kubernetes, cohere, common.io, common.sql, databricks, datadog, dbt.cloud, dingding, discord, +docker, elasticsearch, exasol, fab, facebook, ftp, github, google, grpc, hashicorp, http, imap, +influxdb, jdbc, jenkins, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, pagerduty, +papermill, pgvector, pinecone, postgres, presto, qdrant, redis, salesforce, samba, segment, +sendgrid, sftp, singularity, slack, smtp, snowflake, sqlite, ssh, tableau, tabular, telegram, +teradata, trino, vertica, weaviate, yandex, zendesk + +# END PROVIDER EXTRAS HERE + +Devel extras +------------ + +The `devel` extras are not available in the released packages. They are only available when you install +Airflow from sources in `editable` installation - i.e. one that you are usually using to contribute to +Airflow. They provide tools such as `pytest` and `mypy` for general purpose development and testing. 
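+
+For example, a sketch of an editable install that combines the `devel` extra with `devel-tests`
+(both taken from the list below):
+
+    pip install -e ".[devel,devel-tests]"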
# START DEVEL EXTRAS HERE -devel, devel-all, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel- -hadoop, devel-mypy, devel-sentry, devel-static-checks, devel-tests +devel, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel-hadoop, +devel-mypy, devel-sentry, devel-static-checks, devel-tests # END DEVEL EXTRAS HERE +Bundle extras +------------- + +Those extras are bundles dynamically generated from other extras. + +# START BUNDLE EXTRAS HERE + +all, all-core, all-dbs, devel-all, devel-ci + +# END BUNDLE EXTRAS HERE + + +Doc extras +---------- + Doc extras - used to install dependencies that are needed to build documentation. Only available during editable install. @@ -278,7 +327,20 @@ doc, doc-gen # END DOC EXTRAS HERE -## Compiling front end assets +Deprecated extras +----------------- + +The `deprecated` extras are deprecated extras from Airflow 1 that will be removed in future versions. + +# START DEPRECATED EXTRAS HERE + +atlas, aws, azure, cassandra, crypto, druid, gcp, gcp-api, hdfs, hive, kubernetes, mssql, pinot, s3, +spark, webhdfs, winrm + +# END DEPRECATED EXTRAS HERE + +Compiling front end assets +-------------------------- Sometimes you can see that front-end assets are missing and website looks broken. This is because you need to compile front-end assets. This is done automatically when you create a virtualenv diff --git a/airflow_pre_installed_providers.txt b/airflow_pre_installed_providers.txt index 2d38abfce119..8ea073266ae6 100644 --- a/airflow_pre_installed_providers.txt +++ b/airflow_pre_installed_providers.txt @@ -1,7 +1,7 @@ # List of all the providers that are pre-installed when you run `pip install apache-airflow` without extras common.io common.sql -fab>=1.0.2dev0 +fab>=1.0.2dev1 ftp http imap diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 59022ef9aaa1..cda4de256d09 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -16,7 +16,7 @@ # under the License. [build-system] -requires = ["hatchling"] +requires = ["hatchling==1.22.4"] build-backend = "hatchling.build" [project] diff --git a/contributing-docs/07_local_virtualenv.rst b/contributing-docs/07_local_virtualenv.rst index a7a984e47cca..373b6d76aaf5 100644 --- a/contributing-docs/07_local_virtualenv.rst +++ b/contributing-docs/07_local_virtualenv.rst @@ -51,6 +51,16 @@ of required packages. - MacOs with ARM architectures require graphviz for venv setup, refer `here `_ to install graphviz - The helm chart tests need helm to be installed as a pre requisite. Refer `here `_ to install and setup helm +.. note:: + + As of version 2.8 Airflow follows PEP 517/518 and uses ``pyproject.toml`` file to define build dependencies + and build process and it requires relatively modern versions of packaging tools to get airflow built from + local sources or ``sdist`` packages, as PEP 517 compliant build hooks are used to determine dynamic build + dependencies. In case of ``pip`` it means that at least version 22.1.0 is needed (released at the beginning of + 2022) to build or install Airflow from sources. This does not affect the ability of installing Airflow from + released wheel packages. + + Installing Airflow .................. 
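+A minimal sketch of what the note above implies - make sure the packaging front-end is recent
+enough, then install Airflow from the local sources in editable mode:
+
+.. code:: bash
+
+    pip install --upgrade "pip>=22.1.0"
+    pip install -e .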
@@ -173,26 +183,31 @@ You can see the list of available envs with:

This is what it shows currently:

-+-------------+---------+----------+---------------------------------------------------------------+
-| Name        | Type    | Features | Description                                                   |
-+=============+=========+==========+===============================================================+
-| default     | virtual | devel    | Default environment with Python 3.8 for maximum compatibility |
-+-------------+---------+----------+---------------------------------------------------------------+
-| airflow-38  | virtual | devel    | Environment with Python 3.8                                   |
-+-------------+---------+----------+---------------------------------------------------------------+
-| airflow-39  | virtual | devel    | Environment with Python 3.9                                   |
-+-------------+---------+----------+---------------------------------------------------------------+
-| airflow-310 | virtual | devel    | Environment with Python 3.10                                  |
-+-------------+---------+----------+---------------------------------------------------------------+
-| airflow-311 | virtual | devel    | Environment with Python 3.11                                  |
-+-------------+---------+----------+---------------------------------------------------------------+
-| airflow-312 | virtual | devel    | Environment with Python 3.12                                  |
-+-------------+---------+----------+---------------------------------------------------------------+
++-------------+---------+---------------------------------------------------------------+
+| Name        | Type    | Description                                                   |
++=============+=========+===============================================================+
+| default     | virtual | Default environment with Python 3.8 for maximum compatibility |
++-------------+---------+---------------------------------------------------------------+
+| airflow-38  | virtual | Environment with Python 3.8. No devel installed.              |
++-------------+---------+---------------------------------------------------------------+
+| airflow-39  | virtual | Environment with Python 3.9. No devel installed.              |
++-------------+---------+---------------------------------------------------------------+
+| airflow-310 | virtual | Environment with Python 3.10. No devel installed.             |
++-------------+---------+---------------------------------------------------------------+
+| airflow-311 | virtual | Environment with Python 3.11. No devel installed.             |
++-------------+---------+---------------------------------------------------------------+
+| airflow-312 | virtual | Environment with Python 3.12. No devel installed.             |
++-------------+---------+---------------------------------------------------------------+

The default env (if you have not used one explicitly) is ``default`` and it is a Python 3.8
-virtualenv for maximum compatibility with ``devel`` extra installed - this devel extra contains the minimum set
-of dependencies and tools that should be used during unit testing of core Airflow and running all ``airflow``
-CLI commands - without support for providers or databases.
+virtualenv for maximum compatibility. You can install the ``devel`` set of dependencies into it
+by running the following command after entering the environment:
+
+.. code:: bash
+
+    pip install -e ".[devel]"
+
The other environments are just bare-bones Python virtualenvs with Airflow core requirements only,
without any extras installed and without any tools.
They are much faster to create than the default diff --git a/contributing-docs/08_static_code_checks.rst b/contributing-docs/08_static_code_checks.rst index a947c7512f29..18222fd601c8 100644 --- a/contributing-docs/08_static_code_checks.rst +++ b/contributing-docs/08_static_code_checks.rst @@ -170,6 +170,8 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-google-re2-as-dependency | Check google-re2 is declared as dependency when needed | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ +| check-hatch-build-order | Check order of dependencies in hatch_build.py | | ++-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-hooks-apply | Check if all hooks apply to the repository | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-incorrect-use-of-LoggingMixin | Make sure LoggingMixin is not used alone | | @@ -208,8 +210,6 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-pydevd-left-in-code | Check for pydevd debug statements accidentally left | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ -| check-pyproject-toml-order | Check order of dependencies in pyproject.toml | | -+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-revision-heads-map | Check that the REVISION_HEADS_MAP is up-to-date | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-safe-filter-usage-in-html | Don't use safe in templates | | diff --git a/contributing-docs/12_airflow_dependencies_and_extras.rst b/contributing-docs/12_airflow_dependencies_and_extras.rst index bf16efd91de1..91328a24abb3 100644 --- a/contributing-docs/12_airflow_dependencies_and_extras.rst +++ b/contributing-docs/12_airflow_dependencies_and_extras.rst @@ -156,23 +156,68 @@ the documentation. This is the full list of these extras: + +Core extras +........... + +Those extras are available as regular core airflow extras - they install optional features of Airflow. + + .. START CORE EXTRAS HERE + +aiobotocore, apache-atlas, apache-webhdfs, async, cgroups, deprecated-api, github-enterprise, +google-auth, graphviz, kerberos, ldap, leveldb, otel, pandas, password, pydantic, rabbitmq, s3fs, +saml, sentry, statsd, uv, virtualenv + + .. END CORE EXTRAS HERE + +Provider extras +............... + +Those extras are available as regular Airflow extras, they install provider packages in standard builds +or dependencies that are necessary to enable the feature in editable build. + + .. 
START PROVIDER EXTRAS HERE + +airbyte, alibaba, amazon, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.flink, +apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, +apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant, +cncf.kubernetes, cohere, common.io, common.sql, databricks, datadog, dbt.cloud, dingding, discord, +docker, elasticsearch, exasol, fab, facebook, ftp, github, google, grpc, hashicorp, http, imap, +influxdb, jdbc, jenkins, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, pagerduty, +papermill, pgvector, pinecone, postgres, presto, qdrant, redis, salesforce, samba, segment, +sendgrid, sftp, singularity, slack, smtp, snowflake, sqlite, ssh, tableau, tabular, telegram, +teradata, trino, vertica, weaviate, yandex, zendesk + + .. END PROVIDER EXTRAS HERE + + Devel extras ............. The ``devel`` extras are not available in the released packages. They are only available when you install Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to -Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing, also -some providers have their own development-related extras tbat allow to install tools necessary to run tests, -where the tools are specific for the provider. - +Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing. .. START DEVEL EXTRAS HERE -devel, devel-all, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel- -hadoop, devel-mypy, devel-sentry, devel-static-checks, devel-tests +devel, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel-hadoop, +devel-mypy, devel-sentry, devel-static-checks, devel-tests .. END DEVEL EXTRAS HERE +Bundle extras +............. + +Those extras are bundles dynamically generated from other extras. + + .. START BUNDLE EXTRAS HERE + +all, all-core, all-dbs, devel-all, devel-ci + + .. END BUNDLE EXTRAS HERE + + Doc extras ........... @@ -189,33 +234,17 @@ doc, doc-gen .. END DOC EXTRAS HERE +Deprecated extras +................. -Regular extras -.............. - -Those extras are available as regular Airflow extras and are targeted to be used by Airflow users and -contributors to select features of Airflow they want to use They might install additional providers or -just install dependencies that are necessary to enable the feature. - - .. START REGULAR EXTRAS HERE +The ``deprecated`` extras are deprecated extras from Airflow 1 that will be removed in future versions. 
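+
+For example, the deprecated ``gcp`` alias maps to the ``google`` provider extra, so the two
+commands below are equivalent (the first one kept only for backwards compatibility):
+
+.. code:: bash
+
+    pip install "apache-airflow[gcp]"
+    pip install "apache-airflow[google]"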
-aiobotocore, airbyte, alibaba, all, all-core, all-dbs, amazon, apache-atlas, apache-beam, apache- -cassandra, apache-drill, apache-druid, apache-flink, apache-hdfs, apache-hive, apache-impala, -apache-kafka, apache-kylin, apache-livy, apache-pig, apache-pinot, apache-spark, apache-webhdfs, -apprise, arangodb, asana, async, atlas, atlassian-jira, aws, azure, cassandra, celery, cgroups, -cloudant, cncf-kubernetes, cohere, common-io, common-sql, crypto, databricks, datadog, dbt-cloud, -deprecated-api, dingding, discord, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp, -gcp_api, github, github-enterprise, google, google-auth, graphviz, grpc, hashicorp, hdfs, hive, -http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft-azure, -microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, odbc, openai, -openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, -pgvector, pinecone, pinot, postgres, presto, pydantic, qdrant, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, teradata, trino, uv, vertica, virtualenv, -weaviate, webhdfs, winrm, yandex, zendesk + .. START DEPRECATED EXTRAS HERE - .. END REGULAR EXTRAS HERE +atlas, aws, azure, cassandra, crypto, druid, gcp, gcp-api, hdfs, hive, kubernetes, mssql, pinot, s3, +spark, webhdfs, winrm + .. END DEPRECATED EXTRAS HERE ----- diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 981a7bb85901..0a6ef086ffd4 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: fb5183650b0efb5ec8241fcd53b6e5cc26c21c8d273fff91d0e8a3716f37703c44c7ae189653dd3fd52624d89c97635e0a7fc09104138ba35cb3ccf45f8efd4b +Package config hash: 64737d477cded72bb31d3b440bb2e5b76d48e865fd5d7ecc3b2cf9d1f0c889a7232e78f74854e9d2d0a1fd0dd653cb3ff81aee7387fea5afddec91f16ee63cd0 --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/doc/02_customizing.rst b/dev/breeze/doc/02_customizing.rst index 78d24f30db7b..4e41bedd0874 100644 --- a/dev/breeze/doc/02_customizing.rst +++ b/dev/breeze/doc/02_customizing.rst @@ -45,7 +45,7 @@ will be evaluated at entering the environment. The ``files`` folder from your local sources is automatically mounted to the container under ``/files`` path and you can put there any files you want to make available for the Breeze container. -You can also copy any .whl or .sdist packages to dist and when you pass ``--use-packages-from-dist`` flag +You can also copy any .whl or ``sdist`` packages to dist and when you pass ``--use-packages-from-dist`` flag as ``wheel`` or ``sdist`` line parameter, breeze will automatically install the packages found there when you enter Breeze. 
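+
+For example (a sketch - the wheel filename is illustrative and the flag is the one described
+above):
+
+.. code:: bash
+
+    cp /tmp/apache_airflow_providers_http-1.0.0-py3-none-any.whl dist/
+    breeze --use-packages-from-dist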
diff --git a/dev/breeze/doc/ci/04_selective_checks.md b/dev/breeze/doc/ci/04_selective_checks.md
index 9a4959dfc5d8..7df3c6d4c385 100644
--- a/dev/breeze/doc/ci/04_selective_checks.md
+++ b/dev/breeze/doc/ci/04_selective_checks.md
@@ -52,11 +52,11 @@ We have the following Groups of files for CI that determine which tests are run:
* `API tests files` and `Codegen test files` - those are OpenAPI definition files that impact
  Open API specification and determine that we should run dedicated API tests.
* `Helm files` - change in those files impacts helm "rendering" tests - `chart` folder and `helm_tests` folder.
-* `Setup files` - change in the setup files indicates that we should run `upgrade to newer dependencies` -
-  pyproject.toml and generated dependencies files in `generated` folder. The dependency files and part of
-  the pyproject.toml are automatically generated from the provider.yaml files in provider by
-  the `update-providers-dependencies` pre-commit. The provider.yaml is a single source of truth for each
-  provider.
+* `Build files` - change in those files indicates that we should run `upgrade to newer dependencies` -
+  build dependencies in `pyproject.toml` and generated dependencies files in `generated` folder.
+  The dependencies are automatically generated from the `provider.yaml` files in each provider by
+  the `hatch_build.py` build hook. The `provider.yaml` file is the single source of truth for each
+  provider, and `hatch_build.py` for all regular dependencies.
* `DOC files` - change in those files indicates that we should run documentation builds (both airflow
  sources and airflow documentation)
* `WWW files` - those are files for the WWW part of our UI (useful to determine if UI tests should run)
@@ -73,7 +73,6 @@ We have the following Groups of files for CI that determine which tests are run:
* `All Docs Python files` - files that are checked by `mypy-docs` static checks
* `All Provider Yaml files` - all provider yaml files
-
We have a number of `TEST_TYPES` that can be selectively disabled/enabled based on the content
of the incoming PR. Usually they are limited to a sub-folder of the "tests" folder but there
are some exceptions. You can read more about those in `testing.rst `. Those types
@@ -114,7 +113,8 @@ together using `pytest-xdist` (pytest-xdist distributes the tests among parallel
  types to execute. This is done because changes in core might impact all the other test types.
* if `CI Image building` is disabled, only basic pre-commits are enabled - no `image-depending`
  pre-commits are enabled.
-* If there are some setup files changed, `upgrade to newer dependencies` is enabled.
+* If there are some build dependencies changed (`hatch_build.py` and updated system dependencies in
+  the `pyproject.toml`) - then `upgrade to newer dependencies` is enabled.
* If docs are built, the `docs-list-as-string` will determine which docs packages to build. This is based
  on several criteria: if any of the airflow core, charts, docker-stack, providers files or docs have
  changed, then corresponding packages are built (including cross-dependent providers). If any of the core files
@@ -164,67 +164,70 @@ separated by spaces.
This is to accommodate for the wau how outputs of this kind Github Actions to pass the list of parameters to a command to execute -| Output | Meaning of the output | Example value | List as string | -|------------------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------|----------------| -| affected-providers-list-as-string | List of providers affected when they are selectively affected. | airbyte http | * | -| all-python-versions | List of all python versions there are available in the form of JSON array | ['3.8', '3.9', '3.10'] | | -| all-python-versions-list-as-string | List of all python versions there are available in the form of space separated string | 3.8 3.9 3.10 | * | -| all-versions | If set to true, then all python, k8s, DB versions are used for tests. | false | | -| basic-checks-only | Whether to run all static checks ("false") or only basic set of static checks ("true") | false | | -| cache-directive | Which cache should be used for images ("registry", "local" , "disabled") | registry | | -| chicken-egg-providers | List of providers that should be considered as "chicken-egg" - expecting development Airflow version | | | -| ci-image-build | Whether CI image build is needed | true | | -| debug-resources | Whether resources usage should be printed during parallel job execution ("true"/ "false") | false | | -| default-branch | Which branch is default for the build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main | | -| default-constraints-branch | Which branch is default for the build ("constraints-main" for main branch, "constraints-2-4" etc.) | constraints-main | | -| default-helm-version | Which Helm version to use as default | v3.9.4 | | -| default-kind-version | Which Kind version to use as default | v0.16.0 | | -| default-kubernetes-version | Which Kubernetes version to use as default | v1.25.2 | | -| default-mysql-version | Which MySQL version to use as default | 5.7 | | -| default-postgres-version | Which Postgres version to use as default | 10 | | -| default-python-version | Which Python version to use as default | 3.8 | | -| docs-build | Whether to build documentation ("true"/"false") | true | | -| docs-list-as-string | What filter to apply to docs building - based on which documentation packages should be built | apache-airflow helm-chart google | | -| full-tests-needed | Whether this build runs complete set of tests or only subset (for faster PR builds) [1] | false | | -| generated-dependencies-changed | Whether generated dependencies have changed ("true"/"false") | false | | -| helm-version | Which Helm version to use for tests | v3.9.4 | | -| is-airflow-runner | Whether runner used is an airflow or infrastructure runner (true if airflow/false if infrastructure) | false | | -| is-amd-runner | Whether runner used is an AMD one | true | | -| is-arm-runner | Whether runner used is an ARM one | false | | -| is-committer-build | Whether the build is triggered by a committer | false | | -| is-k8s-runner | Whether the build runs on our k8s infrastructure | false | | -| is-self-hosted-runner | Whether the runner is self-hosted | false | | -| is-vm-runner | Whether the runner uses VM to run | true | | -| kind-version | Which Kind version to use for tests | v0.16.0 | | -| kubernetes-combos-list-as-string | All combinations of Python version and Kubernetes version to use for tests as space-separated string | 3.8-v1.25.2 3.9-v1.26.4 | * | -| 
kubernetes-versions | All Kubernetes versions to use for tests as JSON array | ['v1.25.2'] | | -| kubernetes-versions-list-as-string | All Kubernetes versions to use for tests as space-separated string | v1.25.2 | * | -| mypy-folders | List of folders to be considered for mypy | [] | | -| mysql-exclude | Which versions of MySQL to exclude for tests as JSON array | [] | | -| mysql-versions | Which versions of MySQL to use for tests as JSON array | ['5.7'] | | -| needs-api-codegen | Whether "api-codegen" are needed to run ("true"/"false") | true | | -| needs-api-tests | Whether "api-tests" are needed to run ("true"/"false") | true | | -| needs-helm-tests | Whether Helm tests are needed to run ("true"/"false") | true | | -| needs-javascript-scans | Whether javascript CodeQL scans should be run ("true"/"false") | true | | -| needs-mypy | Whether mypy check is supposed to run in this build | true | | -| needs-python-scans | Whether Python CodeQL scans should be run ("true"/"false") | true | | -| parallel-test-types-list-as-string | Which test types should be run for unit tests | API Always Providers Providers\[-google\] | * | -| postgres-exclude | Which versions of Postgres to exclude for tests as JSON array | [] | | -| postgres-versions | Which versions of Postgres to use for tests as JSON array | ['10'] | | -| prod-image-build | Whether PROD image build is needed | true | | -| prod-image-build | Whether PROD image build is needed | true | | -| providers-compatibility-checks | List of dicts: (python_version, airflow_version, removed_providers) for compatibility checks | [] | | -| python-versions | List of python versions to use for that build | ['3.8'] | * | -| python-versions-list-as-string | Which versions of MySQL to use for tests as space-separated string | 3.8 | * | -| run-amazon-tests | Whether Amazon tests should be run ("true"/"false") | true | | -| run-kubernetes-tests | Whether Kubernetes tests should be run ("true"/"false") | true | | -| run-tests | Whether unit tests should be run ("true"/"false") | true | | -| run-www-tests | Whether WWW tests should be run ("true"/"false") | true | | -| runs-on | List of labels assigned for runners for that build (used to select runners) | ["ubuntu-22.04"] | | -| skip-pre-commits | Which pre-commits should be skipped during the static-checks run | check-provider-yaml-valid,flynt,identity | | -| skip-provider-tests | When provider tests should be skipped (on non-main branch or when no provider changes detected) | true | | -| sqlite-exclude | Which versions of Sqlite to exclude for tests as JSON array | [] | | -| upgrade-to-newer-dependencies | Whether the image build should attempt to upgrade all dependencies (true/false or commit hash) | false | | +| Output | Meaning of the output | Example value | List as string | +|----------------------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------|----------------| +| affected-providers-list-as-string | List of providers affected when they are selectively affected. | airbyte http | * | +| all-python-versions | List of all python versions there are available in the form of JSON array | ['3.8', '3.9', '3.10'] | | +| all-python-versions-list-as-string | List of all python versions there are available in the form of space separated string | 3.8 3.9 3.10 | * | +| all-versions | If set to true, then all python, k8s, DB versions are used for tests. 
| false | | +| basic-checks-only | Whether to run all static checks ("false") or only basic set of static checks ("true") | false | | +| build_system_changed_in_pyproject_toml | When builds system dependencies changed in pyproject.toml changed in the PR. | false | | +| cache-directive | Which cache should be used for images ("registry", "local" , "disabled") | registry | | +| chicken-egg-providers | List of providers that should be considered as "chicken-egg" - expecting development Airflow version | | | +| ci-image-build | Whether CI image build is needed | true | | +| debug-resources | Whether resources usage should be printed during parallel job execution ("true"/ "false") | false | | +| default-branch | Which branch is default for the build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main | | +| default-constraints-branch | Which branch is default for the build ("constraints-main" for main branch, "constraints-2-4" etc.) | constraints-main | | +| default-helm-version | Which Helm version to use as default | v3.9.4 | | +| default-kind-version | Which Kind version to use as default | v0.16.0 | | +| default-kubernetes-version | Which Kubernetes version to use as default | v1.25.2 | | +| default-mysql-version | Which MySQL version to use as default | 5.7 | | +| default-postgres-version | Which Postgres version to use as default | 10 | | +| default-python-version | Which Python version to use as default | 3.8 | | +| docs-build | Whether to build documentation ("true"/"false") | true | | +| docs-list-as-string | What filter to apply to docs building - based on which documentation packages should be built | apache-airflow helm-chart google | | +| full-tests-needed | Whether this build runs complete set of tests or only subset (for faster PR builds) [1] | false | | +| generated-dependencies-changed | Whether generated dependencies have changed ("true"/"false") | false | | +| hatch-build-changed | When hatch build.py changed in the PR. 
| false | | +| helm-version | Which Helm version to use for tests | v3.9.4 | | +| is-airflow-runner | Whether runner used is an airflow or infrastructure runner (true if airflow/false if infrastructure) | false | | +| is-amd-runner | Whether runner used is an AMD one | true | | +| is-arm-runner | Whether runner used is an ARM one | false | | +| is-committer-build | Whether the build is triggered by a committer | false | | +| is-k8s-runner | Whether the build runs on our k8s infrastructure | false | | +| is-self-hosted-runner | Whether the runner is self-hosted | false | | +| is-vm-runner | Whether the runner uses VM to run | true | | +| kind-version | Which Kind version to use for tests | v0.16.0 | | +| kubernetes-combos-list-as-string | All combinations of Python version and Kubernetes version to use for tests as space-separated string | 3.8-v1.25.2 3.9-v1.26.4 | * | +| kubernetes-versions | All Kubernetes versions to use for tests as JSON array | ['v1.25.2'] | | +| kubernetes-versions-list-as-string | All Kubernetes versions to use for tests as space-separated string | v1.25.2 | * | +| mypy-folders | List of folders to be considered for mypy | [] | | +| mysql-exclude | Which versions of MySQL to exclude for tests as JSON array | [] | | +| mysql-versions | Which versions of MySQL to use for tests as JSON array | ['5.7'] | | +| needs-api-codegen | Whether "api-codegen" are needed to run ("true"/"false") | true | | +| needs-api-tests | Whether "api-tests" are needed to run ("true"/"false") | true | | +| needs-helm-tests | Whether Helm tests are needed to run ("true"/"false") | true | | +| needs-javascript-scans | Whether javascript CodeQL scans should be run ("true"/"false") | true | | +| needs-mypy | Whether mypy check is supposed to run in this build | true | | +| needs-python-scans | Whether Python CodeQL scans should be run ("true"/"false") | true | | +| parallel-test-types-list-as-string | Which test types should be run for unit tests | API Always Providers Providers\[-google\] | * | +| postgres-exclude | Which versions of Postgres to exclude for tests as JSON array | [] | | +| postgres-versions | Which versions of Postgres to use for tests as JSON array | ['10'] | | +| prod-image-build | Whether PROD image build is needed | true | | +| prod-image-build | Whether PROD image build is needed | true | | +| providers-compatibility-checks | List of dicts: (python_version, airflow_version, removed_providers) for compatibility checks | [] | | +| pyproject-toml-changed | When pyproject.toml changed in the PR. 
| false | | +| python-versions | List of python versions to use for that build | ['3.8'] | * | +| python-versions-list-as-string | Which versions of MySQL to use for tests as space-separated string | 3.8 | * | +| run-amazon-tests | Whether Amazon tests should be run ("true"/"false") | true | | +| run-kubernetes-tests | Whether Kubernetes tests should be run ("true"/"false") | true | | +| run-tests | Whether unit tests should be run ("true"/"false") | true | | +| run-www-tests | Whether WWW tests should be run ("true"/"false") | true | | +| runs-on | List of labels assigned for runners for that build (used to select runners) | ["ubuntu-22.04"] | | +| skip-pre-commits | Which pre-commits should be skipped during the static-checks run | check-provider-yaml-valid,flynt,identity | | +| skip-provider-tests | When provider tests should be skipped (on non-main branch or when no provider changes detected) | true | | +| sqlite-exclude | Which versions of Sqlite to exclude for tests as JSON array | [] | | +| upgrade-to-newer-dependencies | Whether the image build should attempt to upgrade all dependencies (true/false or commit hash) | false | | [1] Note for deciding if `full tests needed` mode is enabled and provider.yaml files. diff --git a/dev/breeze/doc/images/output_static-checks.svg b/dev/breeze/doc/images/output_static-checks.svg index 200d53394d3e..a6c9ba8bec5a 100644 --- a/dev/breeze/doc/images/output_static-checks.svg +++ b/dev/breeze/doc/images/output_static-checks.svg @@ -1,4 +1,4 @@ - +
+    # Docutils 0.17.0 converts generated <div class="section"> into <section> and breaks our doc formatting
+    # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle
+    # <section>
tags for sections + "docutils<0.17,>=0.16", + "sphinx-airflow-theme>=0.0.12", + "sphinx-argparse>=0.4.0", + # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 + # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x + "sphinx-autoapi>=2.1.1", + "sphinx-copybutton>=0.5.2", + "sphinx-design>=0.5.0", + "sphinx-jinja>=2.0.2", + "sphinx-rtd-theme>=2.0.0", + # Currently we are using sphinx 5 but we need to migrate to Sphinx 7 + "sphinx>=5.3.0,<6.0.0", + "sphinxcontrib-applehelp>=1.0.4", + "sphinxcontrib-devhelp>=1.0.2", + "sphinxcontrib-htmlhelp>=2.0.1", + "sphinxcontrib-httpdomain>=1.8.1", + "sphinxcontrib-jquery>=4.1", + "sphinxcontrib-jsmath>=1.0.1", + "sphinxcontrib-qthelp>=1.0.3", + "sphinxcontrib-redoc>=1.6.0", + "sphinxcontrib-serializinghtml==1.1.5", + "sphinxcontrib-spelling>=8.0.0", + ], + "doc-gen": [ + "apache-airflow[doc]", + "eralchemy2>=1.3.8", + ], + # END OF doc extras +} + +DEVEL_EXTRAS: dict[str, list[str]] = { + # START OF devel extras + "devel-debuggers": [ + "ipdb>=0.13.13", + ], + "devel-devscripts": [ + "click>=8.0", + "gitpython>=3.1.40", + "hatch>=1.9.1", + "pipdeptree>=2.13.1", + "pygithub>=2.1.1", + "restructuredtext-lint>=1.4.0", + "rich-click>=1.7.0", + "semver>=3.0.2", + "towncrier>=23.11.0", + "twine>=4.0.2", + ], + "devel-duckdb": [ + # Python 3.12 support was added in 0.10.0 + "duckdb>=0.10.0; python_version >= '3.12'", + "duckdb>=0.9.0; python_version < '3.12'", + ], + # Mypy 0.900 and above ships only with stubs from stdlib so if we need other stubs, we need to install them + # manually as `types-*`. See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports + # for details. We want to install them explicitly because we want to eventually move to + # mypyd which does not support installing the types dynamically with --install-types + "devel-mypy": [ + # TODO: upgrade to newer versions of MyPy continuously as they are released + # Make sure to upgrade the mypy version in update-common-sql-api-stubs in .pre-commit-config.yaml + # when you upgrade it here !!!! 
+ "mypy==1.9.0", + "types-Deprecated", + "types-Markdown", + "types-PyMySQL", + "types-PyYAML", + "types-aiofiles", + "types-certifi", + "types-croniter", + "types-docutils", + "types-paramiko", + "types-protobuf", + "types-python-dateutil", + "types-python-slugify", + "types-pytz", + "types-redis", + "types-requests", + "types-setuptools", + "types-tabulate", + "types-termcolor", + "types-toml", + ], + "devel-sentry": [ + "blinker>=1.7.0", + ], + "devel-static-checks": [ + "black>=23.12.0", + "pre-commit>=3.5.0", + "ruff==0.3.3", + "yamllint>=1.33.0", + ], + "devel-tests": [ + "aiofiles>=23.2.0", + "aioresponses>=0.7.6", + "backports.zoneinfo>=0.2.1;python_version<'3.9'", + "beautifulsoup4>=4.7.1", + # Coverage 7.4.0 added experimental support for Python 3.12 PEP669 which we use in Airflow + "coverage>=7.4.0", + "pytest-asyncio>=0.23.3", + "pytest-cov>=4.1.0", + "pytest-icdiff>=0.9", + "pytest-instafail>=0.5.0", + "pytest-mock>=3.12.0", + "pytest-rerunfailures>=13.0", + "pytest-timeouts>=1.2.1", + "pytest-xdist>=3.5.0", + # Temporary upper limmit to <8, not all dependencies at that moment ready to use 8.0 + # Internal meta-task for track https://github.com/apache/airflow/issues/37156 + "pytest>=7.4.4,<8.0", + "requests_mock>=1.11.0", + "time-machine>=2.13.0", + "wheel>=0.42.0", + ], + "devel": [ + "apache-airflow[celery]", + "apache-airflow[cncf-kubernetes]", + "apache-airflow[common-io]", + "apache-airflow[common-sql]", + "apache-airflow[devel-debuggers]", + "apache-airflow[devel-devscripts]", + "apache-airflow[devel-duckdb]", + "apache-airflow[devel-mypy]", + "apache-airflow[devel-sentry]", + "apache-airflow[devel-static-checks]", + "apache-airflow[devel-tests]", + "apache-airflow[fab]", + "apache-airflow[ftp]", + "apache-airflow[http]", + "apache-airflow[imap]", + "apache-airflow[sqlite]", + ], + "devel-all-dbs": [ + "apache-airflow[apache-cassandra]", + "apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-pinot]", + "apache-airflow[arangodb]", + "apache-airflow[cloudant]", + "apache-airflow[databricks]", + "apache-airflow[exasol]", + "apache-airflow[influxdb]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[trino]", + "apache-airflow[vertica]", + ], + "devel-ci": [ + "apache-airflow[devel-all]", + ], + "devel-hadoop": [ + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[devel]", + "apache-airflow[hdfs]", + "apache-airflow[kerberos]", + "apache-airflow[presto]", + ], +} + +BUNDLE_EXTRAS: dict[str, list[str]] = { + "all-dbs": [ + "apache-airflow[apache-cassandra]", + "apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-pinot]", + "apache-airflow[arangodb]", + "apache-airflow[cloudant]", + "apache-airflow[databricks]", + "apache-airflow[exasol]", + "apache-airflow[influxdb]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[trino]", + "apache-airflow[vertica]", + ], +} + +DEPRECATED_EXTRAS: dict[str, list[str]] = { + 
######################################################################################################## + # The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series + ######################################################################################################## + "atlas": [ + "apache-airflow[apache-atlas]", + ], + "aws": [ + "apache-airflow[amazon]", + ], + "azure": [ + "apache-airflow[microsoft-azure]", + ], + "cassandra": [ + "apache-airflow[apache-cassandra]", + ], + # Empty alias extra just for backward compatibility with Airflow 1.10 + "crypto": [], + "druid": [ + "apache-airflow[apache-druid]", + ], + "gcp": [ + "apache-airflow[google]", + ], + "gcp-api": [ + "apache-airflow[google]", + ], + "hdfs": [ + "apache-airflow[apache-hdfs]", + ], + "hive": [ + "apache-airflow[apache-hive]", + ], + "kubernetes": [ + "apache-airflow[cncf-kubernetes]", + ], + "mssql": [ + "apache-airflow[microsoft-mssql]", + ], + "pinot": [ + "apache-airflow[apache-pinot]", + ], + "s3": [ + "apache-airflow[amazon]", + ], + "spark": [ + "apache-airflow[apache-spark]", + ], + "webhdfs": [ + "apache-airflow[apache-webhdfs]", + ], + "winrm": [ + "apache-airflow[microsoft-winrm]", + ], +} + +# When you remove a dependency from the list, you should also make sure to add the dependency to be removed +# in the scripts/docker/install_airflow_dependencies_from_branch_tip.sh script DEPENDENCIES_TO_REMOVE +# in order to make sure the dependency is not installed in the CI image build process from the main +# of Airflow branch. After your PR is merged, you should remove it from the list there. +DEPENDENCIES = [ + # Alembic is important to handle our migrations in predictable and performant way. It is developed + # together with SQLAlchemy. Our experience with Alembic is that it very stable in minor version + # The 1.13.0 of alembic marked some migration code as SQLAlchemy 2+ only so we limit it to 1.13.1 + "alembic>=1.13.1, <2.0", + "argcomplete>=1.10", + "asgiref", + "attrs>=22.1.0", + # Blinker use for signals in Flask, this is an optional dependency in Flask 2.2 and lower. + # In Flask 2.3 it becomes a mandatory dependency, and flask signals are always available. + "blinker>=1.6.2", + # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x + # Update CustomTTYColoredFormatter to remove + "colorlog>=4.0.2, <5.0", + "configupdater>=3.1.1", + # `airflow/www/extensions/init_views` imports `connexion.decorators.validation.RequestBodyValidator` + # connexion v3 has refactored the entire module to middleware, see: /spec-first/connexion/issues/1525 + # Specifically, RequestBodyValidator was removed in: /spec-first/connexion/pull/1595 + # The usage was added in #30596, seemingly only to override and improve the default error message. + # Either revert that change or find another way, preferably without using connexion internals. + # This limit can be removed after https://github.com/apache/airflow/issues/35234 is fixed + "connexion[flask]>=2.10.0,<3.0", + "cron-descriptor>=1.2.24", + "croniter>=2.0.2", + "cryptography>=39.0.0", + "deprecated>=1.2.13", + "dill>=0.2.2", + "flask-caching>=1.5.0", + # Flask-Session 0.6 add new arguments into the SqlAlchemySessionInterface constructor as well as + # all parameters now are mandatory which make AirflowDatabaseSessionInterface incopatible with this version. 
+ "flask-session>=0.4.0,<0.6", + "flask-wtf>=0.15", + # Flask 2.3 is scheduled to introduce a number of deprecation removals - some of them might be breaking + # for our dependencies - notably `_app_ctx_stack` and `_request_ctx_stack` removals. + # We should remove the limitation after 2.3 is released and our dependencies are updated to handle it + "flask>=2.2,<2.3", + "fsspec>=2023.10.0", + "google-re2>=1.0", + "gunicorn>=20.1.0", + "httpx", + 'importlib_metadata>=1.7;python_version<"3.9"', + # Importib_resources 6.2.0-6.3.1 break pytest_rewrite + # see https://github.com/python/importlib_resources/issues/299 + 'importlib_resources>=5.2,!=6.2.0,!=6.3.0,!=6.3.1;python_version<"3.9"', + "itsdangerous>=2.0", + "jinja2>=3.0.0", + "jsonschema>=4.18.0", + "lazy-object-proxy", + "linkify-it-py>=2.0.0", + "lockfile>=0.12.2", + "markdown-it-py>=2.1.0", + "markupsafe>=1.1.1", + "marshmallow-oneofschema>=2.0.1", + "mdit-py-plugins>=0.3.0", + "opentelemetry-api>=1.15.0", + "opentelemetry-exporter-otlp", + "packaging>=14.0", + "pathspec>=0.9.0", + "pendulum>=2.1.2,<4.0", + "pluggy>=1.0", + "psutil>=4.2.0", + "pygments>=2.0.1", + "pyjwt>=2.0.0", + "python-daemon>=3.0.0", + "python-dateutil>=2.3", + "python-nvd3>=0.15.0", + "python-slugify>=5.0", + # Requests 3 if it will be released, will be heavily breaking. + "requests>=2.27.0,<3", + "rfc3339-validator>=0.1.4", + "rich-argparse>=1.0.0", + "rich>=12.4.4", + "setproctitle>=1.1.8", + # We use some deprecated features of sqlalchemy 2.0 and we should replace them before we can upgrade + # See https://sqlalche.me/e/b8d9 for details of deprecated features + # you can set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. + # The issue tracking it is https://github.com/apache/airflow/issues/28723 + "sqlalchemy>=1.4.36,<2.0", + "sqlalchemy-jsonfield>=1.0", + "tabulate>=0.7.5", + "tenacity>=6.2.0,!=8.2.0", + "termcolor>=1.1.0", + # We should remove this dependency when Providers are limited to Airflow 2.7+ + # as we replaced the usage of unicodecsv with csv in Airflow 2.7 + # See https://github.com/apache/airflow/pull/31693 + # We should also remove "licenses/LICENSE-unicodecsv.txt" file when we remove this dependency + "unicodecsv>=0.14.1", + # The Universal Pathlib provides Pathlib-like interface for FSSPEC + "universal-pathlib>=0.2.2", + # Werkzug 3 breaks Flask-Login 0.6.2, also connexion needs to be updated to >= 3.0 + # we should remove this limitation when FAB supports Flask 2.3 and we migrate connexion to 3+ + "werkzeug>=2.0,<3", +] + + +ALL_DYNAMIC_EXTRA_DICTS: list[tuple[dict[str, list[str]], str]] = [ + (CORE_EXTRAS, "Core extras"), + (DOC_EXTRAS, "Doc extras"), + (DEVEL_EXTRAS, "Devel extras"), + (BUNDLE_EXTRAS, "Bundle extras"), + (DEPRECATED_EXTRAS, "Deprecated extras"), ] +ALL_GENERATED_BUNDLE_EXTRAS = ["all", "all-core", "devel-all", "devel-ci"] + + +def normalize_extra(dependency_id: str) -> str: + return dependency_id.replace(".", "-").replace("_", "-") + + +def normalize_requirement(requirement: str): + from packaging.requirements import Requirement + from packaging.utils import NormalizedName, canonicalize_name + + req = Requirement(requirement) + package: NormalizedName = canonicalize_name(req.name) + package_str = str(package) + if req.extras: + # Sort extras by name + package_str += f"[{','.join(sorted([normalize_extra(extra) for extra in req.extras]))}]" + version_required = "" + if req.specifier: + version_required = ",".join(map(str, sorted(req.specifier, key=lambda spec: spec.version))) + if req.marker: + 
version_required += f"; {req.marker}"
+ return str(package_str + version_required)
+
+
+ALL_DYNAMIC_EXTRAS: list[str] = sorted(
+ set(
+ itertools.chain(
+ *[d for d, desc in ALL_DYNAMIC_EXTRA_DICTS],
+ [normalize_extra(provider) for provider in PROVIDER_DEPENDENCIES],
+ ALL_GENERATED_BUNDLE_EXTRAS,
+ )
+ )
+)
+
 def get_provider_id(provider_spec: str) -> str:
 # in case provider_spec is "<provider_id>=<version>"
@@ -59,17 +562,17 @@ def get_provider_requirement(provider_spec: str) -> str:
 # if providers are ready, we can preinstall them
 PREINSTALLED_PROVIDERS = [
 get_provider_requirement(provider_spec)
- for provider_spec in PREINSTALLED_PROVIDER_SPECS
- if DEPENDENCIES[get_provider_id(provider_spec)]["state"] == "ready"
+ for provider_spec in PRE_INSTALLED_PROVIDERS
+ if PROVIDER_DEPENDENCIES[get_provider_id(provider_spec)]["state"] == "ready"
 ]
 # if provider is in not-ready or pre-release, we need to install its dependencies
 # however we need to skip apache-airflow itself and potentially any providers that are
 PREINSTALLED_NOT_READY_DEPS = []
-for provider_spec in PREINSTALLED_PROVIDER_SPECS:
+for provider_spec in PRE_INSTALLED_PROVIDERS:
 provider_id = get_provider_id(provider_spec)
- if DEPENDENCIES[provider_id]["state"] not in ["ready", "suspended", "removed"]:
- for dependency in DEPENDENCIES[provider_id]["deps"]:
+ if PROVIDER_DEPENDENCIES[provider_id]["state"] not in ["ready", "suspended", "removed"]:
+ for dependency in PROVIDER_DEPENDENCIES[provider_id]["deps"]:
 if dependency.startswith("apache-airflow-providers"):
 raise Exception(
 f"The provider {provider_id} is pre-installed and it has as dependency "
@@ -159,42 +662,177 @@ def write_git_version(self) -> None:
 git_version_file.write_text(version)
+
+def _is_devel_extra(extra: str) -> bool:
+ return extra.startswith("devel") or extra in ["doc", "doc-gen"]
+
+
+GENERATED_DEPENDENCIES_START = "# START OF GENERATED DEPENDENCIES"
+GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES"
+
+
+def convert_to_extra_dependency(dependency: str) -> str:
+ # if there is a version in the dependency - remove it, as we do not need it in the extra specification
+ # for editable installation
+ if ">=" in dependency:
+ dependency = dependency.split(">=")[0]
+ extra = dependency.replace("apache-airflow-providers-", "").replace("-", "_").replace(".", "_")
+ return f"apache-airflow[{extra}]"
+
+
+def get_python_exclusion(excluded_python_versions: list[str]):
+ exclusion = ""
+ if excluded_python_versions:
+ separator = ";"
+ for version in excluded_python_versions:
+ exclusion += f'{separator}python_version != "{version}"'
+ separator = " and "
+ return exclusion
+
+
+def skip_for_editable_build(excluded_python_versions: list[str]) -> bool:
+ current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+ if current_python_version in excluded_python_versions:
+ return True
+ return False
+
+
 class CustomBuildHook(BuildHookInterface[BuilderConfig]):
 """Custom build hook for Airflow - remove devel extras and adds preinstalled providers."""
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ # Stores all dependencies that any of the airflow extras (including devel) use
+ self.all_devel_ci_dependencies: set[str] = set()
+ # All extras that should be included in the wheel package
+ self.all_non_devel_extras: set[str] = set()
+ # All extras that should be available in the editable install
+ self.all_devel_extras: set[str] = set()
+ self.optional_dependencies: dict[str, list[str]] = {}
+ self._dependencies: list[str] = []
+ super().__init__(*args, **kwargs)
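To make the two normalization helpers above concrete, here is a small usage sketch (illustrative only, not part of the change; it assumes you run it from the Airflow source root with `hatchling` and `packaging` installed, so that `hatch_build` is importable):

    from hatch_build import normalize_extra, normalize_requirement

    # Provider ids use dots and underscores; extra names use dashes:
    print(normalize_extra("cncf.kubernetes"))  # -> cncf-kubernetes
    print(normalize_extra("dbt_cloud"))        # -> dbt-cloud

    # Requirements get a canonical package name plus sorted extras and specifiers:
    print(normalize_requirement("apache-airflow[cncf.kubernetes]>=2.7.0"))
    # -> apache-airflow[cncf-kubernetes]>=2.7.0
    print(normalize_requirement('importlib_resources>=5.2,!=6.2.0;python_version<"3.9"'))
    # -> importlib-resources>=5.2,!=6.2.0; python_version < "3.9"

Reducing every dependency to a single canonical spelling like this is what keeps the generated `devel-ci` list de-duplicated and stable across builds.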
+
 def initialize(self, version: str, build_data: dict[str, Any]) -> None:
 """
 Initialize hook immediately before each build.

 Any modifications to the build data will be seen by the build target.
 """
+ self._process_all_built_in_extras(version)
+ self._process_all_provider_extras(version)
+
+ # Adds all-core extras for the extras that are built-in and not devel
+ self.optional_dependencies["all-core"] = sorted(
+ set([f"apache-airflow[{extra}]" for extra in CORE_EXTRAS.keys()])
+ )
+ # Adds "apache-airflow[extra]" for all extras that are not devel extras for wheel and editable builds
+ self.optional_dependencies["all"] = [
+ f"apache-airflow[{extra}]" for extra in sorted(self.all_non_devel_extras)
+ ]
+ # Adds all devel extras for the extras that are built-in only for editable builds
+ if version != "standard":
+ self.optional_dependencies["devel-all"] = [
+ f"apache-airflow[{extra}]" for extra in sorted(self.all_devel_extras)
+ ]
+ # This is a special dependency that is used to install all possible
+ # 3rd-party dependencies for airflow for the CI image. It is exposed in the wheel package
+ # because we want to use it for building the image cache from GitHub URL.
+ self.optional_dependencies["devel-ci"] = sorted(self.all_devel_ci_dependencies)
+ self._dependencies = DEPENDENCIES
+
 if version == "standard":
- all_possible_non_airflow_dependencies = []
- for extra, deps in self.metadata.core.optional_dependencies.items():
- for dep in deps:
- if not dep.startswith("apache-airflow"):
- all_possible_non_airflow_dependencies.append(dep)
- # remove devel dependencies from optional dependencies for standard packages
- self.metadata.core._optional_dependencies = {
- key: value
- for (key, value) in self.metadata.core.optional_dependencies.items()
- if not key.startswith("devel") and key not in ["doc", "doc-gen"]
- }
- # This is the special dependency in wheel package that is used to install all possible
- # 3rd-party dependencies for airflow for the CI image. It is exposed in the wheel package
- # because we want to use for building the image cache from GitHub URL.
- self.metadata.core._optional_dependencies["devel-ci"] = all_possible_non_airflow_dependencies - # Replace editable dependencies with provider dependencies for provider packages - for dependency_id in DEPENDENCIES.keys(): - if DEPENDENCIES[dependency_id]["state"] != "ready": - continue - normalized_dependency_id = dependency_id.replace(".", "-") - self.metadata.core._optional_dependencies[normalized_dependency_id] = [ - f"apache-airflow-providers-{normalized_dependency_id}" - ] # Inject preinstalled providers into the dependencies for standard packages - if self.metadata.core._dependencies: - for provider in PREINSTALLED_PROVIDERS: - self.metadata.core._dependencies.append(provider) - for dependency in PREINSTALLED_NOT_READY_DEPS: - self.metadata.core._dependencies.append(dependency) + for provider in PREINSTALLED_PROVIDERS: + self._dependencies.append(provider) + for not_ready_provider_dependency in PREINSTALLED_NOT_READY_DEPS: + self._dependencies.append(not_ready_provider_dependency) + + # with hatchling, we can modify dependencies dynamically by modifying the build_data + build_data["dependencies"] = self._dependencies + + # unfortunately hatchling currently does not have a way to override optional_dependencies + # via build_data (or so it seem) so we need to modify internal _optional_dependencies + # field in core.metadata until this is possible + self.metadata.core._optional_dependencies = self.optional_dependencies + + def _add_devel_ci_dependencies(self, deps: list[str], python_exclusion: str) -> None: + """ + Add devel_ci_dependencies. + + Adds all external dependencies which are not apache-airflow deps to the list of dependencies + that are going to be added to `devel-ci` extra. + + :param deps: list of dependencies to add + :param version: "standard" or "editable" build. + :param excluded_python_versions: List of python versions to exclude + :param python_exclusion: Python version exclusion string. + """ + for dep in deps: + if not dep.startswith("apache-airflow"): + self.all_devel_ci_dependencies.add(normalize_requirement(dep) + python_exclusion) + + def _process_all_provider_extras(self, version: str) -> None: + """ + Process all provider extras. + + Processes all provider dependencies. This generates dependencies for editable builds + and providers for wheel builds. + + :param version: "standard" or "editable" build. 
+ :return:
+ """
+ for dependency_id in PROVIDER_DEPENDENCIES.keys():
+ if PROVIDER_DEPENDENCIES[dependency_id]["state"] != "ready":
+ continue
+ excluded_python_versions = PROVIDER_DEPENDENCIES[dependency_id].get("excluded-python-versions")
+ if version != "standard" and skip_for_editable_build(excluded_python_versions):
+ continue
+ normalized_extra_name = normalize_extra(dependency_id)
+ deps: list[str] = PROVIDER_DEPENDENCIES[dependency_id]["deps"]
+
+ deps = [dep for dep in deps if not dep.startswith("apache-airflow>=")]
+ devel_deps: list[str] = PROVIDER_DEPENDENCIES[dependency_id].get("devel-deps", [])
+
+ if version == "standard":
+ # add providers instead of dependencies for wheel builds
+ self.optional_dependencies[normalized_extra_name] = [
+ f"apache-airflow-providers-{normalized_extra_name}"
+ f"{get_python_exclusion(excluded_python_versions)}"
+ ]
+ else:
+ # for editable packages - add regular + devel dependencies retrieved from provider.yaml,
+ # but convert the provider dependencies to apache-airflow[extras]
+ # and add python exclusions where needed
+ editable_deps = []
+ for dep in itertools.chain(deps, devel_deps):
+ if dep.startswith("apache-airflow-providers-"):
+ dep = convert_to_extra_dependency(dep)
+ editable_deps.append(dep)
+ self.optional_dependencies[normalized_extra_name] = sorted(set(editable_deps))
+ self._add_devel_ci_dependencies(editable_deps, python_exclusion="")
+ self.all_devel_extras.add(normalized_extra_name)
+ self.all_non_devel_extras.add(normalized_extra_name)
+
+ def _process_all_built_in_extras(self, version: str) -> None:
+ """
+ Process all built-in extras.
+
+ Adds all built-in extras (all of them for editable builds, minus devel and doc extras for
+ wheel builds) to the optional dependencies. It also builds the list of all non-devel built-in
+ extras that will be used to produce the "all" extra.
+
+ :param version: "standard" or "editable" build.
+ :return:
+ """
+ for extra_dict, _ in ALL_DYNAMIC_EXTRA_DICTS:
+ for extra, deps in extra_dict.items():
+ self.all_devel_extras.add(extra)
+ self._add_devel_ci_dependencies(deps, python_exclusion="")
+ if extra_dict not in [DEPRECATED_EXTRAS, DEVEL_EXTRAS, DOC_EXTRAS]:
+ # do not add deprecated extras to "all" extras
+ self.all_non_devel_extras.add(extra)
+ if version == "standard":
+ # for wheel builds we skip devel and doc extras
+ if extra_dict not in [DEVEL_EXTRAS, DOC_EXTRAS]:
+ self.optional_dependencies[extra] = deps
+ else:
+ # for editable builds we add all extras
+ self.optional_dependencies[extra] = deps
diff --git a/pyproject.toml b/pyproject.toml
index cd6aa8b6a555..55f9592eccf8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,8 +35,6 @@ build-backend = "hatchling.build"
 [project]
 name = "apache-airflow"
-dynamic = ["version"]
-
 description = "Programmatically author, schedule and monitor data pipelines"
 readme = { file = "generated/PYPI_README.md", content-type = "text/markdown" }
 license-files.globs = ["LICENSE", "3rd-party-licenses/*.txt"]
@@ -62,1146 +60,88 @@ classifiers = [
 "Programming Language :: Python :: 3.11",
 "Topic :: System :: Monitoring",
 ]
-# When you remove a dependency from the list, you should also make sure to add the dependency to be removed
-# in the scripts/docker/install_airflow_dependencies_from_branch_tip.sh script DEPENDENCIES_TO_REMOVE
-# in order to make sure the dependency is not installed in the CI image build process from the main
-# of Airflow branch. After your PR is merged, you should remove it from the list there.
-dependencies = [ - # Alembic is important to handle our migrations in predictable and performant way. It is developed - # together with SQLAlchemy. Our experience with Alembic is that it very stable in minor version - # The 1.13.0 of alembic marked some migration code as SQLAlchemy 2+ only so we limit it to 1.13.1 - "alembic>=1.13.1, <2.0", - "argcomplete>=1.10", - "asgiref", - "attrs>=22.1.0", - # Blinker use for signals in Flask, this is an optional dependency in Flask 2.2 and lower. - # In Flask 2.3 it becomes a mandatory dependency, and flask signals are always available. - "blinker>=1.6.2", - # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x - # Update CustomTTYColoredFormatter to remove - "colorlog>=4.0.2, <5.0", - "configupdater>=3.1.1", - # `airflow/www/extensions/init_views` imports `connexion.decorators.validation.RequestBodyValidator` - # connexion v3 has refactored the entire module to middleware, see: /spec-first/connexion/issues/1525 - # Specifically, RequestBodyValidator was removed in: /spec-first/connexion/pull/1595 - # The usage was added in #30596, seemingly only to override and improve the default error message. - # Either revert that change or find another way, preferably without using connexion internals. - # This limit can be removed after https://github.com/apache/airflow/issues/35234 is fixed - "connexion[flask]>=2.10.0,<3.0", - "cron-descriptor>=1.2.24", - "croniter>=2.0.2", - "cryptography>=39.0.0", - "deprecated>=1.2.13", - "dill>=0.2.2", - "flask-caching>=1.5.0", - # Flask-Session 0.6 add new arguments into the SqlAlchemySessionInterface constructor as well as - # all parameters now are mandatory which make AirflowDatabaseSessionInterface incopatible with this version. - "flask-session>=0.4.0,<0.6", - "flask-wtf>=0.15", - # Flask 2.3 is scheduled to introduce a number of deprecation removals - some of them might be breaking - # for our dependencies - notably `_app_ctx_stack` and `_request_ctx_stack` removals. - # We should remove the limitation after 2.3 is released and our dependencies are updated to handle it - "flask>=2.2,<2.3", - "fsspec>=2023.10.0", - "google-re2>=1.0", - "gunicorn>=20.1.0", - "httpx", - "importlib_metadata>=1.7;python_version<\"3.9\"", - # Importib_resources 6.2.0-6.3.1 break pytest_rewrite - # see https://github.com/python/importlib_resources/issues/299 - "importlib_resources>=5.2,!=6.2.0,!=6.3.0,!=6.3.1;python_version<\"3.9\"", - "itsdangerous>=2.0", - "jinja2>=3.0.0", - "jsonschema>=4.18.0", - "lazy-object-proxy", - "linkify-it-py>=2.0.0", - "lockfile>=0.12.2", - "markdown-it-py>=2.1.0", - "markupsafe>=1.1.1", - "marshmallow-oneofschema>=2.0.1", - "mdit-py-plugins>=0.3.0", - "opentelemetry-api>=1.15.0", - "opentelemetry-exporter-otlp", - "packaging>=14.0", - "pathspec>=0.9.0", - "pendulum>=2.1.2,<4.0", - "pluggy>=1.0", - "psutil>=4.2.0", - "pygments>=2.0.1", - "pyjwt>=2.0.0", - "python-daemon>=3.0.0", - "python-dateutil>=2.3", - "python-nvd3>=0.15.0", - "python-slugify>=5.0", - # Requests 3 if it will be released, will be heavily breaking. - "requests>=2.27.0,<3", - "rfc3339-validator>=0.1.4", - "rich-argparse>=1.0.0", - "rich>=12.4.4", - "setproctitle>=1.1.8", - # We use some deprecated features of sqlalchemy 2.0 and we should replace them before we can upgrade - # See https://sqlalche.me/e/b8d9 for details of deprecated features - # you can set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. 
- # The issue tracking it is https://github.com/apache/airflow/issues/28723
- "sqlalchemy>=1.4.36,<2.0",
- "sqlalchemy-jsonfield>=1.0",
- "tabulate>=0.7.5",
- "tenacity>=6.2.0,!=8.2.0",
- "termcolor>=1.1.0",
- # We should remove this dependency when Providers are limited to Airflow 2.7+
- # as we replaced the usage of unicodecsv with csv in Airflow 2.7
- # See https://github.com/apache/airflow/pull/31693
- # We should also remove "licenses/LICENSE-unicodecsv.txt" file when we remove this dependency
- "unicodecsv>=0.14.1",
- # The Universal Pathlib provides Pathlib-like interface for FSSPEC
- "universal-pathlib>=0.2.2",
- # Werkzug 3 breaks Flask-Login 0.6.2, also connexion needs to be updated to >= 3.0
- # we should remove this limitation when FAB supports Flask 2.3 and we migrate connexion to 3+
- "werkzeug>=2.0,<3",
-]
-[project.optional-dependencies]
-# Here manually managed extras start
-# Those extras are manually managed and should be updated when needed
+dynamic = ["version", "optional-dependencies", "dependencies"]
+
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# !!! YOU MIGHT BE SURPRISED NOT SEEING THE DEPENDENCIES AS `project.dependencies` !!!!!!!!!
+# !!! AND EXTRAS AS `project.optional-dependencies` !!!!!!!!!
+# !!! THEY ARE marked as `dynamic` GENERATED by `hatch_build.py` !!!!!!!!!
+# !!! SEE COMMENTS BELOW TO FIND WHERE DEPENDENCIES ARE MAINTAINED !!!!!!!!!
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 #
-# START OF core extras
+# !!!!!! Those extras are defined in `hatch_build.py` and should be maintained there !!!!!!!
 #
-# This required for AWS deferrable operators.
-# There is conflict between boto3 and aiobotocore dependency botocore.
-# TODO: We can remove it once boto3 and aiobotocore both have compatible botocore version or
-# boto3 have native aync support and we move away from aio aiobotocore
+# Those extras are available as regular core airflow extras - they install optional features of Airflow.
 #
-aiobotocore = [
- "aiobotocore>=2.7.0",
-]
-async = [
- "eventlet>=0.33.3",
- "gevent>=0.13",
- "greenlet>=0.4.9",
-]
-cgroups = [
- # Cgroupspy 0.2.2 added Python 3.10 compatibility
- "cgroupspy>=0.2.2",
-]
-deprecated-api = [
- "requests>=2.27.0,<3",
-]
-github-enterprise = [
- "apache-airflow[fab]",
- "authlib>=1.0.0",
-]
-google-auth = [
- "apache-airflow[fab]",
- "authlib>=1.0.0",
-]
-graphviz = [
- "graphviz>=0.12",
-]
-kerberos = [
- "pykerberos>=1.1.13",
- "requests-kerberos>=0.10.0",
- "thrift-sasl>=0.2.0",
-]
-ldap = [
- "ldap3>=2.5.1",
- "python-ldap",
-]
-leveldb = [
- "plyvel",
-]
-otel = [
- "opentelemetry-exporter-prometheus",
-]
-pandas = [
- # In pandas 2.2 minimal version of the sqlalchemy is 2.0
- # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
- # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723
- # In addition FAB also limit sqlalchemy to < 2.0
- "pandas>=1.2.5,<2.2",
-]
-password = [
- "bcrypt>=2.0.0",
- "flask-bcrypt>=0.7.1",
-]
-pydantic = [
- "pydantic>=2.3.0",
-]
-rabbitmq = [
- "amqp",
-]
-s3fs = [
- # This is required for support of S3 file system which uses aiobotocore
- # which can have a conflict with boto3 as mentioned in aiobotocore extra
- "s3fs>=2023.10.0",
-]
-saml = [
- # This is required for support of SAML which might be used by some providers (e.g.
Amazon) - "python3-saml>=1.16.0", -] -sentry = [ - "blinker>=1.1", - # Sentry SDK 1.33 is broken when greenlets are installed and fails to import - # See https://github.com/getsentry/sentry-python/issues/2473 - "sentry-sdk>=1.32.0,!=1.33.0", -] -statsd = [ - "statsd>=3.3.0", -] -uv = [ - "uv>=0.1.24", -] -virtualenv = [ - "virtualenv", -] -# END OF core extras -# START OF Apache no provider extras -apache-atlas = [ - "atlasclient>=0.1.2", -] -apache-webhdfs = [ - "hdfs[avro,dataframe,kerberos]>=2.0.4", -] -# END OF Apache no provider extras -all-core = [ - "apache-airflow[aiobotocore]", - "apache-airflow[apache-atlas]", - "apache-airflow[async]", - "apache-airflow[cgroups]", - "apache-airflow[deprecated-api]", - "apache-airflow[github-enterprise]", - "apache-airflow[google-auth]", - "apache-airflow[graphviz]", - "apache-airflow[kerberos]", - "apache-airflow[ldap]", - "apache-airflow[leveldb]", - "apache-airflow[otel]", - "apache-airflow[pandas]", - "apache-airflow[password]", - "apache-airflow[pydantic]", - "apache-airflow[rabbitmq]", - "apache-airflow[s3fs]", - "apache-airflow[saml]", - "apache-airflow[sentry]", - "apache-airflow[statsd]", - "apache-airflow[apache-webhdfs]", - "apache-airflow[virtualenv]", -] -# START OF devel extras -devel-debuggers = [ - "ipdb>=0.13.13", -] -devel-devscripts = [ - "click>=8.0", - "gitpython>=3.1.40", - "hatch>=1.9.1", - "pipdeptree>=2.13.1", - "pygithub>=2.1.1", - "restructuredtext-lint>=1.4.0", - "rich-click>=1.7.0", - "semver>=3.0.2", - "towncrier>=23.11.0", - "twine>=4.0.2", -] -devel-duckdb = [ - # Python 3.12 support was added in 0.10.0 - "duckdb>=0.10.0; python_version >= '3.12'", - "duckdb>=0.9.0; python_version < '3.12'", -] -# Mypy 0.900 and above ships only with stubs from stdlib so if we need other stubs, we need to install them -# manually as `types-*`. See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -# for details. We want to install them explicitly because we want to eventually move to -# mypyd which does not support installing the types dynamically with --install-types -devel-mypy = [ - # TODO: upgrade to newer versions of MyPy continuously as they are released - # Make sure to upgrade the mypy version in update-common-sql-api-stubs in .pre-commit-config.yaml - # when you upgrade it here !!!! 
- "mypy==1.9.0", - "types-Deprecated", - "types-Markdown", - "types-PyMySQL", - "types-PyYAML", - "types-aiofiles", - "types-certifi", - "types-croniter", - "types-docutils", - "types-paramiko", - "types-protobuf", - "types-python-dateutil", - "types-python-slugify", - "types-pytz", - "types-redis", - "types-requests", - "types-setuptools", - "types-tabulate", - "types-termcolor", - "types-toml", -] -devel-sentry = [ - "blinker>=1.7.0", -] -devel-static-checks = [ - "black>=23.12.0", - "pre-commit>=3.5.0", - "ruff==0.3.4", - "yamllint>=1.33.0", -] -devel-tests = [ - "aiofiles>=23.2.0", - "aioresponses>=0.7.6", - "backports.zoneinfo>=0.2.1;python_version<'3.9'", - "beautifulsoup4>=4.7.1", - # Coverage 7.4.0 added experimental support for Python 3.12 PEP669 which we use in Airflow - "coverage>=7.4.0", - "pytest-asyncio>=0.23.3", - "pytest-cov>=4.1.0", - "pytest-icdiff>=0.9", - "pytest-instafail>=0.5.0", - "pytest-mock>=3.12.0", - "pytest-rerunfailures>=13.0", - "pytest-timeouts>=1.2.1", - "pytest-xdist>=3.5.0", - # Temporary upper limmit to <8, not all dependencies at that moment ready to use 8.0 - # Internal meta-task for track https://github.com/apache/airflow/issues/37156 - "pytest>=7.4.4,<8.0", - "requests_mock>=1.11.0", - "time-machine>=2.13.0", - "wheel>=0.42.0", -] -# END OF devel extras -# START OF doc extras -doc = [ - "astroid>=2.12.3,<3.0", - "checksumdir>=1.2.0", - # click 8.1.4 and 8.1.5 generate mypy errors due to typing issue in the upstream package: - # https://github.com/pallets/click/issues/2558 - "click>=8.0,!=8.1.4,!=8.1.5", - # Docutils 0.17.0 converts generated
<div class="section"> into <section> and breaks our doc formatting
- # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle
- # <section>
tags for sections - "docutils<0.17,>=0.16", - "sphinx-airflow-theme>=0.0.12", - "sphinx-argparse>=0.4.0", - # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 - # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x - "sphinx-autoapi>=2.1.1", - "sphinx-copybutton>=0.5.2", - "sphinx-design>=0.5.0", - "sphinx-jinja>=2.0.2", - "sphinx-rtd-theme>=2.0.0", - # Currently we are using sphinx 5 but we need to migrate to Sphinx 7 - "sphinx>=5.3.0,<6.0.0", - "sphinxcontrib-applehelp>=1.0.4", - "sphinxcontrib-devhelp>=1.0.2", - "sphinxcontrib-htmlhelp>=2.0.1", - "sphinxcontrib-httpdomain>=1.8.1", - "sphinxcontrib-jquery>=4.1", - "sphinxcontrib-jsmath>=1.0.1", - "sphinxcontrib-qthelp>=1.0.3", - "sphinxcontrib-redoc>=1.6.0", - "sphinxcontrib-serializinghtml==1.1.5", - "sphinxcontrib-spelling>=8.0.0", -] -doc-gen = [ - "apache-airflow[doc]", - "eralchemy2>=1.3.8", -] -# END OF doc extras -# START OF bundle extras -all-dbs = [ - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-pinot]", - "apache-airflow[arangodb]", - "apache-airflow[cloudant]", - "apache-airflow[databricks]", - "apache-airflow[exasol]", - "apache-airflow[influxdb]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[trino]", - "apache-airflow[vertica]", -] -devel = [ - "apache-airflow[celery]", - "apache-airflow[cncf-kubernetes]", - "apache-airflow[common-io]", - "apache-airflow[common-sql]", - "apache-airflow[devel-debuggers]", - "apache-airflow[devel-devscripts]", - "apache-airflow[devel-duckdb]", - "apache-airflow[devel-mypy]", - "apache-airflow[devel-sentry]", - "apache-airflow[devel-static-checks]", - "apache-airflow[devel-tests]", - "apache-airflow[fab]", - "apache-airflow[ftp]", - "apache-airflow[http]", - "apache-airflow[imap]", - "apache-airflow[sqlite]", -] -devel-all-dbs = [ - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-pinot]", - "apache-airflow[arangodb]", - "apache-airflow[cloudant]", - "apache-airflow[databricks]", - "apache-airflow[exasol]", - "apache-airflow[influxdb]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[trino]", - "apache-airflow[vertica]", -] -devel-ci = [ - "apache-airflow[devel-all]", -] -devel-hadoop = [ - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[devel]", - "apache-airflow[hdfs]", - "apache-airflow[kerberos]", - "apache-airflow[presto]", -] -# END OF bundle extras -############################################################################################################# -# The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series -############################################################################################################# -# START OF deprecated extras -atlas = [ - "apache-airflow[apache-atlas]", -] -aws = [ - 
"apache-airflow[amazon]", -] -azure = [ - "apache-airflow[microsoft-azure]", -] -cassandra = [ - "apache-airflow[apache-cassandra]", -] -# Empty alias extra just for backward compatibility with Airflow 1.10 -crypto = [ -] -druid = [ - "apache-airflow[apache-druid]", -] -gcp = [ - "apache-airflow[google]", -] -gcp_api = [ - "apache-airflow[google]", -] -hdfs = [ - "apache-airflow[apache-hdfs]", -] -hive = [ - "apache-airflow[apache-hive]", -] -kubernetes = [ - "apache-airflow[cncf-kubernetes]", -] -mssql = [ - "apache-airflow[microsoft-mssql]", -] -pinot = [ - "apache-airflow[apache-pinot]", -] -s3 = [ - "apache-airflow[amazon]", -] -spark = [ - "apache-airflow[apache-spark]", -] -webhdfs = [ - "apache-airflow[apache-webhdfs]", -] -winrm = [ - "apache-airflow[microsoft-winrm]", -] -# END OF deprecated extras -############################################################################################################# -# The whole section below is automatically generated by `update-providers-dependencies` pre-commit based -# on `provider.yaml` files present in the `providers` subdirectories. The `provider.yaml` files are -# A single source of truth for provider dependencies, +# START CORE EXTRAS HERE # -# PLEASE DO NOT MODIFY THIS SECTION MANUALLY. IT WILL BE OVERWRITTEN BY PRE-COMMIT !! -# If you want to modify these - modify the corresponding provider.yaml instead. -############################################################################################################# -# START OF GENERATED DEPENDENCIES -airbyte = [ # source: airflow/providers/airbyte/provider.yaml - "apache-airflow[http]", -] -alibaba = [ # source: airflow/providers/alibaba/provider.yaml - "alibabacloud_adb20211201>=1.0.0", - "alibabacloud_tea_openapi>=0.3.7", - "oss2>=2.14.0", -] -amazon = [ # source: airflow/providers/amazon/provider.yaml - "PyAthena>=3.0.10", - "apache-airflow[common_sql]", - "apache-airflow[http]", - "asgiref", - "boto3>=1.33.0", - "botocore>=1.33.0", - "inflection>=0.5.1", - "jsonpath_ng>=1.5.3", - "redshift_connector>=2.0.918", - "sqlalchemy_redshift>=0.8.6", - "watchtower>=2.0.1,<4", - # Devel dependencies for the amazon provider - "aiobotocore>=2.7.0", - "aws_xray_sdk>=2.12.0", - "moto[cloudformation,glue]>=5.0.0", - "mypy-boto3-appflow>=1.33.0", - "mypy-boto3-rds>=1.33.0", - "mypy-boto3-redshift-data>=1.33.0", - "mypy-boto3-s3>=1.33.0", - "s3fs>=2023.10.0", - "openapi-schema-validator>=0.6.2", - "openapi-spec-validator>=0.7.1", -] -apache-beam = [ # source: airflow/providers/apache/beam/provider.yaml - "apache-beam>=2.53.0;python_version != \"3.12\"", - "pyarrow>=14.0.1;python_version != \"3.12\"", -] -apache-cassandra = [ # source: airflow/providers/apache/cassandra/provider.yaml - "cassandra-driver>=3.29.1", -] -apache-drill = [ # source: airflow/providers/apache/drill/provider.yaml - "apache-airflow[common_sql]", - "sqlalchemy-drill>=1.1.0", -] -apache-druid = [ # source: airflow/providers/apache/druid/provider.yaml - "apache-airflow[common_sql]", - "pydruid>=0.4.1", -] -apache-flink = [ # source: airflow/providers/apache/flink/provider.yaml - "apache-airflow[cncf_kubernetes]", - "cryptography>=2.0.0", -] -apache-hdfs = [ # source: airflow/providers/apache/hdfs/provider.yaml - "hdfs[avro,dataframe,kerberos]>=2.0.4", -] -apache-hive = [ # source: airflow/providers/apache/hive/provider.yaml - "apache-airflow[common_sql]", - "hmsclient>=0.1.0", - "pandas>=1.2.5,<2.2", - "pyhive[hive_pure_sasl]>=0.7.0", - "thrift>=0.9.2", -] -apache-impala = [ # source: 
airflow/providers/apache/impala/provider.yaml - "impyla>=0.18.0,<1.0", -] -apache-kafka = [ # source: airflow/providers/apache/kafka/provider.yaml - "asgiref", - "confluent-kafka>=1.8.2", -] -apache-kylin = [ # source: airflow/providers/apache/kylin/provider.yaml - "kylinpy>=2.6", -] -apache-livy = [ # source: airflow/providers/apache/livy/provider.yaml - "aiohttp>=3.9.2", - "apache-airflow[http]", - "asgiref", -] -apache-pig = [] # source: airflow/providers/apache/pig/provider.yaml -apache-pinot = [ # source: airflow/providers/apache/pinot/provider.yaml - "apache-airflow[common_sql]", - "pinotdb>=5.1.0", -] -apache-spark = [ # source: airflow/providers/apache/spark/provider.yaml - "grpcio-status>=1.59.0", - "pyspark", -] -apprise = [ # source: airflow/providers/apprise/provider.yaml - "apprise", -] -arangodb = [ # source: airflow/providers/arangodb/provider.yaml - "python-arango>=7.3.2", -] -asana = [ # source: airflow/providers/asana/provider.yaml - "asana>=0.10,<4.0.0", -] -atlassian-jira = [ # source: airflow/providers/atlassian/jira/provider.yaml - "atlassian-python-api>=1.14.2,!=3.41.6", - "beautifulsoup4", -] -celery = [ # source: airflow/providers/celery/provider.yaml - "celery[redis]>=5.3.0,<6,!=5.3.3,!=5.3.2", - "flower>=1.0.0", - "google-re2>=1.0", -] -cloudant = [ # source: airflow/providers/cloudant/provider.yaml - "cloudant>=2.0", -] -cncf-kubernetes = [ # source: airflow/providers/cncf/kubernetes/provider.yaml - "aiofiles>=23.2.0", - "asgiref>=3.5.2", - "cryptography>=2.0.0", - "google-re2>=1.0", - "kubernetes>=28.1.0,<=29.0.0", - "kubernetes_asyncio>=28.1.0,<=29.0.0", -] -cohere = [ # source: airflow/providers/cohere/provider.yaml - "cohere>=4.37,<5", -] -common-io = [] # source: airflow/providers/common/io/provider.yaml -common-sql = [ # source: airflow/providers/common/sql/provider.yaml - "more-itertools>=9.0.0", - "sqlparse>=0.4.2", -] -databricks = [ # source: airflow/providers/databricks/provider.yaml - "aiohttp>=3.9.2, <4", - "apache-airflow[common_sql]", - "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0", - "requests>=2.27.0,<3", - # Devel dependencies for the databricks provider - "deltalake>=0.12.0", -] -datadog = [ # source: airflow/providers/datadog/provider.yaml - "datadog>=0.14.0", -] -dbt-cloud = [ # source: airflow/providers/dbt/cloud/provider.yaml - "aiohttp>=3.9.2", - "apache-airflow[http]", - "asgiref", -] -dingding = [ # source: airflow/providers/dingding/provider.yaml - "apache-airflow[http]", -] -discord = [ # source: airflow/providers/discord/provider.yaml - "apache-airflow[http]", -] -docker = [ # source: airflow/providers/docker/provider.yaml - "docker>=6", - "python-dotenv>=0.21.0", -] -elasticsearch = [ # source: airflow/providers/elasticsearch/provider.yaml - "apache-airflow[common_sql]", - "elasticsearch>=8.10,<9", -] -exasol = [ # source: airflow/providers/exasol/provider.yaml - "apache-airflow[common_sql]", - "pandas>=1.2.5,<2.2", - "pyexasol>=0.5.1", -] -fab = [ # source: airflow/providers/fab/provider.yaml - "flask-appbuilder==4.4.1", - "flask-login>=0.6.2", - "flask>=2.2,<2.3", - "google-re2>=1.0", -] -facebook = [ # source: airflow/providers/facebook/provider.yaml - "facebook-business>=6.0.2", -] -ftp = [] # source: airflow/providers/ftp/provider.yaml -github = [ # source: airflow/providers/github/provider.yaml - "PyGithub!=1.58", -] -google = [ # source: airflow/providers/google/provider.yaml - "PyOpenSSL", - "apache-airflow[common_sql]", - "asgiref>=3.5.2", - "gcloud-aio-auth>=4.0.0,<5.0.0", - "gcloud-aio-bigquery>=6.1.2", - 
"gcloud-aio-storage>=9.0.0", - "gcsfs>=2023.10.0", - "google-ads>=23.1.0", - "google-analytics-admin", - "google-api-core>=2.11.0,!=2.16.0", - "google-api-python-client>=1.6.0", - "google-auth-httplib2>=0.0.1", - "google-auth>=1.0.0", - "google-cloud-aiplatform>=1.42.1", - "google-cloud-batch>=0.13.0", - "google-cloud-bigquery-datatransfer>=3.13.0", - "google-cloud-bigtable>=2.17.0", - "google-cloud-build>=3.22.0", - "google-cloud-compute>=1.10.0", - "google-cloud-container>=2.17.4", - "google-cloud-datacatalog>=3.11.1", - "google-cloud-dataflow-client>=0.8.6", - "google-cloud-dataform>=0.5.0", - "google-cloud-dataplex>=1.10.0", - "google-cloud-dataproc-metastore>=1.12.0", - "google-cloud-dataproc>=5.8.0", - "google-cloud-dlp>=3.12.0", - "google-cloud-kms>=2.15.0", - "google-cloud-language>=2.9.0", - "google-cloud-logging>=3.5.0", - "google-cloud-memcache>=1.7.0", - "google-cloud-monitoring>=2.18.0", - "google-cloud-orchestration-airflow>=1.10.0", - "google-cloud-os-login>=2.9.1", - "google-cloud-pubsub>=2.19.0", - "google-cloud-redis>=2.12.0", - "google-cloud-run>=0.9.0", - "google-cloud-secret-manager>=2.16.0", - "google-cloud-spanner>=3.11.1", - "google-cloud-speech>=2.18.0", - "google-cloud-storage-transfer>=1.4.1", - "google-cloud-storage>=2.7.0", - "google-cloud-tasks>=2.13.0", - "google-cloud-texttospeech>=2.14.1", - "google-cloud-translate>=3.11.0", - "google-cloud-videointelligence>=2.11.0", - "google-cloud-vision>=3.4.0", - "google-cloud-workflows>=1.10.0", - "grpcio-gcp>=0.2.2", - "httpx", - "json-merge-patch>=0.2", - "looker-sdk>=22.2.0", - "pandas-gbq", - "pandas>=1.2.5,<2.2", - "proto-plus>=1.19.6", - "python-slugify>=5.0", - "sqlalchemy-bigquery>=1.2.1", - "sqlalchemy-spanner>=1.6.2", -] -grpc = [ # source: airflow/providers/grpc/provider.yaml - "google-auth-httplib2>=0.0.1", - "google-auth>=1.0.0, <3.0.0", - "grpcio>=1.15.0", -] -hashicorp = [ # source: airflow/providers/hashicorp/provider.yaml - "hvac>=1.1.0", -] -http = [ # source: airflow/providers/http/provider.yaml - "aiohttp>=3.9.2", - "asgiref", - "requests>=2.27.0,<3", - "requests_toolbelt", -] -imap = [] # source: airflow/providers/imap/provider.yaml -influxdb = [ # source: airflow/providers/influxdb/provider.yaml - "influxdb-client>=1.19.0", - "requests>=2.27.0,<3", -] -jdbc = [ # source: airflow/providers/jdbc/provider.yaml - "apache-airflow[common_sql]", - "jaydebeapi>=1.1.1", -] -jenkins = [ # source: airflow/providers/jenkins/provider.yaml - "python-jenkins>=1.0.0", -] -microsoft-azure = [ # source: airflow/providers/microsoft/azure/provider.yaml - "adal>=1.2.7", - "adlfs>=2023.10.0", - "azure-batch>=8.0.0", - "azure-cosmos>=4.0.0,<4.6.0", - "azure-datalake-store>=0.0.45", - "azure-identity>=1.3.1", - "azure-keyvault-secrets>=4.1.0", - "azure-kusto-data>=4.1.0", - "azure-mgmt-containerinstance>=9.0.0", - "azure-mgmt-containerregistry>=8.0.0", - "azure-mgmt-cosmosdb", - "azure-mgmt-datafactory>=2.0.0", - "azure-mgmt-datalake-store>=0.5.0", - "azure-mgmt-resource>=2.2.0", - "azure-mgmt-storage>=16.0.0", - "azure-servicebus>=7.12.1", - "azure-storage-blob>=12.14.0", - "azure-storage-file-datalake>=12.9.1", - "azure-storage-file-share", - "azure-synapse-artifacts>=0.17.0", - "azure-synapse-spark", - # Devel dependencies for the microsoft.azure provider - "pywinrm", -] -microsoft-mssql = [ # source: airflow/providers/microsoft/mssql/provider.yaml - "apache-airflow[common_sql]", - "pymssql>=2.1.8", -] -microsoft-psrp = [ # source: airflow/providers/microsoft/psrp/provider.yaml - "pypsrp>=0.8.0", -] -microsoft-winrm 
= [ # source: airflow/providers/microsoft/winrm/provider.yaml - "pywinrm>=0.4", -] -mongo = [ # source: airflow/providers/mongo/provider.yaml - "dnspython>=1.13.0", - "pymongo>=3.6.0", - # Devel dependencies for the mongo provider - "mongomock", -] -mysql = [ # source: airflow/providers/mysql/provider.yaml - "apache-airflow[common_sql]", - "mysql-connector-python>=8.0.29", - "mysqlclient>=1.3.6", -] -neo4j = [ # source: airflow/providers/neo4j/provider.yaml - "neo4j>=4.2.1", -] -odbc = [ # source: airflow/providers/odbc/provider.yaml - "apache-airflow[common_sql]", - "pyodbc", -] -openai = [ # source: airflow/providers/openai/provider.yaml - "openai[datalib]>=1.0", -] -openfaas = [] # source: airflow/providers/openfaas/provider.yaml -openlineage = [ # source: airflow/providers/openlineage/provider.yaml - "apache-airflow[common_sql]", - "attrs>=22.2", - "openlineage-integration-common>=0.28.0", - "openlineage-python>=0.28.0", -] -opensearch = [ # source: airflow/providers/opensearch/provider.yaml - "opensearch-py>=2.2.0", -] -opsgenie = [ # source: airflow/providers/opsgenie/provider.yaml - "opsgenie-sdk>=2.1.5", -] -oracle = [ # source: airflow/providers/oracle/provider.yaml - "apache-airflow[common_sql]", - "oracledb>=1.0.0", -] -pagerduty = [ # source: airflow/providers/pagerduty/provider.yaml - "pdpyras>=4.1.2", -] -papermill = [ # source: airflow/providers/papermill/provider.yaml - "ipykernel;python_version != \"3.12\"", - "papermill[all]>=2.4.0;python_version != \"3.12\"", - "scrapbook[all];python_version != \"3.12\"", -] -pgvector = [ # source: airflow/providers/pgvector/provider.yaml - "apache-airflow[postgres]", - "pgvector>=0.2.3", -] -pinecone = [ # source: airflow/providers/pinecone/provider.yaml - "pinecone-client>=2.2.4,<3.0", -] -postgres = [ # source: airflow/providers/postgres/provider.yaml - "apache-airflow[common_sql]", - "psycopg2-binary>=2.8.0", -] -presto = [ # source: airflow/providers/presto/provider.yaml - "apache-airflow[common_sql]", - "pandas>=1.2.5,<2.2", - "presto-python-client>=0.8.4", -] -qdrant = [ # source: airflow/providers/qdrant/provider.yaml - "qdrant_client>=1.7.0", -] -redis = [ # source: airflow/providers/redis/provider.yaml - "redis>=4.5.2,!=4.5.5,!=5.0.2", -] -salesforce = [ # source: airflow/providers/salesforce/provider.yaml - "pandas>=1.2.5,<2.2", - "simple-salesforce>=1.0.0", -] -samba = [ # source: airflow/providers/samba/provider.yaml - "smbprotocol>=1.5.0", -] -segment = [ # source: airflow/providers/segment/provider.yaml - "analytics-python>=1.2.9", -] -sendgrid = [ # source: airflow/providers/sendgrid/provider.yaml - "sendgrid>=6.0.0", -] -sftp = [ # source: airflow/providers/sftp/provider.yaml - "apache-airflow[ssh]", - "asyncssh>=2.12.0", - "paramiko>=2.8.0", -] -singularity = [ # source: airflow/providers/singularity/provider.yaml - "spython>=0.0.56", -] -slack = [ # source: airflow/providers/slack/provider.yaml - "apache-airflow[common_sql]", - "slack_sdk>=3.19.0", -] -smtp = [] # source: airflow/providers/smtp/provider.yaml -snowflake = [ # source: airflow/providers/snowflake/provider.yaml - "apache-airflow[common_sql]", - "snowflake-connector-python>=2.7.8", - "snowflake-sqlalchemy>=1.1.0", -] -sqlite = [ # source: airflow/providers/sqlite/provider.yaml - "apache-airflow[common_sql]", -] -ssh = [ # source: airflow/providers/ssh/provider.yaml - "paramiko>=2.6.0", - "sshtunnel>=0.3.2", -] -tableau = [ # source: airflow/providers/tableau/provider.yaml - "tableauserverclient", -] -tabular = [ # source: 
airflow/providers/tabular/provider.yaml - # Devel dependencies for the tabular provider - "pyiceberg>=0.5.0", -] -telegram = [ # source: airflow/providers/telegram/provider.yaml - "python-telegram-bot>=20.2", -] -teradata = [ # source: airflow/providers/teradata/provider.yaml - "apache-airflow[common_sql]", - "teradatasql>=17.20.0.28", - "teradatasqlalchemy>=17.20.0.0", -] -trino = [ # source: airflow/providers/trino/provider.yaml - "apache-airflow[common_sql]", - "pandas>=1.2.5,<2.2", - "trino>=0.318.0", -] -vertica = [ # source: airflow/providers/vertica/provider.yaml - "apache-airflow[common_sql]", - "vertica-python>=0.5.1", -] -weaviate = [ # source: airflow/providers/weaviate/provider.yaml - "pandas>=1.2.5,<2.2", - "weaviate-client>=3.24.2", -] -yandex = [ # source: airflow/providers/yandex/provider.yaml - "yandex-query-client>=0.1.2", - "yandexcloud>=0.228.0", -] -zendesk = [ # source: airflow/providers/zendesk/provider.yaml - "zenpy>=2.0.40", -] -all = [ - # core extras - "apache-airflow[aiobotocore]", - "apache-airflow[async]", - "apache-airflow[cgroups]", - "apache-airflow[deprecated-api]", - "apache-airflow[github-enterprise]", - "apache-airflow[google-auth]", - "apache-airflow[graphviz]", - "apache-airflow[kerberos]", - "apache-airflow[ldap]", - "apache-airflow[leveldb]", - "apache-airflow[otel]", - "apache-airflow[pandas]", - "apache-airflow[password]", - "apache-airflow[pydantic]", - "apache-airflow[rabbitmq]", - "apache-airflow[s3fs]", - "apache-airflow[saml]", - "apache-airflow[sentry]", - "apache-airflow[statsd]", - "apache-airflow[uv]", - "apache-airflow[virtualenv]", - # Apache no provider extras - "apache-airflow[apache-atlas]", - "apache-airflow[apache-webhdfs]", - "apache-airflow[all-core]", - # Provider extras - "apache-airflow[airbyte]", - "apache-airflow[alibaba]", - "apache-airflow[amazon]", - "apache-airflow[apache-beam]", - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-flink]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-kafka]", - "apache-airflow[apache-kylin]", - "apache-airflow[apache-livy]", - "apache-airflow[apache-pig]", - "apache-airflow[apache-pinot]", - "apache-airflow[apache-spark]", - "apache-airflow[apprise]", - "apache-airflow[arangodb]", - "apache-airflow[asana]", - "apache-airflow[atlassian-jira]", - "apache-airflow[celery]", - "apache-airflow[cloudant]", - "apache-airflow[cncf-kubernetes]", - "apache-airflow[cohere]", - "apache-airflow[common-io]", - "apache-airflow[common-sql]", - "apache-airflow[databricks]", - "apache-airflow[datadog]", - "apache-airflow[dbt-cloud]", - "apache-airflow[dingding]", - "apache-airflow[discord]", - "apache-airflow[docker]", - "apache-airflow[elasticsearch]", - "apache-airflow[exasol]", - "apache-airflow[fab]", - "apache-airflow[facebook]", - "apache-airflow[ftp]", - "apache-airflow[github]", - "apache-airflow[google]", - "apache-airflow[grpc]", - "apache-airflow[hashicorp]", - "apache-airflow[http]", - "apache-airflow[imap]", - "apache-airflow[influxdb]", - "apache-airflow[jdbc]", - "apache-airflow[jenkins]", - "apache-airflow[microsoft-azure]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[microsoft-psrp]", - "apache-airflow[microsoft-winrm]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[odbc]", - "apache-airflow[openai]", - "apache-airflow[openfaas]", - "apache-airflow[openlineage]", - 
"apache-airflow[opensearch]", - "apache-airflow[opsgenie]", - "apache-airflow[oracle]", - "apache-airflow[pagerduty]", - "apache-airflow[papermill]", - "apache-airflow[pgvector]", - "apache-airflow[pinecone]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[qdrant]", - "apache-airflow[redis]", - "apache-airflow[salesforce]", - "apache-airflow[samba]", - "apache-airflow[segment]", - "apache-airflow[sendgrid]", - "apache-airflow[sftp]", - "apache-airflow[singularity]", - "apache-airflow[slack]", - "apache-airflow[smtp]", - "apache-airflow[snowflake]", - "apache-airflow[sqlite]", - "apache-airflow[ssh]", - "apache-airflow[tableau]", - "apache-airflow[tabular]", - "apache-airflow[telegram]", - "apache-airflow[teradata]", - "apache-airflow[trino]", - "apache-airflow[vertica]", - "apache-airflow[weaviate]", - "apache-airflow[yandex]", - "apache-airflow[zendesk]", -] -devel-all = [ - "apache-airflow[all]", - "apache-airflow[devel]", - "apache-airflow[doc]", - "apache-airflow[doc-gen]", - "apache-airflow[saml]", - # Apache no provider extras - "apache-airflow[apache-atlas]", - "apache-airflow[apache-webhdfs]", - "apache-airflow[all-core]", - # Include all provider deps - "apache-airflow[airbyte]", - "apache-airflow[alibaba]", - "apache-airflow[amazon]", - "apache-airflow[apache-beam]", - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-flink]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-kafka]", - "apache-airflow[apache-kylin]", - "apache-airflow[apache-livy]", - "apache-airflow[apache-pig]", - "apache-airflow[apache-pinot]", - "apache-airflow[apache-spark]", - "apache-airflow[apprise]", - "apache-airflow[arangodb]", - "apache-airflow[asana]", - "apache-airflow[atlassian-jira]", - "apache-airflow[celery]", - "apache-airflow[cloudant]", - "apache-airflow[cncf-kubernetes]", - "apache-airflow[cohere]", - "apache-airflow[common-io]", - "apache-airflow[common-sql]", - "apache-airflow[databricks]", - "apache-airflow[datadog]", - "apache-airflow[dbt-cloud]", - "apache-airflow[dingding]", - "apache-airflow[discord]", - "apache-airflow[docker]", - "apache-airflow[elasticsearch]", - "apache-airflow[exasol]", - "apache-airflow[fab]", - "apache-airflow[facebook]", - "apache-airflow[ftp]", - "apache-airflow[github]", - "apache-airflow[google]", - "apache-airflow[grpc]", - "apache-airflow[hashicorp]", - "apache-airflow[http]", - "apache-airflow[imap]", - "apache-airflow[influxdb]", - "apache-airflow[jdbc]", - "apache-airflow[jenkins]", - "apache-airflow[microsoft-azure]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[microsoft-psrp]", - "apache-airflow[microsoft-winrm]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[odbc]", - "apache-airflow[openai]", - "apache-airflow[openfaas]", - "apache-airflow[openlineage]", - "apache-airflow[opensearch]", - "apache-airflow[opsgenie]", - "apache-airflow[oracle]", - "apache-airflow[pagerduty]", - "apache-airflow[papermill]", - "apache-airflow[pgvector]", - "apache-airflow[pinecone]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[qdrant]", - "apache-airflow[redis]", - "apache-airflow[salesforce]", - "apache-airflow[samba]", - "apache-airflow[segment]", - "apache-airflow[sendgrid]", - "apache-airflow[sftp]", - "apache-airflow[singularity]", - "apache-airflow[slack]", - "apache-airflow[smtp]", - 
"apache-airflow[snowflake]", - "apache-airflow[sqlite]", - "apache-airflow[ssh]", - "apache-airflow[tableau]", - "apache-airflow[tabular]", - "apache-airflow[telegram]", - "apache-airflow[teradata]", - "apache-airflow[trino]", - "apache-airflow[vertica]", - "apache-airflow[weaviate]", - "apache-airflow[yandex]", - "apache-airflow[zendesk]", -] -# END OF GENERATED DEPENDENCIES -############################################################################################################# -# The rest of the pyproject.toml file should be manually maintained -############################################################################################################# +# aiobotocore, apache-atlas, apache-webhdfs, async, cgroups, deprecated-api, github-enterprise, +# google-auth, graphviz, kerberos, ldap, leveldb, otel, pandas, password, pydantic, rabbitmq, s3fs, +# saml, sentry, statsd, uv, virtualenv +# +# END CORE EXTRAS HERE +# +# The ``devel`` extras are not available in the released packages. They are only available when you install +# Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +# Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing. +# +# START DEVEL EXTRAS HERE +# +# devel, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel-hadoop, +# devel-mypy, devel-sentry, devel-static-checks, devel-tests +# +# END DEVEL EXTRAS HERE +# +# Those extras are bundles dynamically generated from other extras. +# +# START BUNDLE EXTRAS HERE +# +# all, all-core, all-dbs, devel-all, devel-ci +# +# END BUNDLE EXTRAS HERE +# +# The ``doc`` extras are not available in the released packages. They are only available when you install +# Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +# Airflow. They provide tools needed when you want to build Airflow documentation (note that you also need +# ``devel`` extras installed for airflow and providers in order to build documentation for airflow and +# provider packages respectively). The ``doc`` package is enough to build regular documentation, where +# ``doc_gen`` is needed to generate ER diagram we have describing our database. +# +# START DOC EXTRAS HERE +# +# doc, doc-gen +# +# END DOC EXTRAS HERE +# +# The `deprecated` extras are deprecated extras from Airflow 1 that will be removed in future versions. +# +# START DEPRECATED EXTRAS HERE +# +# atlas, aws, azure, cassandra, crypto, druid, gcp, gcp-api, hdfs, hive, kubernetes, mssql, pinot, s3, +# spark, webhdfs, winrm +# +# END DEPRECATED EXTRAS HERE +# +# !!!!!! Those provuders are defined in the `airflow/providers//provider.yaml` files !!!!!!! +# +# Those extras are available as regular Airflow extras, they install provider packages in standard builds +# or dependencies that are necessary to enable the feature in editable build. 
+# START PROVIDER EXTRAS HERE +# +# airbyte, alibaba, amazon, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.flink, +# apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, +# apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant, +# cncf.kubernetes, cohere, common.io, common.sql, databricks, datadog, dbt.cloud, dingding, discord, +# docker, elasticsearch, exasol, fab, facebook, ftp, github, google, grpc, hashicorp, http, imap, +# influxdb, jdbc, jenkins, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +# mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, pagerduty, +# papermill, pgvector, pinecone, postgres, presto, qdrant, redis, salesforce, samba, segment, +# sendgrid, sftp, singularity, slack, smtp, snowflake, sqlite, ssh, tableau, tabular, telegram, +# teradata, trino, vertica, weaviate, yandex, zendesk +# +# END PROVIDER EXTRAS HERE + [project.scripts] airflow = "airflow.__main__:main" [project.urls] @@ -1219,7 +159,7 @@ YouTube = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" python = "3.8" platforms = ["linux", "macos"] description = "Default environment with Python 3.8 for maximum compatibility" -features = ["devel"] +features = [] [tool.hatch.envs.airflow-38] python = "3.8" @@ -1277,7 +217,6 @@ artifacts = [ "/airflow/www/static/dist/", "/airflow/git_version", "/generated/", - "/airflow_pre_installed_providers.txt", ] diff --git a/scripts/ci/pre_commit/common_precommit_utils.py b/scripts/ci/pre_commit/common_precommit_utils.py index 8926bc1823b2..41bc3a5eeaf9 100644 --- a/scripts/ci/pre_commit/common_precommit_utils.py +++ b/scripts/ci/pre_commit/common_precommit_utils.py @@ -73,21 +73,36 @@ def pre_process_files(files: list[str]) -> list[str]: return result -def insert_documentation(file_path: Path, content: list[str], header: str, footer: str): - text = file_path.read_text().splitlines(keepends=True) +def insert_documentation( + file_path: Path, content: list[str], header: str, footer: str, add_comment: bool = False +) -> bool: + found = False + old_content = file_path.read_text() + lines = old_content.splitlines(keepends=True) replacing = False result: list[str] = [] - for line in text: + for line in lines: if line.strip().startswith(header.strip()): replacing = True + found = True result.append(line) - result.extend(content) + if add_comment: + result.extend(["# " + line if line != "\n" else "#\n" for line in content]) + else: + result.extend(content) if line.strip().startswith(footer.strip()): replacing = False if not replacing: result.append(line) - src = "".join(result) - file_path.write_text(src) + new_content = "".join(result) + if not found: + print(f"Header {header} not found in {file_path}") + sys.exit(1) + if new_content != old_content: + file_path.write_text(new_content) + console.print(f"Updated {file_path}") + return True + return False def initialize_breeze_precommit(name: str, file: str): diff --git a/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py index dbeca287de4c..6e5c410338ec 100755 --- a/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py +++ b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py @@ -28,24 +28,17 @@ from tabulate import tabulate -# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files -try: - import tomllib -except ImportError: - import 
tomli as tomllib - - AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +COMMON_PRECOMMIT_PATH = Path(__file__).parent.resolve() EXTRA_PACKAGES_REF_FILE = AIRFLOW_ROOT_PATH / "docs" / "apache-airflow" / "extra-packages-ref.rst" PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" -sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported - +sys.path.insert(0, COMMON_PRECOMMIT_PATH.as_posix()) # make sure common_precommit_utils is imported from common_precommit_utils import console -pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text()) +sys.path.insert(0, AIRFLOW_ROOT_PATH.as_posix()) # make sure airflow root is imported +from hatch_build import ALL_DYNAMIC_EXTRAS -optional_dependencies: dict[str, list[str]] = pyproject_toml_content["project"]["optional-dependencies"] doc_ref_content = EXTRA_PACKAGES_REF_FILE.read_text() errors: list[str] = [] @@ -55,7 +48,7 @@ suggestions_devel: list[tuple] = [] suggestions_providers: list[tuple] = [] -for dependency in optional_dependencies: +for dependency in ALL_DYNAMIC_EXTRAS: console.print(f"[bright_blue]Checking if {dependency} is mentioned in refs[/]") find_matching = re.search(rf"^\| {dependency} *\|", doc_ref_content, flags=re.MULTILINE) if not find_matching: @@ -95,4 +88,4 @@ console.print(tabulate(suggestions_providers, headers=HEADERS, tablefmt="grid"), markup=False) sys.exit(1) else: - console.print(f"[green]Checked: {len(optional_dependencies)} dependencies are mentioned[/]") + console.print(f"[green]Checked: {len(ALL_DYNAMIC_EXTRAS)} dependencies are mentioned[/]") diff --git a/scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py b/scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py new file mode 100755 index 000000000000..9208d3331f93 --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+""" +Test for an order of dependencies in setup.py +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +from rich import print + +errors: list[str] = [] + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +HATCH_BUILD_PATH = AIRFLOW_ROOT_PATH / "hatch_build.py" + +sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported +from common_precommit_utils import check_list_sorted + +sys.path.insert(0, str(AIRFLOW_ROOT_PATH)) # make sure airflow root is imported +from hatch_build import ALL_DYNAMIC_EXTRA_DICTS + +if __name__ == "__main__": + file_contents = HATCH_BUILD_PATH.read_text() + + for extra_dict, description in ALL_DYNAMIC_EXTRA_DICTS: + for extra, extra_list in extra_dict.items(): + check_list_sorted(extra_list, f"Order of extra: {description}:{extra}", errors) + print() + for error in errors: + print(error) + + print() + + if errors: + sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py b/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py deleted file mode 100755 index 46fa056c537f..000000000000 --- a/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-""" -Test for an order of dependencies in setup.py -""" - -from __future__ import annotations - -import re -import sys -from pathlib import Path - -from rich import print - -errors: list[str] = [] - -AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() -PYPROJECT_TOML_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" - -sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported -from common_precommit_utils import check_list_sorted - - -def check_extras(type: str, extra: str, extras: list[str]) -> None: - r""" - Test for an order of dependencies in extra defined - `^dependent_group_name = [.*?]\n` in setup.py - """ - print(f"[info]Checking {type}:{extra}[/]") - extras = [extra.replace("[", "\\[") for extra in extras] - check_list_sorted(extras, f"Order of extra: {type}:{extra}", errors) - - -def extract_deps(content: str, extra: str) -> list[str]: - deps: list[str] = [] - extracting = False - for line in content.splitlines(): - line = line.strip() - if line.startswith("#"): - continue - if not extracting and line == f"{extra} = [": - extracting = True - elif extracting and line == "]": - break - elif extracting: - deps.append(line.strip().strip(",").strip('"')) - return deps - - -def check_type(pyproject_toml_contents: str, type: str) -> None: - """ - Test for an order of dependencies groups between mark - '# Start dependencies group' and '# End dependencies group' in setup.py - """ - print(f"[info]Checking {type}[/]") - pattern_type = re.compile(f"# START OF {type}\n(.*)# END OF {type}", re.DOTALL) - parsed_type_content = pattern_type.findall(pyproject_toml_contents)[0] - # strip comments - parsed_type_content = ( - "\n".join([line for line in parsed_type_content.splitlines() if not line.startswith("#")]) + "\n" - ) - pattern_extra_name = re.compile(r" = \[.*?]\n", re.DOTALL) - type_content = pattern_extra_name.sub(",", parsed_type_content) - - list_extra_names = type_content.strip(",").split(",") - check_list_sorted(list_extra_names, "Order of dependencies", errors) - for extra in list_extra_names: - deps_list = extract_deps(parsed_type_content, extra) - check_extras(type, extra, deps_list) - - -if __name__ == "__main__": - file_contents = PYPROJECT_TOML_PATH.read_text() - check_type(file_contents, "core extras") - check_type(file_contents, "Apache no provider extras") - check_type(file_contents, "devel extras") - check_type(file_contents, "doc extras") - check_type(file_contents, "bundle extras") - check_type(file_contents, "deprecated extras") - - print() - for error in errors: - print(error) - - print() - - if errors: - sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_insert_extras.py b/scripts/ci/pre_commit/pre_commit_insert_extras.py index d64cd6cd5589..e32ad199b43e 100755 --- a/scripts/ci/pre_commit/pre_commit_insert_extras.py +++ b/scripts/ci/pre_commit/pre_commit_insert_extras.py @@ -19,89 +19,79 @@ import sys import textwrap -from enum import Enum from pathlib import Path -# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files -try: - import tomllib -except ImportError: - import tomli as tomllib - AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported from common_precommit_utils import insert_documentation - -class ExtraType(Enum): - DEVEL = "DEVEL" - DOC = "DOC" - REGULAR = "REGULAR" +sys.path.insert(0, 
diff --git a/scripts/ci/pre_commit/pre_commit_insert_extras.py b/scripts/ci/pre_commit/pre_commit_insert_extras.py
index d64cd6cd5589..e32ad199b43e 100755
--- a/scripts/ci/pre_commit/pre_commit_insert_extras.py
+++ b/scripts/ci/pre_commit/pre_commit_insert_extras.py
@@ -19,89 +19,79 @@
 import sys
 import textwrap
-from enum import Enum
 from pathlib import Path
 
-# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files
-try:
-    import tomllib
-except ImportError:
-    import tomli as tomllib
-
 AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve()
 PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml"
 
 sys.path.insert(0, str(Path(__file__).parent.resolve()))  # make sure common_precommit_utils is imported
 from common_precommit_utils import insert_documentation
 
-
-class ExtraType(Enum):
-    DEVEL = "DEVEL"
-    DOC = "DOC"
-    REGULAR = "REGULAR"
+sys.path.insert(0, AIRFLOW_ROOT_PATH.as_posix())  # make sure airflow root is imported
+from hatch_build import (
+    ALL_DYNAMIC_EXTRA_DICTS,
+    ALL_GENERATED_BUNDLE_EXTRAS,
+    BUNDLE_EXTRAS,
+    PROVIDER_DEPENDENCIES,
+)
 
 
-def get_header_and_footer(extra_type: ExtraType, file_format: str) -> tuple[str, str]:
+def get_header_and_footer(extra_type: str, file_format: str) -> tuple[str, str]:
     if file_format == "rst":
-        return f"  .. START {extra_type.value} EXTRAS HERE", f"  .. END {extra_type.value} EXTRAS HERE"
+        return f"  .. START {extra_type.upper()} HERE", f"  .. END {extra_type.upper()} HERE"
     elif file_format == "txt":
-        return f"# START {extra_type.value} EXTRAS HERE", f"# END {extra_type.value} EXTRAS HERE"
+        return f"# START {extra_type.upper()} HERE", f"# END {extra_type.upper()} HERE"
     else:
-        raise Exception(f"Bad format {format} passed. Only rst and txt are supported")
+        raise Exception(f"Bad format {file_format} passed. Only rst and txt are supported")
 
 
-def get_wrapped_list(extras_set: set[str]) -> list[str]:
+def get_wrapped_list(extras_set: list[str]) -> list[str]:
     array = [line + "\n" for line in textwrap.wrap(", ".join(sorted(extras_set)), 100)]
     array.insert(0, "\n")
     array.append("\n")
     return array
 
 
-def get_extra_types_dict(extras: dict[str, list[str]]) -> dict[ExtraType, tuple[set[str], list[str]]]:
+def get_extra_types_dict() -> dict[str, list[str]]:
     """
-    Split extras into four types.
+    Split extras into the extra types defined dynamically in hatch_build.py.
 
-    :return: dictionary of extra types with tuple of two set,list - set of extras and text-wrapped list
+    :return: dictionary mapping each extra type description to its text-wrapped list of extras
     """
-    extra_type_dict: dict[ExtraType, tuple[set[str], list[str]]] = {}
-
-    for extra_type in ExtraType:
-        extra_type_dict[extra_type] = (set(), [])
-
-    for key, value in extras.items():
-        if key.startswith("devel"):
-            extra_type_dict[ExtraType.DEVEL][0].add(key)
-        elif key in ["doc", "doc-gen"]:
-            extra_type_dict[ExtraType.DOC][0].add(key)
-        else:
-            extra_type_dict[ExtraType.REGULAR][0].add(key)
-
-    for extra_type in ExtraType:
-        extra_type_dict[extra_type][1].extend(get_wrapped_list(extra_type_dict[extra_type][0]))
-
+    extra_type_dict: dict[str, list[str]] = {}
+
+    for extra_dict, extra_description in ALL_DYNAMIC_EXTRA_DICTS:
+        extra_list = sorted(extra_dict)
+        if extra_dict == BUNDLE_EXTRAS:
+            extra_list = sorted(extra_list + ALL_GENERATED_BUNDLE_EXTRAS)
+        extra_type_dict[extra_description] = get_wrapped_list(extra_list)
+    extra_type_dict["Provider extras"] = get_wrapped_list(PROVIDER_DEPENDENCIES)
     return extra_type_dict
 
 
-def get_extras_from_pyproject_toml() -> dict[str, list[str]]:
-    pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text())
-    return pyproject_toml_content["project"]["optional-dependencies"]
-
-
-FILES_TO_UPDATE = [
-    (AIRFLOW_ROOT_PATH / "INSTALL", "txt"),
-    (AIRFLOW_ROOT_PATH / "contributing-docs" / "12_airflow_dependencies_and_extras.rst", "rst"),
+FILES_TO_UPDATE: list[tuple[Path, str, bool]] = [
+    (AIRFLOW_ROOT_PATH / "INSTALL", "txt", False),
+    (AIRFLOW_ROOT_PATH / "contributing-docs" / "12_airflow_dependencies_and_extras.rst", "rst", False),
+    (AIRFLOW_ROOT_PATH / "pyproject.toml", "txt", True),
 ]
 
 
-def process_documentation_files():
-    extra_type_dict = get_extra_types_dict(get_extras_from_pyproject_toml())
-    for file, file_format in FILES_TO_UPDATE:
+def process_documentation_files() -> bool:
+    changed = False
+    extra_type_dict = get_extra_types_dict()
+    for file, file_format, add_comment in FILES_TO_UPDATE:
         if not file.exists():
             raise Exception(f"File {file} does not exist")
-        for extra_type in ExtraType:
-            header, footer = get_header_and_footer(extra_type, file_format)
-            insert_documentation(file, extra_type_dict[extra_type][1], header, footer)
+        for extra_type_description, extra_list in extra_type_dict.items():
+            header, footer = get_header_and_footer(extra_type_description, file_format)
+            if insert_documentation(file, extra_list, header, footer, add_comment):
+                changed = True
+    return changed
 
 
 if __name__ == "__main__":
-    process_documentation_files()
+    if process_documentation_files():
+        print("Some files were updated. Please commit them.")
+        sys.exit(1)
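
get_wrapped_list() is what turns each extras dictionary into documentation-ready text: the keys are
sorted, joined with commas, wrapped at 100 columns, and padded with blank lines so the block can be
dropped verbatim between a START/END marker pair. A quick standalone demonstration (the extras are
invented):

    import textwrap

    extras = ["celery", "amazon", "google", "async", "graphviz"]  # hypothetical
    wrapped = [line + "\n" for line in textwrap.wrap(", ".join(sorted(extras)), 100)]
    wrapped = ["\n", *wrapped, "\n"]
    print("".join(wrapped))
    # amazon, async, celery, google, graphviz
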
diff --git a/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py b/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py
index 897fddf2ef06..c97addf50178 100755
--- a/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py
+++ b/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py
@@ -45,6 +45,4 @@ def sort_file(path: Path):
 
 if __name__ == "__main__":
     prod_image_installed_providers_path = AIRFLOW_SOURCES / "prod_image_installed_providers.txt"
-    airflow_pre_installed_providers_path = AIRFLOW_SOURCES / "airflow_pre_installed_providers.txt"
     sort_file(prod_image_installed_providers_path)
-    sort_file(airflow_pre_installed_providers_path)
diff --git a/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py
index 64c96a90e477..af0916bbd3eb 100755
--- a/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py
+++ b/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py
@@ -17,6 +17,7 @@
 # under the License.
 from __future__ import annotations
 
+import re
 import shutil
 import subprocess
 import sys
@@ -26,8 +27,19 @@
 AIRFLOW_SOURCES = Path(__file__).parents[3].resolve()
 PYPROJECT_TOML_FILE = AIRFLOW_SOURCES / "pyproject.toml"
 
+HATCHLING_MATCH = re.compile(r"hatchling==[0-9.]*")
+
+FILES_TO_REPLACE_HATCHLING_IN = [
+    AIRFLOW_SOURCES / ".pre-commit-config.yaml",
+    AIRFLOW_SOURCES / "clients" / "python" / "pyproject.toml",
+    AIRFLOW_SOURCES / "docker_tests" / "requirements.txt",
+]
+
+files_changed = False
+
 if __name__ == "__main__":
     temp_dir = Path(tempfile.mkdtemp())
+    hatchling_spec = ""
     try:
         subprocess.check_call([sys.executable, "-m", "venv", temp_dir.as_posix()])
         venv_python = temp_dir / "bin" / "python"
@@ -47,10 +59,25 @@
                     if dep.startswith("tomli=="):
                         dep = dep + "; python_version < '3.11'"
                     result.append(f'    "{dep}",')
+                    if dep.startswith("hatchling=="):
+                        hatchling_spec = dep
             if skipping and line == "]":
                 skipping = False
                 result.append(line)
         result.append("")
-        PYPROJECT_TOML_FILE.write_text("\n".join(result))
+        new_pyproject_toml_file_content = "\n".join(result)
+        if new_pyproject_toml_file_content != pyproject_toml_content:
+            files_changed = True
+            PYPROJECT_TOML_FILE.write_text(new_pyproject_toml_file_content)
+        for file_to_replace_hatchling in FILES_TO_REPLACE_HATCHLING_IN:
+            old_file_content = file_to_replace_hatchling.read_text()
+            new_file_content = HATCHLING_MATCH.sub(hatchling_spec, old_file_content)
+            if new_file_content != old_file_content:
+                files_changed = True
+                file_to_replace_hatchling.write_text(new_file_content)
     finally:
         shutil.rmtree(temp_dir)
+
+    if files_changed:
+        print("Some files changed. Please commit the changes.")
+        sys.exit(1)
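
The pre-commit hook above keeps the hatchling pin in .pre-commit-config.yaml, the Python client's
pyproject.toml, and docker_tests/requirements.txt in lockstep with whatever `pip freeze` reports in
the fresh venv. One subtlety: re.Pattern.sub() takes `count`, not `flags`, as its third positional
argument, so passing re.MULTILINE there would silently mean "replace at most 8 occurrences"
(re.MULTILINE == 8), and the anchorless pattern needs no flag anyway. A small demonstration with
invented file content:

    import re

    HATCHLING_MATCH = re.compile(r"hatchling==[0-9.]*")

    old_file_content = "additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.4']"
    hatchling_spec = "hatchling==1.22.5"  # hypothetical newer pin captured from pip freeze

    new_file_content = HATCHLING_MATCH.sub(hatchling_spec, old_file_content)
    print(new_file_content)
    # additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.5']
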
Please commit the changes.") + sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py index 72d9acb524fa..ca1d36aed296 100755 --- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py @@ -22,7 +22,6 @@ import sys from ast import Import, ImportFrom, NodeVisitor, parse from collections import defaultdict -from enum import Enum from pathlib import Path from typing import Any, List @@ -182,154 +181,6 @@ def check_if_different_provider_used(file_path: Path) -> None: FOUND_EXTRAS: dict[str, list[str]] = defaultdict(list) - -class ParsedDependencyTypes(Enum): - CORE_EXTRAS = "core extras" - APACHE_NO_PROVIDER_EXTRAS = "Apache no provider extras" - DEVEL_EXTRAS = "devel extras" - DOC_EXTRAS = "doc extras" - BUNDLE_EXTRAS = "bundle extras" - DEPRECATED_EXTRAS = "deprecated extras" - MANUAL_EXTRAS = "manual extras" - - -GENERATED_DEPENDENCIES_START = "# START OF GENERATED DEPENDENCIES" -GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES" - - -def normalize_extra(dependency: str) -> str: - return dependency.replace(".", "-").replace("_", "-") - - -def normalize_package_name(dependency: str) -> str: - return f"apache-airflow-providers-{dependency.replace('.', '-').replace('_', '-')}" - - -def convert_to_extra_dependency(dependency: str) -> str: - # if there is version in dependency - remove it as we do not need it in extra specification - # for editable installation - if ">=" in dependency: - dependency = dependency.split(">=")[0] - extra = dependency.replace("apache-airflow-providers-", "").replace("-", "_").replace(".", "_") - return f"apache-airflow[{extra}]" - - -def generate_dependencies( - result_content: list[str], - dependencies: dict[str, dict[str, list[str] | str]], -): - def generate_parsed_extras(type: ParsedDependencyTypes): - result_content.append(f" # {type.value}") - for extra in FOUND_EXTRAS[type.value]: - result_content.append(f' "apache-airflow[{extra}]",') - - def get_python_exclusion(dependency_info: dict[str, list[str] | str]): - excluded_python_versions = dependency_info.get("excluded-python-versions") - exclusion = "" - if excluded_python_versions: - separator = ";" - for version in excluded_python_versions: - exclusion += f'{separator}python_version != \\"{version}\\"' - separator = " and " - return exclusion - - for dependency, dependency_info in dependencies.items(): - if dependency_info["state"] in ["suspended", "removed"]: - continue - deps = dependency_info["deps"] - deps = [dep for dep in deps if not dep.startswith("apache-airflow>=")] - devel_deps = dependency_info.get("devel-deps") - if not deps and not devel_deps: - result_content.append( - f"{normalize_extra(dependency)} = [] " - f"# source: airflow/providers/{dependency.replace('.', '/')}/provider.yaml" - ) - continue - result_content.append( - f"{normalize_extra(dependency)} = " - f"[ # source: airflow/providers/{dependency.replace('.', '/')}/provider.yaml" - ) - if not isinstance(deps, list): - raise TypeError(f"Wrong type of 'deps' {deps} for {dependency} in {DEPENDENCIES_JSON_FILE_PATH}") - for dep in deps: - if dep.startswith("apache-airflow-providers-"): - dep = convert_to_extra_dependency(dep) - result_content.append(f' "{dep}{get_python_exclusion(dependency_info)}",') - if devel_deps: - result_content.append(f" # Devel dependencies for the {dependency} provider") - for dep in devel_deps: - 
result_content.append(f' "{dep}{get_python_exclusion(dependency_info)}",') - result_content.append("]") - result_content.append("all = [") - generate_parsed_extras(ParsedDependencyTypes.CORE_EXTRAS) - generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) - result_content.append(" # Provider extras") - for dependency, dependency_info in dependencies.items(): - result_content.append(f' "apache-airflow[{normalize_extra(dependency)}]",') - result_content.append("]") - result_content.append("devel-all = [") - result_content.append(' "apache-airflow[all]",') - result_content.append(' "apache-airflow[devel]",') - result_content.append(' "apache-airflow[doc]",') - result_content.append(' "apache-airflow[doc-gen]",') - result_content.append(' "apache-airflow[saml]",') - generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) - result_content.append(" # Include all provider deps") - for dependency, dependency_info in dependencies.items(): - result_content.append(f' "apache-airflow[{normalize_extra(dependency)}]",') - result_content.append("]") - - -def get_dependency_type(dependency_type: str) -> ParsedDependencyTypes | None: - for dep_type in ParsedDependencyTypes: - if dep_type.value == dependency_type: - return dep_type - return None - - -def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]) -> bool: - file_content = PYPROJECT_TOML_FILE_PATH.read_text() - result_content: list[str] = [] - copying = True - current_type: str | None = None - line_count: int = 0 - for line in file_content.splitlines(): - if copying: - result_content.append(line) - if line.strip().startswith(GENERATED_DEPENDENCIES_START): - copying = False - generate_dependencies(result_content, dependencies) - elif line.strip().startswith(GENERATED_DEPENDENCIES_END): - copying = True - result_content.append(line) - elif line.strip().startswith("# START OF "): - current_type = line.strip().replace("# START OF ", "") - type_enum = get_dependency_type(current_type) - if type_enum is None: - console.print( - f"[red]Wrong start of section '{current_type}' in {PYPROJECT_TOML_FILE_PATH} " - f"at line {line_count}: Unknown section type" - ) - sys.exit(1) - elif line.strip().startswith("# END OF "): - end_type = line.strip().replace("# END OF ", "") - if end_type != current_type: - console.print( - f"[red]Wrong end of section {end_type} in {PYPROJECT_TOML_FILE_PATH} at line {line_count}" - ) - sys.exit(1) - if current_type: - if line.strip().endswith(" = ["): - FOUND_EXTRAS[current_type].append(line.split(" = [")[0].strip()) - line_count += 1 - result_content.append("") - new_file_content = "\n".join(result_content) - if file_content != new_file_content: - PYPROJECT_TOML_FILE_PATH.write_text(new_file_content) - return True - return False - - if __name__ == "__main__": find_all_providers_and_provider_files() num_files = len(ALL_PROVIDER_FILES) @@ -395,19 +246,4 @@ def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]) - ) console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}") console.print() - if update_pyproject_toml(unique_sorted_dependencies): - if os.environ.get("CI"): - console.print(f"There is a need to regenerate {PYPROJECT_TOML_FILE_PATH}") - console.print( - f"[red]You need to run the following command locally and commit generated " - f"{PYPROJECT_TOML_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)} file:\n" - ) - console.print("breeze static-checks --type update-providers-dependencies --all-files") - console.print() - console.print() - 
console.print("[yellow]Make sure to rebase your changes on the latest main branch!") - console.print() - sys.exit(1) - else: - console.print(f"Written {PYPROJECT_TOML_FILE_PATH}") console.print() diff --git a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh index 8158ab5886aa..41389ccda667 100644 --- a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh +++ b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh @@ -45,13 +45,17 @@ function install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi + local TEMP_AIRFLOW_DIR + TEMP_AIRFLOW_DIR=$(mktemp -d) # Install latest set of dependencies - without constraints. This is to download a "base" set of # dependencies that we can cache and reuse when installing airflow using constraints and latest # pyproject.toml in the next step (when we install regular airflow). set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "apache-airflow[${AIRFLOW_EXTRAS}] @ https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" + curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ + tar xvz -C "${TEMP_AIRFLOW_DIR}" --strip 1 + # Make sure editable dependencies are calculated when devel-ci dependencies are installed + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" set +x common::install_packaging_tools set -x @@ -67,6 +71,7 @@ function install_airflow_dependencies_from_branch_tip() { set +x ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow set -x + rm -rvf "${TEMP_AIRFLOW_DIR}" # If you want to make sure dependency is removed from cache in your PR when you removed it from # pyproject.toml - please add your dependency here as a list of strings # for example: