From 52dd681da1d915e2a6a4792796723f4dcd17c67a Mon Sep 17 00:00:00 2001 From: Gert van Dijk Date: Sun, 5 Feb 2023 16:09:30 +0100 Subject: [PATCH] Initial public release --- .dockerignore | 14 + .envrc | 5 + .flake8 | 17 + .gitignore | 12 + .vscode/.gitignore | 5 + .vscode/extensions.json | 12 + .vscode/extensions.json.license | 3 + .vscode/launch.json | 23 + .vscode/launch.json.license | 3 + .vscode/settings.json | 56 ++ .vscode/settings.json.license | 3 + .vscode/tasks.json | 123 +++ .vscode/tasks.json.license | 3 + CONTRIBUTING.md | 176 ++++ Dockerfile | 97 ++ LICENSES/Apache-2.0.txt | 73 ++ LICENSES/CC0-1.0.txt | 121 +++ Makefile | 42 + README.md | 263 ++++++ docs/api.md | 162 ++++ docs/design.md | 125 +++ docs/milter-protocol.md | 432 +++++++++ docs/releasing.md | 38 + docs/smtp-recap.md | 36 + postfixtest/Dockerfile | 21 + postfixtest/Makefile | 46 + postfixtest/files/apt-install-clean.sh | 20 + postfixtest/files/entrypoint.sh | 48 + pyproject.toml | 107 +++ run-all-linters | 92 ++ src/purepythonmilter/__init__.py | 94 ++ src/purepythonmilter/_version.py | 9 + src/purepythonmilter/api/__init__.py | 0 src/purepythonmilter/api/application.py | 355 ++++++++ src/purepythonmilter/api/interfaces.py | 200 ++++ src/purepythonmilter/api/logger.py | 70 ++ src/purepythonmilter/api/models.py | 339 +++++++ src/purepythonmilter/examples/README.md | 126 +++ src/purepythonmilter/examples/__init__.py | 0 .../examples/append_header_ip/__init__.py | 0 .../examples/append_header_ip/__main__.py | 90 ++ .../examples/change_body/__init__.py | 0 .../examples/change_body/__main__.py | 66 ++ .../examples/debug_log_all/__init__.py | 0 .../examples/debug_log_all/__main__.py | 120 +++ src/purepythonmilter/protocol/__init__.py | 0 src/purepythonmilter/protocol/commands.py | 520 +++++++++++ src/purepythonmilter/protocol/definitions.py | 107 +++ src/purepythonmilter/protocol/exceptions.py | 21 + src/purepythonmilter/protocol/packet.py | 73 ++ src/purepythonmilter/protocol/payload.py | 43 + 
src/purepythonmilter/protocol/responses.py | 468 ++++++++++ src/purepythonmilter/py.typed | 0 src/purepythonmilter/server/__init__.py | 0 .../server/connectionhandler.py | 204 +++++ src/purepythonmilter/server/milterserver.py | 133 +++ src/purepythonmilter/server/session.py | 308 +++++++ tests/__init__.py | 0 tests/api/__init__.py | 0 tests/api/test_application.py | 67 ++ tests/api/test_logger.py | 65 ++ tests/api/test_models.py | 126 +++ tests/conftest.py | 259 ++++++ tests/integration/__init__.py | 0 tests/integration/conftest.py | 140 +++ tests/integration/examples/__init__.py | 0 .../examples/test_append_header_ip.py | 62 ++ .../integration/examples/test_change_body.py | 66 ++ .../examples/test_debug_log_all.py | 59 ++ tests/integration/test_server.py | 141 +++ tests/protocol/__init__.py | 0 tests/protocol/test_commands.py | 852 ++++++++++++++++++ tests/protocol/test_packet.py | 114 +++ tests/protocol/test_payload.py | 54 ++ tests/protocol/test_responses.py | 422 +++++++++ tests/server/__init__.py | 0 tests/server/test_session.py | 326 +++++++ 77 files changed, 8277 insertions(+) create mode 100644 .dockerignore create mode 100644 .envrc create mode 100644 .flake8 create mode 100644 .gitignore create mode 100644 .vscode/.gitignore create mode 100644 .vscode/extensions.json create mode 100644 .vscode/extensions.json.license create mode 100644 .vscode/launch.json create mode 100644 .vscode/launch.json.license create mode 100644 .vscode/settings.json create mode 100644 .vscode/settings.json.license create mode 100644 .vscode/tasks.json create mode 100644 .vscode/tasks.json.license create mode 100644 CONTRIBUTING.md create mode 100644 Dockerfile create mode 100644 LICENSES/Apache-2.0.txt create mode 100644 LICENSES/CC0-1.0.txt create mode 100644 Makefile create mode 100644 README.md create mode 100644 docs/api.md create mode 100644 docs/design.md create mode 100644 docs/milter-protocol.md create mode 100644 docs/releasing.md create mode 100644 docs/smtp-recap.md 
create mode 100644 postfixtest/Dockerfile create mode 100644 postfixtest/Makefile create mode 100755 postfixtest/files/apt-install-clean.sh create mode 100755 postfixtest/files/entrypoint.sh create mode 100644 pyproject.toml create mode 100755 run-all-linters create mode 100644 src/purepythonmilter/__init__.py create mode 100644 src/purepythonmilter/_version.py create mode 100644 src/purepythonmilter/api/__init__.py create mode 100644 src/purepythonmilter/api/application.py create mode 100644 src/purepythonmilter/api/interfaces.py create mode 100644 src/purepythonmilter/api/logger.py create mode 100644 src/purepythonmilter/api/models.py create mode 100644 src/purepythonmilter/examples/README.md create mode 100644 src/purepythonmilter/examples/__init__.py create mode 100644 src/purepythonmilter/examples/append_header_ip/__init__.py create mode 100644 src/purepythonmilter/examples/append_header_ip/__main__.py create mode 100644 src/purepythonmilter/examples/change_body/__init__.py create mode 100644 src/purepythonmilter/examples/change_body/__main__.py create mode 100644 src/purepythonmilter/examples/debug_log_all/__init__.py create mode 100644 src/purepythonmilter/examples/debug_log_all/__main__.py create mode 100644 src/purepythonmilter/protocol/__init__.py create mode 100644 src/purepythonmilter/protocol/commands.py create mode 100644 src/purepythonmilter/protocol/definitions.py create mode 100644 src/purepythonmilter/protocol/exceptions.py create mode 100644 src/purepythonmilter/protocol/packet.py create mode 100644 src/purepythonmilter/protocol/payload.py create mode 100644 src/purepythonmilter/protocol/responses.py create mode 100644 src/purepythonmilter/py.typed create mode 100644 src/purepythonmilter/server/__init__.py create mode 100644 src/purepythonmilter/server/connectionhandler.py create mode 100644 src/purepythonmilter/server/milterserver.py create mode 100644 src/purepythonmilter/server/session.py create mode 100644 tests/__init__.py create mode 100644 
tests/api/__init__.py create mode 100644 tests/api/test_application.py create mode 100644 tests/api/test_logger.py create mode 100644 tests/api/test_models.py create mode 100644 tests/conftest.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/conftest.py create mode 100644 tests/integration/examples/__init__.py create mode 100644 tests/integration/examples/test_append_header_ip.py create mode 100644 tests/integration/examples/test_change_body.py create mode 100644 tests/integration/examples/test_debug_log_all.py create mode 100644 tests/integration/test_server.py create mode 100644 tests/protocol/__init__.py create mode 100644 tests/protocol/test_commands.py create mode 100644 tests/protocol/test_packet.py create mode 100644 tests/protocol/test_payload.py create mode 100644 tests/protocol/test_responses.py create mode 100644 tests/server/__init__.py create mode 100644 tests/server/test_session.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..7516f6b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +/.coverage +/.direnv +/.git +/.mypy_cache +/.pytest_cache +/.pytest-cov +/.reuse +/.ruff_cache +/.vscode +/dist diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..9322a97 --- /dev/null +++ b/.envrc @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +layout python python3.10 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..afef3a7 --- /dev/null +++ b/.flake8 @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +# Flake8 does not support reading from pyproject.toml files. 
+# https://github.com/PyCQA/flake8/issues/234 + +[flake8] +# Match black's default +max-line-length = 88 +extend-exclude = + *.egg-info/, + ./.mypy_cache, + ./.pytest_cache, + ./build, + ./dist, + ./.direnv diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfa7931 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +*.egg-info/ +__pycache__/ +/.pytest_cache +/build +/dist +/.direnv +/.coverage +/.pytest-cov diff --git a/.vscode/.gitignore b/.vscode/.gitignore new file mode 100644 index 0000000..81abf60 --- /dev/null +++ b/.vscode/.gitignore @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +/*.log diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..34f85cc --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,12 @@ +{ + "recommendations": [ + "cameron.vscode-pytest", + "charliermarsh.ruff", + "exiasr.hadolint", + "ms-python.isort", + "ms-python.python", + "ms-python.vscode-pylance", + "ryanluker.vscode-coverage-gutters", + "timonwong.shellcheck", + ] +} diff --git a/.vscode/extensions.json.license b/.vscode/extensions.json.license new file mode 100644 index 0000000..5a22ef0 --- /dev/null +++ b/.vscode/extensions.json.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2023 Gert van Dijk + +SPDX-License-Identifier: CC0-1.0 diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..94de361 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,23 @@ +{ + "version": "0.2.0", + "configurations": [ + { + // Overriding default Python debug test launcher command to disable coverage + // (via env var below), because coverage reporting interferes with the + // debugger. 
+ // https://code.visualstudio.com/docs/python/testing#_pytest-configuration-settings + "name": "Python: Debug Tests", + "type": "python", + "request": "launch", + "program": "${file}", + "env": { + "PYTEST_ADDOPTS": "--no-cov", + }, + "purpose": [ + "debug-test", + ], + "console": "integratedTerminal", + "justMyCode": false, + } + ] +} diff --git a/.vscode/launch.json.license b/.vscode/launch.json.license new file mode 100644 index 0000000..5a22ef0 --- /dev/null +++ b/.vscode/launch.json.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2023 Gert van Dijk + +SPDX-License-Identifier: CC0-1.0 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..3e75a86 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,56 @@ +{ + "files.associations": { + ".dockerignore": "ignore", // auto-interprets wrongly as Python somehow + }, + "files.exclude": { + ".coverage": true, + ".direnv/": true, + ".pytest_cache/": true, + ".pytest-cov/": true, + ".ruff_cache/": true, + "**/__pycache__/": true, + "**/.mypy_cache/": true, + "**/*.egg-info/": true, + "dist/": true, + }, + "files.insertFinalNewline": true, + "python.linting.mypyEnabled": true, + "python.linting.enabled": true, + "python.analysis.diagnosticMode": "workspace", + "python.analysis.indexing": true, + "python.analysis.typeCheckingMode": "strict", + "python.formatting.provider": "black", + "editor.rulers": [ + 88, // black's default + ], + // https://github.com/microsoft/vscode-isort#import-sorting-on-save + "[python]": { + "editor.codeActionsOnSave": { + "source.organizeImports": true, + } + }, + // for the 'ms-python.isort' extension. + // keep in sync with /run-all-linters script. + "isort.args": [ + "--profile", + "black", + ], + // Enable to run black+isort on every (manual) save, perhaps in user-level settings. 
+ // "editor.formatOnSave": true, + "python.testing.pytestEnabled": true, + // For Pytest IntelliSense + // https://marketplace.visualstudio.com/items?itemName=Cameron.vscode-pytest + "pytest.command": "\"${command:python.interpreterPath}\" -m pytest", + "coverage-gutters.coverageFileNames": [ + "coverage.xml", + ], + "coverage-gutters.coverageBaseDir": ".pytest-cov", + "coverage-gutters.coverageReportFileName": "html/index.html", + "shellcheck.executablePath": "shellcheck", + "shellcheck.useWorkspaceRootAsCwd": true, + // for the 'timonwong.shellcheck' extension. + // keep in sync with /run-all-linters script. + "shellcheck.customArgs": [ + "--norc", + ], +} diff --git a/.vscode/settings.json.license b/.vscode/settings.json.license new file mode 100644 index 0000000..5a22ef0 --- /dev/null +++ b/.vscode/settings.json.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2023 Gert van Dijk + +SPDX-License-Identifier: CC0-1.0 diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..a6a1f7d --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,123 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "run-all-linters-whole-project", + "type": "shell", + "command": "PYTHON_INTERPRETER=\"${command:python.interpreterPath}\" ./run-all-linters", + "icon": { + "id": "checklist", + "color": "terminal.ansiGreen" + }, + "presentation": { + "echo": true, + "reveal": "silent", + "focus": false, + "panel": "shared", + "showReuseMessage": false, + "clear": true, + "revealProblems": "onProblem", + }, + "problemMatcher": [ + { + "owner": "run-all-linters-whole-project", + "source": "mypy (via run-all-linters)", + "fileLocation": [ + "relative", + "${workspaceFolder}", + ], + "pattern": { + // src/purepythonmilter/myfile.py:32:37: error: Name "__qualname__" is not defined [name-defined] + // includes match for ''.py' in filename to avoid matching on shellcheck's + // output in gcc-format. 
+ "regexp": "^(.+\\.py.?):(\\d+):(\\d+): (\\w*): (.+)( \\[(.*)\\])?$", + "file": 1, + "line": 2, + "column": 3, + "code": 7, + "severity": 4, + "message": 5, + }, + }, + { + "owner": "run-all-linters-whole-project", + "source": "flake8 (via run-all-linters)", + "fileLocation": [ + "relative", + "${workspaceFolder}", + ], + "pattern": { + // ./src/purepythonmilter/myfile.py:5:1: F401 'typing.AsyncContextManager' imported but unused + "regexp": "^(.+):(\\d+):(\\d+): ((\\w)\\d+) (.+)$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "severity": 5, + "message": 6, + }, + }, + { + "owner": "run-all-linters-whole-project", + "source": "isort (via run-all-linters)", + "fileLocation": [ + "absolute", + ], + "pattern": { + // ERROR: /abs/path/to/purepythonmilter/src/myfile.py Imports are incorrectly sorted and/or formatted. + "regexp": "^([A-Z]+): (.+\\.py) (.+)$", + "severity": 1, + "file": 2, + "message": 3, + }, + }, + // The REUSE tool does not provide a machine-parsable output, because both the + // summary and the listing of files with issues are presented in the same way. 
+ { + "owner": "run-all-linters-whole-project", + "source": "reuse-lint (via run-all-linters)", + "pattern": { + // Unfortunately, your project is not compliant with version 3.0 of the REUSE Specification :-( + "regexp": "^(Unfortunately, your project is not compliant .*)", + "message": 1, + }, + }, + { + "owner": "run-all-linters-whole-project", + "source": "hadolint (via run-all-linters)", + "fileLocation": [ + "relative", + "${workspaceFolder}", + ], + "pattern": { + // Dockerfile:11 DL3006 warning: Always tag the version of an image explicitly + "regexp": "^(.+):(\\d+) ([A-Z0-9]+) ([a-z]+): (.+)$", + "file": 1, + "line": 2, + "code": 3, + "severity": 4, + "message": 5, + }, + }, + { + "owner": "run-all-linters-whole-project", + "source": "shellcheck (via run-all-linters)", + "fileLocation": [ + "relative", + "${workspaceFolder}", + ], + "pattern": { + // filename:25:26: error: Double quote array expansions to avoid re-splitting elements. [SC2068] + "regexp": "^(.+):(\\d+):(\\d+): ([a-z]+): (.+) \\[(.*)\\]$", + "file": 1, + "line": 2, + "column": 3, + "code": 6, + "severity": 4, + "message": 5, + }, + }, + ], + }, + ], +} diff --git a/.vscode/tasks.json.license b/.vscode/tasks.json.license new file mode 100644 index 0000000..a29b2e4 --- /dev/null +++ b/.vscode/tasks.json.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2023 Gert van Dijk + +SPDX-License-Identifier: Apache-2.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..5ef4356 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,176 @@ + + +# Contributing + +Your contributions, in any form, are very much welcome! 
🙏 + +When contributing to this repository with a proposed change to the code, please first +discuss the change you wish to make in an [issue][github-new-issue] (for bugs or +features), create a [discussion][github-new-discussion] post in case anything is +unclear, write me an email (`github@gertvandijk.nl`) or any other method with +the owners of this repository before making a change. + +## Getting started with development + +1. Get a copy of the repository and change your current directory in the project root. + +1. Create a clean Python 3.10.x virtual environment and activate it. + Suggested way is to install [direnv][direnv-home] together with [Pyenv][pyenv-github] + and use the project-supplied `.envrc` (`direnv allow`). + +1. Make sure the base Python packages such as `pip`, `setuptools` and `setuptools-scm` + are up-to-date *inside this virtualenv*. + + ```console + $ pip install --upgrade pip setuptools setuptools-scm[toml] + ``` + +1. This should list zero outdated packages at this point: + + ```console + $ pip list --outdated + ``` + +1. Install the project with development and examples dependencies with this virtualenv + active. + E.g.: + + ```console + $ pip install -e .[development,examples] + ``` + +1. Verify that all tests pass by running `pytest`. + + ```console + $ pytest + [...] + ==== 212 passed in 1.09s ==== + ``` + +1. Verify that you can run the `run-all-linters` script. + + This requires [hadolint][hadolint-github] and [shellcheck][shellcheck-home] to be + installed on the system. + + ```console + $ ./run-all-linters + [...] + Everything looks OK! 🎉 + ``` + +1. Now you're ready to make your changes! + +### Suggested IDE: VS Code + +The repository ships with helpers for use with +[Microsoft Visual Studio Code][ms-vscode-home]. +It suggests extensions, performs strict mypy type checking on the fly, visual indicators +for code style (e.g. 88-chars ruler), provides a task with 'problemMatcher' to run the +`run-all-linters` script and more. 
+ +In order for them to work correctly, please +[select the Python interpreter][ms-vscode-select-python] of the virtualenv you created, +e.g. `.direnv/python-3.10.9/bin/python`. +The linters and type checker will then be run inside this environment created with +specific versions specified rather than relying on whatever is available system-wide. + +ℹ️ If you like, enable automatic on-save formatting with project-provided settings using +the user-level setting `editor.formatOnSave`. +It will run `black` and `isort` for you whenever hitting Save on a file. + +## Pull Request Process + +ℹ️ The aim for this workflow is to end up with a clean git history which should be +helpful for anyone else in the future (e.g. using git-blame, git-log). + +1. Fork the repository to your own GitHub account. +1. Push the change(s) to your local fork, preferably to a branch with a self-descriptive + name. + Please use a clean git commit history with descriptive commit messages and refer to + relevant issues or discussions. + In case your work was done in multiple iterations, use amending and/or an interactive + rebase to craft a set of contained commits. +1. Ensure that your fork's branch is based off the latest upstream `develop` branch. + If not, fetch latest changes and *rebase* it. +1. Run the `run-all-linters` script to ensure all code adheres to the code style, strict + typing requirements and licensing headers. +1. Run `pytest` to ensure your code changes do not break current tests (adjust if + necessary) and your newly introduced lines are all covered by new/adjusted tests + (compare coverage output). +1. All ready?! + Create a pull request targeting the `develop` branch. + Write a title that concisely describes the main aim of the changes in the request. + Consider to tick the *"Allow edits by maintainers"* checkbox (see below). +1. Please allow the maintainer to take the time to review and test the code. 
+ In case code changes are requested, please amend the commit(s) affected and update + the commit message(s) if necessary. + +ℹ️ If you're uncomfortable to rebase/amend or unsure about commit message wording or even +adjusting test cases, please indicate that the maintainer is allowed to edit your pull +request when creating the pull request. +Then in the pull request description kindly request the maintainer to apply the work on +that and consider to mark the pull request as [draft][github-draft-pr-howto]. + +Notes: + +- Ideally, every single commit should be reversible and have a single responsibility. + Preparatory work leading up to an actual change should happen in separate commit(s) to + aid reviewing and having a useful git history. + Example of a well-crafted set of commits: + + `HEAD Implement feature X`
+ — the aim of the pull request + + `HEAD^ Refactor module Y to allow for subclassing ClassZ`
+ — improvement, but preparatory change + + `HEAD^^ Add tests for current logic in module Y`
+ — not a functional change in itself, but purely preparatory to assert a before-change + state is tested for. + +- Please adhere to the following style in commit messages: + + - Use present tense. + - Avoid captain obvious-only commit messages like *"Delete file x"* or + *"Update file y"*, because, well, anyone can see precisely that when looking at the + diff. + - Add the reason for the change (if not obvious). + To have a *why* later looking at the changes is very useful, e.g. when creating + release notes or even at review time understanding for the need to include the + change. + +- Please avoid merge commits in your pull request; use rebase instead. + Merge commits are harder to revert and to cherry-pick. +- Pull requests should apply cleanly on the latest upstream `develop` branch. + Preferably, your branch should be 'fast-forwardable' in git-speak. +- The maintainer is free to cherry-pick, amend and push your work in a pull request's + commits directly to any branch, effectively bypassing GitHub's pull request 'merge' + button. + Attributions will be preserved by either the commit's author field or a + `Co-authored-by` footer in the commits. + + This also enables to move forward with dependent commits in a pull request still + pending discussion on the adoption of that actual feature or bug fix approach. + E.g. the two commits at the bottom (`Update code style ...`, `Refactor module Y ...`) + could be merged for everyone to profit from already and reducing the size of the pull + request pending review as well. + +- At the expense of the clean git history policy GPG/SSH signatures on commits by + contributors could be lost as a result of the amendments by non-authors. + If you wish to maintain your digitally verifiable signature, please take the time to + submit your pull request in a state it can be *fast-forward*ed and rebase whenever + the target branch is updated (which may be frequent). 
+ +[github-new-issue]: https://github.com/gertvdijk/purepythonmilter/issues/new/choose +[github-new-discussion]: https://github.com/gertvdijk/purepythonmilter/discussions/new +[direnv-home]: https://direnv.net/ +[pyenv-github]: https://github.com/pyenv/pyenv +[hadolint-github]: https://github.com/hadolint/hadolint +[shellcheck-home]: https://www.shellcheck.net/ +[ms-vscode-home]: https://code.visualstudio.com/ +[ms-vscode-select-python]: https://code.visualstudio.com/docs/python/environments#_work-with-python-interpreters +[github-draft-pr-howto]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests#draft-pull-requests diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a2386e7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +# syntax=docker/dockerfile:1.3 + +ARG FROM_IMAGE + +### Base stage ### +# https://github.com/hadolint/hadolint/issues/339 +# hadolint ignore=DL3006 +FROM $FROM_IMAGE as base + +# https://github.com/hadolint/hadolint/issues/562 +# hadolint ignore=DL3005 +RUN apt-get update --quiet \ + && apt-get dist-upgrade --quiet --yes \ + && apt-get autoremove --quiet --yes \ + && rm -rf /var/lib/apt/lists + +# Keep in sync with stage below. +RUN adduser \ + --system \ + --group \ + --uid 500 \ + --disabled-login \ + --disabled-password \ + --gecos "purepythonmilter,,," \ + --home /purepythonmilter \ + purepythonmilter + +USER purepythonmilter:purepythonmilter +WORKDIR /purepythonmilter + +# Silence warning from pip that local bin directory is not on PATH. +# Keep in sync with stage below. +RUN mkdir -p "${HOME}/.local/bin" +ENV PATH="/purepythonmilter/.local/bin:${PATH}" + +### Build stage 1/2: dependencies ### +FROM base as builder-deps + +USER root:root +# Install a specified version of pip & setuptools globally. 
+# Not in the user's site-packages, because we don't need it in there as dependency. +# Also, mount a Buildkit-cachable ~/.cache directory to speed up pip-installs. +# And therefore purposefully ignore DL3042. +# hadolint ignore=DL3042 +RUN --mount=type=cache,target=/root/.cache \ + python -m pip install pip==23.0 setuptools==67.1.0 setuptools-scm[toml]==7.1.0 +USER purepythonmilter:purepythonmilter + +# Install dependencies (for 'examples' optional set) with pinned version manually. +# Mount a Buildkit-cachable ~/.cache directory to speed up pip-installs. +# And therefore purposefully ignore DL3042. +# hadolint ignore=DL3042 +RUN --mount=type=cache,target=/purepythonmilter/.cache \ + python -m pip install --user \ + attrs==22.2.0 \ + click==8.1.3 + +### Build stage 2/2: the package itself ### +FROM builder-deps as builder + +# Copy to a temp location, because pip with setuptools backends +# needs a writable source directory. +# https://pip.pypa.io/en/stable/cli/pip_install/#local-project-installs +RUN --mount=type=bind,source=/,target=/purepythonmilter/reporoot \ + cp -r /purepythonmilter/reporoot /tmp/reporootcopy +# By passing SETUPTOOLS_SCM_PRETEND_VERSION we eliminate the need for git here. +ARG SETUPTOOLS_SCM_PRETEND_VERSION +RUN python -m pip --no-cache-dir install --user '/tmp/reporootcopy[examples]' + +# Verify that all packages are up-to-date (`pip list --outdated` should give no output), +# and do not cache (always run, except within the same minute). +ARG CACHEBUST_MINUTE +# Unfortunately, pip returns exit status 0 regardless of status. +# hadolint ignore=SC2028 +RUN outdated=$(python -m pip list --no-cache-dir --outdated 2>&1) \ + && [ -z "$outdated" ] \ + || (echo "'pip list --outdated' @ ${CACHEBUST_MINUTE}:\n${outdated}"; exit 1) + +### Final stage ### +FROM base + +# Dependencies only (separate as stable layer). +COPY --from=builder-deps /purepythonmilter/.local .local +# purepythonmilter itself. 
+COPY --from=builder /purepythonmilter/.local .local +ENV PATH="/purepythonmilter/.local/bin:${PATH}" + +ENV PUREPYTHONMILTER_BIND_HOST=0.0.0.0 + +# If you want to run a specific example by default, specify like this: +# CMD ["python", "-m", "purepythonmilter.examples.debug_log_all", "--log-level=DEBUG"] +# +# Or else, specify the command at run time. diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt new file mode 100644 index 0000000..137069b --- /dev/null +++ b/LICENSES/Apache-2.0.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
+ +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSES/CC0-1.0.txt b/LICENSES/CC0-1.0.txt new file mode 100644 index 0000000..0e259d4 --- /dev/null +++ b/LICENSES/CC0-1.0.txt @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0ded1e1 --- /dev/null +++ b/Makefile @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +SHELL=/bin/bash -o pipefail + +DOCKER_BUILD_OPTS ?= +PYTHON_PKG_VERSION ?= $(shell python -m setuptools_scm) +# Docker tags with '+' are not supported. +IMAGE_TAG ?= $(shell echo "$(PYTHON_PKG_VERSION)" | tr + _) +IMAGE_NAME := purepythonmilter +# python:3.10.9-slim-bullseye linux/amd64 @ 2023-01-24 +# https://hub.docker.com/_/python/tags?page=1&name=3.10-slim-bullseye +FROM_IMAGE := python@sha256:4ee11faf62dd64d0975cf1d0061d37440ae2289a791f659d5f41c0c05fb6b180 + +CACHEBUST_MINUTE := $(shell date '+%Y-%m-%d %H:%M') + +.NOTPARALLEL: + +.PHONY: default +default: build push + +.PHONY: guard-* +guard-%: + @ if [ "$(${*})" = "" ]; then \ + echo "Variable $* not set"; \ + exit 1; \ + fi + +.PHONY: build +build: guard-IMAGE_REGISTRY + DOCKER_BUILDKIT=1 \ + docker build $(DOCKER_BUILD_OPTS) \ + -t "$(IMAGE_REGISTRY)/$(IMAGE_NAME):$(IMAGE_TAG)" \ + --build-arg="FROM_IMAGE=$(FROM_IMAGE)" \ + --build-arg="SETUPTOOLS_SCM_PRETEND_VERSION=$(PYTHON_PKG_VERSION)" \ + --build-arg="CACHEBUST_MINUTE=$(CACHEBUST_MINUTE)" \ + . 
+ +.PHONY: push +push: guard-IMAGE_REGISTRY + docker push "$(IMAGE_REGISTRY)/$(IMAGE_NAME):$(IMAGE_TAG)" diff --git a/README.md b/README.md new file mode 100644 index 0000000..fbbd3d0 --- /dev/null +++ b/README.md @@ -0,0 +1,263 @@ + + +# A modern pure-Python Milter framework + +[![Python 3.10+](https://img.shields.io/badge/Python-3.10%2B-blue)](https://python.org/) +[![Checked with mypy](https://img.shields.io/badge/mypy-strict-blue)](https://mypy.readthedocs.io/en/stable/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000)](https://github.com/psf/black) +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v1.json)](https://github.com/charliermarsh/ruff) +[![Imports: isort](https://img.shields.io/badge/imports-isort-%231674b1?labelColor=ef8336)](https://pycqa.github.io/isort/) +[![Hadolint](https://img.shields.io/badge/hadolint-passing-brightgreen)](https://github.com/hadolint/hadolint) +[![ShellCheck](https://img.shields.io/badge/ShellCheck-passing-brightgreen)](https://www.shellcheck.net/) +[![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-brightgreen)](https://www.apache.org/licenses/LICENSE-2.0) +[![REUSE compliant](https://img.shields.io/badge/reuse-compliant-brightgreen)](https://reuse.software/) + +Mail servers ([MTA][wikipedia-mta]s) like [Postfix][postfix-home] and +[Sendmail][sendmail-org-home] can connect to an external filter process, called a +'Milter', for actions to take during an incoming SMTP transaction. +You may consider it like a plugin on the mail server software using callbacks over a TCP +or UNIX socket. + +A Milter can have any custom condition to reject/tempfail/discard a message, manipulate +headers and/or body and more. +This can be useful if you require custom validations or manipulative actions before mail +is accepted and that is unavailable in ~~your MTA's~~ Postfix's built-in features. 
+The use of a Milter would typically be the right choice when it comes to complex +decision making on accepting mail 'before queue' with conditions on headers or the +message body. + +*Purepythonmilter* aims to be a modern, Postfix-first, high-quality, strictly typed +framework and library. +And then all of that with an easy to use API and a high-performance asynchronous +embedded server. + +## Getting started 🚀 + +Install Purepythonmilter, e.g. using `pip`: + +```console +$ pip install purepythonmilter +``` + +Self-descriptive example Milter app: + +```python +import purepythonmilter as ppm + + +async def on_mail_from(cmd: ppm.MailFrom) -> ppm.VerdictOrContinue: + if cmd.address.lower().endswith("@example.com"): + return ppm.RejectWithCode(primary_code=(5, 7, 1), text="not allowed here!") + return ppm.Continue() + + +mymilter = ppm.PurePythonMilter(name="mymilter", hook_on_mail_from=on_mail_from) +mymilter.run_server(host="127.0.0.1", port=9000) +``` + +### Configuration with Postfix + +1. Start your Milter application or run one of the examples directly — see + [`examples/`][examples-readme]. +2. Start a Postfix instance with a configuration like + `smtpd_milters = inet:127.0.0.1:9000` (replace IP address and port number + accordingly). + +### Run an example Milter app with Postfix in Docker + +Described here 👉 [`examples/README.md`][examples-readme]. + +## *Example* use cases for a Milter app 💡 + +- From-header and envelope sender (Return-Path) alignment validation, for compliance + with DMARC ([RFC7489 section 3.1][dmarc-rfc7489-sec31]) or reasons of preventing abuse + (impersonation). + Prevent sending such messages out by rejecting non-compliant messages on submission + time already and include a descriptive error message to the user. +- Encrypt sensitive connection/account information and attach that in a custom header + value for outbound mail. 
+ In case of abuse, the information can be decrypted by an operator from the raw mails + concerned and eliminates the need to store this data centrally for all mail. +- Cryptographically sign outgoing email or verify signatures of incoming email with some + custom scheme. + *In case you don't like the existing commonly used [OpenDKIM Milter][opendkim-readme] + and want to implement your own DKIM signer/verifier.* + +## What about PyMilter? + +Purepythonmilter was written as an alternative to, and, out of frustration with it. +[PyMilter] is not type annotated (mypy), has signal handling issues (for me), the +dependency on a [hand-crafted Python-C-extension][pymilter-miltermodule-c] linking to +Sendmail's libmilter and no offering of a binary package ([wheel][pep-427]) to ease +installation. 😥 + +*By the way, did you know that Sendmail is — yes even in 2023 — written in K&R C +(predating ANSI-C)?[^sendmail-relnotes-kr-c-deprecation]* 🙈 + +So, yeah, that's the short version of why I started this project. 🤓 + +## Documentation 📖 + +- [`docs/design.md`](./docs/design.md) — design and intents of this project. 🧠 +- [`docs/api.md`](./docs/api.md) — API documentation +- [`docs/milter-protocol.md`](./docs/milter-protocol.md) — raw protocol notes. ✍️ +- [`CONTRIBUTING.md`](./CONTRIBUTING.md) — for development setup and contribution + guidelines + +## Limitations + +- Any functionality requiring intermediary responses (such as 'progress') is not yet + implemented in the API. +- Any functionality that requires carrying state over phases is not yet supported in the + API. (e.g. combining input from two different hooks) +- Mail headers are not 'folded'/'unfolded', but given or written as-is. +- UNIX domain sockets are not supported for the Milter server to listen on (TCP is). + +## Feedback 💬 + +This project is very new and feedback is very much welcome! 
+Please don't hesitate to [file an issue][github-new-issue], drop an idea or ask a +question in the [discussions][github-new-discussion]. + +[Ideas & Feature Requests][github-ideas-feature-requests] are in there too. 💡 + +Alternatively, just drop me a message at `github@gertvandijk.nl`. 📬 + +## When *not* to use a Milter + +If you want to accomplish something that could be done using a custom dynamic +lookup in Postfix, such as message routing or policy lookups. +Postfix offers quite some built-in dynamic lookup types and a Milter is probably *not* +what you're looking for. +The Milter protocol is relatively complex and its use may not be required for your use +case. + +Be sure to also have a look at implementing your own custom dynamic lookup table in +Postfix using the [socketmap protocol][postfix-socketmap-table] or policy delegation +with the much simpler [policy delegation protocol][postfix-smtpd-policy-protocol]. +Most of the email's and connection's *metadata* is available there too. +For example, the [postfix-mta-sts-resolver] uses the former and the SPF policy daemon +[pypolicyd-spf] uses the latter. +Sometimes the use of a Milter may still be considered; for example, the SPF verification +filter [spf-milter] is implemented using the Milter protocol. + +For content inspection, there's Postfix's [Content filter][postfix-filter-readme], but +beware that it's running 'after queue'. +It takes quite some orchestration to avoid bounces and correctly feed the mail back into +Postfix. + +Another aspect to consider is MTA support. +While the alternatives for Postfix listed above are still Postfix-specific, other more +generic lookup methods also exist. +For example, a dynamic DNS lookup could be much better adopted when migrating to another +MTA than any of the above. 
+ +Example use cases which are *possible* to implement using a Milter, but what could also +be accomplished using alternative — likely simpler — ways: + +- Inject custom headers to add information on which `smtpd` instance the email was + received for routing/classifications later. + This would typically be done using Postfix's policy delegation returning + `PREPEND headername: headertext` as action. +- Validate sender restrictions for a data backend type not supported by Postfix, + such as interacting with an HTTP REST API / webhooks. + Again, policy delegation may be much simpler, but if conditions involve mail contents, + then you may need a Milter still. +- Custom centralized rate limiting and billing in an email hosting platform with several + account tiers. + And similarly for this one, policy delegation is probably much simpler. +- A read-only Milter that logs in a structured way and perhaps with certain conditions. + This would eliminate parsing Postfix's text log files, well, for incoming connections + at least. + [Freeaqingme/ClueGetter] is such an application using the Milter protocol for a part + of the functionality. + +## Alternatives to Purepythonmilter + +Python alternatives appear to be unmaintained and no longer actively supported for +years. + +- [python-libmilter]: marked as ['no longer supporting'][python-libmilter-readme-note], + as of late 2022. +- [PpyMilter]: Python 2-only (last commit 2015). + +Alternatives in other programming languages without a dependency on Sendmail's libmilter +are: + +- [indymilter]: an asynchronous Milter library written in **Rust**. +- [Sendmail::PMilter][sendmail-pmilter]: a pure-**Perl** implementation (last release + 2011). +- [emersion/go-milter]: a Milter library written in **Go** (in active development). +- [phalaaxx/milter]: another Milter library written in **Go** (last commit 2020). +- [andybalholm/milter]: a simple framework for writing milters written in **Go** (last + commit 2016). 
+- [nightcode/jmilter]: a Milter library written in **Java**. +- [sendmail-jilter]: another Milter library written in **Java** (last release 2011). +- [milterjs][Atlantis-Software/milterjs]: a Milter library written in **Javascript** + (last release 2018). + +Other relevant projects (not really reusable libraries): +[phalaaxx/ratemilter], [phalaaxx/pf-milters], [mschneider82/milterclient], +[andybalholm/grayland], [Freeaqingme/ClueGetter]. + +## License + +The major part of the project is [Apache 2.0][apache-license-2] licensed. + +Files deemed insignificant in terms of copyright such as configuration files are +licensed under the public domain "no rights reserved" [CC0] license. + +The repository is [REUSE][reuse-home] compliant. + + +[PyMilter]: https://pythonhosted.org/pymilter/ +[PpyMilter]: https://github.com/jmehnle/ppymilter +[python-libmilter]: https://github.com/crustymonkey/python-libmilter +[postfix-socketmap-table]: https://www.postfix.org/socketmap_table.5.html +[postfix-smtpd-policy-protocol]: https://www.postfix.org/SMTPD_POLICY_README.html#protocol +[pypolicyd-spf]: https://launchpad.net/pypolicyd-spf +[dmarc-rfc7489-sec31]: https://datatracker.ietf.org/doc/html/rfc7489#section-3.1 +[opendkim-readme]: http://www.opendkim.org/opendkim-README +[sendmail-pmilter]: https://metacpan.org/pod/Sendmail::PMilter +[postfix-mta-sts-resolver]: https://github.com/Snawoot/postfix-mta-sts-resolver +[wikipedia-mta]: https://en.wikipedia.org/wiki/Message_transfer_agent +[postfix-home]: https://www.postfix.org/ +[sendmail-org-home]: https://www.sendmail.org/ +[sendmail-relnotes-kr-c-deprecation]: https://salsa.debian.org/debian/sendmail/-/blob/0ad6934dd77ca9ef1e2a64a9862ceb9b56a7d3f8/RELEASE_NOTES#L48-53 +[examples-readme]: ./src/purepythonmilter/examples/README.md +[postfix-filter-readme]: https://www.postfix.org/FILTER_README.html +[indymilter]: https://gitlab.com/glts/indymilter +[andybalholm/milter]: https://github.com/andybalholm/milter +[andybalholm/grayland]: 
https://github.com/andybalholm/grayland +[emersion/go-milter]: https://github.com/emersion/go-milter +[phalaaxx/milter]: https://github.com/phalaaxx/milter +[phalaaxx/ratemilter]: https://github.com/phalaaxx/ratemilter +[phalaaxx/pf-milters]: https://github.com/phalaaxx/pf-milters +[mschneider82/milterclient]: https://github.com/mschneider82/milterclient +[Freeaqingme/ClueGetter]: https://github.com/Freeaqingme/ClueGetter +[nightcode/jmilter]: https://github.com/nightcode/jmilter +[sendmail-jilter]: http://sendmail-jilter.sourceforge.net/ +[Atlantis-Software/milterjs]: https://github.com/Atlantis-Software/milterjs +[github-new-issue]: https://github.com/gertvdijk/purepythonmilter/issues/new/choose +[github-new-discussion]: https://github.com/gertvdijk/purepythonmilter/discussions/new +[github-ideas-feature-requests]: https://github.com/gertvdijk/purepythonmilter/discussions/categories/ideas-feature-requests +[spf-milter]: https://gitlab.com/glts/spf-milter +[python-libmilter-readme-note]: https://github.com/crustymonkey/python-libmilter/blob/9793148913232b726da692c7fd0ae2c3edec497c/README.md#no-longer-supporting +[CC0]: https://creativecommons.org/share-your-work/public-domain/cc0/ +[apache-license-2]: https://www.apache.org/licenses/LICENSE-2.0 +[reuse-home]: https://reuse.software/ +[pep-427]: https://peps.python.org/pep-0427/ +[pymilter-miltermodule-c]: https://github.com/sdgathman/pymilter/blob/master/miltermodule.c + +[^sendmail-relnotes-kr-c-deprecation]: [Sendmail 8.17.1 Release notes][sendmail-relnotes-kr-c-deprecation]: + + > 2021/08/17 + > + > Deprecation notice: due to compatibility problems with some third party code, we + > plan to finally switch from K&R to ANSI C. If you are using sendmail on a system + > which does not have a compiler for ANSI C contact us with details as soon as + > possible so we can determine how to proceed. 
diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..fa57e37 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,162 @@ + + +# Purepythonmilter API reference + +***Note**: This document should be generated from source at some point. +It also lacks the documentation of all attributes currently.* + +## `PurePythonMilter` class construction + +A PurePythonMilter app is instantiated by providing the hooks to configure and which +flags to set. + +Example: + +```python +mymilter = PurePythonMilter( + hook_on_connect=my_on_connect, + can_add_headers=True, +) +``` + +### Hooks + +Each hook points to a callable which needs to have a return type annotation. + +A hook callable must accept exactly one argument; one of the hook-specific Command +classes listed in the section below. + +A `None` return type annotation will hint the MTA that the hook will not generate any +response and it will continue, so in that case you *must not* return a response. +`None` is a possible return type for all of the hooks below. + +- `hook_on_connect(cmd: Connect)` + + return type: `None` or any (subclass) of `VerdictOrContinue` +- `hook_on_helo(cmd: Helo)` + + return type: `None` or any (subclass) of `VerdictOrContinue` +- `hook_on_mail_from(cmd: MailFrom)` + + return type: `None` or any (subclass) of `VerdictOrContinue` +- `hook_on_rcpt_to(cmd: RcptTo)` + + return type: `None` or any (subclass) of `VerdictOrContinue` +- `hook_on_data(cmd: Data)` + + return type: `None` or any (subclass) of `VerdictOrContinue` +- `hook_on_header(cmd: Header)` + + return type: `None` or any (subclass) of `VerdictOrContinue` +- `hook_on_end_of_headers(cmd: EndOfHeaders)` + + return type: `None` or any (subclass) of `VerdictOrContinue` +- `hook_on_body_chunk(cmd: BodyChunk)` + + return type: `None` or any (subclass) of `VerdictOrContinue`, `SkipToNextStage` +- `hook_on_end_of_message(cmd: EndOfMessage)` + + return type: `None` or any (subclass) of `AbstractResponse`. 
+ `None` is translated into `Continue()`. +- `hook_on_abort(cmd: Abort)` + + return type: `None` +- `hook_on_quit(cmd: Quit)` + + return type: `None` +- `hook_on_unknown(cmd: Unknown)` + + return type: `None` or any (subclass) of `VerdictOrContinue` + +### Flags + +#### Hook flags + +- `on_rcpt_to_include_rejected` (default: False) +- `headers_with_leading_space` (default: False) + +#### Manipulation flags + +- `can_add_headers` (default: False) + + in order to use `AppendHeader` or `InsertHeader` manipulations. +- `can_add_recipients` (default: False) + + in order to use the `AddRecipient` manipulation. +- `can_add_recipients_with_esmtp_args` (default: False) + + in order to use the `AddRecipientWithEsmtpArgs` manipulation. +- `can_change_body` (default: False) + + in order to use the `ReplaceBodyChunk` manipulation. +- `can_change_headers` (default: False) + + in order to use the `ChangeHeader` manipulation. +- `can_change_mail_from` (default: False) + + in order to use the `ChangeMailFrom` manipulation. +- `can_remove_recipients` (default: False) + + in order to use the `RemoveRecipient` manipulation. +- `can_quarantine` (default: False) + + in order to use the `Quarantine` response in `hook_on_end_of_message`. 
+ + +## `Command`s + +- *BaseCommand* + - `Data` + - `EndOfHeaders` + - `EndOfMessage` + - `Abort` + - `Quit` + - `QuitNoClose` + - *BaseCommandWithData* + - `OptionsNegotiate` + - `Connect` + - `Helo` + - `Header` + - `BodyChunk` + - `Unknown` + - `DefineMacro` + - *BaseMailFromAndRcptTo* + - `MailFrom` + - `RcptTo` + +## `Response`s + +- *AbstractBaseResponse* + - *AbstractManipulation* + - *BaseChangeRecipient* + - `AddRecipient` + - `AddRecipientWithEsmtpArgs` + - `RemoveRecipient` + - *BaseHeaderManipulation* + - `AppendHeader` + - `InsertHeader` + - `ChangeHeader` + - `ReplaceBodyChunk` + - `ChangeMailFrom` + - *AbstractResponse* + - *AbstractVerdict* + - *BaseVerdictNoData* + - `Accept` + - `Reject` + - `DiscardMessage` + - `CauseConnectionFail` + - *BaseReplyWithCode* + - `RejectWithCode` + - `TempFailWithCode` + - `Quarantine` + - *BaseResponseNoData* + - `Continue` + - `SkipToNextStage` + - `Progress` + - `OptionsNegotiateResponse` + +`VerdictOrContinue` = `AbstractVerdict | Continue` diff --git a/docs/design.md b/docs/design.md new file mode 100644 index 0000000..4c0787b --- /dev/null +++ b/docs/design.md @@ -0,0 +1,125 @@ + + +# Design + +The *MilterServer* class manages asyncio's event loop which is instructed to open a +listening socket and what to call on incoming connections. +It also registers the process signal handlers and takes care of the administration of +pending connections. +(The raw TCP/Socket connection is handled by asyncio's event loop.) + +A method of the *MtaMilterConnectionHandler* is set as callback for new connections. +It then manages the connection between the MTA and the Milter server on the lower level, +e.g. reading from and writing to socket and delegating the decoding/encoding of the +packets. + +A more high-level Milter-protocol connection handler is available as *MtaMilterSession* +and deals with application-layer logic of a Milter server. 
+This class receives decoded **Command**s in its queue and deals with outgoing +**Response**s pushing them down the socket via *MtaMilterConnectionHandler*. + +At the start of each connection (session), the *MtaMilterSession* will instantiate an +'app' using a factory. +The 'app' (implementing the *AbstractMilterApp*) is where the business logic takes +place, of which parts are to be provided by the user of this library. +For any **Command** or event the *MtaMilterSession* calls the `on_*` methods on the app +instance. +It also provides some conveniences like normalization of header names, carries the +macros along the Milter stages, etc. + +The *PurePythonMilter* builds the object implementing an *AbstractMilterApp* class from +the API. +It also inspects the code of the desired hooks to toggle the most efficient protocol +flags in negotiation with the MTA. +For example, if there's no hook for `on_connect`, it asks the MTA to skip the on-connect +callback. + +In case you don't like the 'batteries included' that come with the above, you could +perhaps reuse/subclass the *MtaMilterSession* from this library as an alternative. +Going deeper than that does not make much sense probably; it's basically boilerplate +around asyncio's event loop and protocol definitions. + +## Opt-in philosophy + +*"You will only get what you asked for."* + +The Milter protocol is opt-out-driven, but the purepythonmilter API reverses this for +performance reasons. + +- All callbacks will be disabled, unless declared as desired. +- All available "no reply" protocol flags will be enabled, unless a response is declared + as desired. + It makes MTA callbacks to be asynchronous by default where possible. +- 'Meta' commands like *Options negotiate* and *Define macro* are hidden and the latter + is provided for you as attribute `macros` of the actual command. 
+ +## Modern-only approach + +The Milter protocol seems to suffer heavily from historic changes and some Sendmail +specific implementation details. +Instead of trying to be compatible, purepythonmilter assumes the use of a modern Postfix +with Milter protocol version 6. + +To make the implementation as clean as possible, purepythonmilter may assume or mandate +the use of the latest Python version (3.10 at time of writing) and [mypy-strict] typing +annotations. +While this approach reduces the compatibility heavily, it's common nowadays to use +different packaging (i.e. containers, AppImage, etc.) for operating systems that lack +the latest version of Python. + +This approach keeps focus on correct and complete Milter implementation rather than +spending time on maintaining compatibility with older CPython. + +## Production-ready examples + +This library is not *just* a library; it's supposed to provide some examples available +as runnables (entrypoints) that should work in a production setting. +Integration tests ensure that these examples should always be runnable and up-to-date. + +## Layered decoding of the Milter protocol + +Incoming packets (commands): + +- TCP/Unix socket data is taken as **Packet**s ('Length-Type-Value' encoded datagrams) + by the *MtaConnection* using asyncio's low-level [StreamReader][asyncio-streamreader]. + +- The *PacketDecoder* decodes the length bytes, strips it off and returns zero, one or + more 'Type-Value' **Payload**s as a generator. + The *PacketDecoder* is stateful in the sense that it stores incomplete packet data and + reassembles them as needed. + +- A **Payload** is decoded to **Command**-**CommandData** pair by the *PayloadDecoder*. + +- The *PayloadDecoder* calls the **Command**-specific decoder to decode the + **CommandData** as attributes to the **Command**. + +- The *MtaMilterConnectionHandler* then puts the decoded **Command**s on a + *CommandQueue* ([asyncio.Queue][asyncio-queue]) of the *MtaMilterSession*. 
+ +Outgoing packets (responses): + +- The *MtaMilterSession* receives a **Response** object from the Milter app as + return value of the method called (as per *AbstractMilterApp* interface). + +- This **Response** object is then passed to the *MtaMilterConnectionHandler* writer + where it's encoded to a payload. + +- The *MtaMilterConnectionHandler* writer then encodes the **Payload** into one or more + **Packet**s and writes them on the socket to the MTA using asyncio's + [StreamWriter][asyncio-streamwriter]. + +Common for both incoming and outgoing: + +- At any stage and layer a *ProtocolViolation* may be raised on input error(s). +- At any stage and layer a *NotImplementedError* may be raised on known (defined), but + unsupported input. + + +[asyncio-streamreader]: https://docs.python.org/3/library/asyncio-stream.html#asyncio.StreamReader +[asyncio-streamwriter]: https://docs.python.org/3/library/asyncio-stream.html#asyncio.StreamWriter +[asyncio-queue]: https://docs.python.org/3/library/asyncio-queue.html +[mypy-strict]: https://mypy.readthedocs.io/en/stable/command_line.html#cmdoption-mypy-strict diff --git a/docs/milter-protocol.md b/docs/milter-protocol.md new file mode 100644 index 0000000..cf402b5 --- /dev/null +++ b/docs/milter-protocol.md @@ -0,0 +1,432 @@ + + +# Milter protocol high-level overview + +Sendmail's libmilter documentation mixes both the network protocol and the C-level API a +lot. +[Postfix's Milter documentation][postfix-milter-readme] is great, but assumes prior +knowledge on Sendmail's Milter capabilities. + +This page gives a fresh and more high-level overview of how a Milter operates. +As the Milter protocol is not described by an RFC, most of this information is derived +from public sources like Sendmail's libmilter source code, PyMilter documentation and +experimentation with Postfix's behaviour. + +ℹ️ It is important to understand the basics of SMTP first; the Milter stages are a +superset of the SMTP stages, more or less. 
+See [`smtp-recap.md`](./smtp-recap.md) if you need a refresher on SMTP. + +## Milter protocol basics + +During most of the steps during an SMTP conversation an MTA can call a command to a +configured Milter app that 'hooks into' the inspection, mangling and decision making. + +Note that the MTA as mail server is a *client* on the operational level of a Milter and +the Milter app is run as a *server*. + +An MTA-Milter connection shares the lifetime of the SMTP client-MTA connection; there's +one initiated by the MTA for each connection it receives. [^connection-reuse] +Every new connection starts with a negotiation of options. +The protocol is mostly synchronous, but allows for opt-out on commands during +negotiation. +The transport layer must already provide reliability and is typically a Unix socket or +TCP/IP. + +Unlike SMTP, the Milter protocol is binary and not line based. + +Packets are 'Length-Type-Value' encoded meaning that every packet must start with a +length field (unsigned 32-bit integer)... + +- ... for the server (MTA), followed by a *command* (single byte character) and *command + data* (command specific, arbitrary length and optional). + +- ... for the client (Milter app) as response it is followed by a *response type* + (single byte character) and *arguments* as data (action specific, arbitrary length and + optional). + +The client can send zero, one or multiple response packets in reply to a server command, +depending on the command and negotiated protocol flags. + +Similarly, the server can send one or multiple packets in one go without waiting for the +client to reply (again, depending on the negotiated protocol flags and the command). + +String arguments to commands are basically (concatenated) NULL-terminated C-strings. + +Some responses can be regarded as actions. +Some actions indicate a final verdict, and some are intermediate. +An example: in reply to an *End of message* command, modify multiple headers: +1. ADD_HEADER(...) +1.
INSERT_HEADER(...) +1. CONTINUE + +## Milter protocol terminology + +- **callback** or **command**: the hook the MTA will call the Milter app on which + corresponds to the transition to a new SMTP stage or an SMTP command given such as + `RCPT TO` (repeated for every recipient). + Callbacks can be enabled/disabled by the Milter app as desired for the application to + increase performance. +- **macro**: a variable that the MTA may expose to the Milter app. See also *symbol*. +- **symbol**: the identifier of a macro, typically a string or a single character. + E.g. `i` for Postfix queue ID, `{auth_authen}` for the username post-authentication. + It seems that historically single character symbols were used and this was extended + later with longer ones that include braces as safeguard. + +## Milter protocol commands and stages + +### Options negotiate (`SMFIC_OPTNEG`) + +On every SMTP connection the MTA receives, it will open a new connection with the Milter +application. +Every new MTA-Milter connection will (re)start the negotiation with the Milter app and +the results are thereby local to the connection. +Part of the negotiation is the protocol version, MTA capabilities, desired protocol +flags and the set of callbacks the Milter app desires to opt-out for. + +In other words, at this early stage the Milter and the MTA connection options are +exchanged and no MTA-connection or message specifics are included at this point. + +The Milter-enabled MTA must send the command with data to indicate: + +- Supported protocol flags for the Milter app. + Some flags may be disabled server-side by MTA configuration or simply not implemented. +- Actions it may perform on messages. + Some flags may be disabled server-side, e.g. to restrict a Milter to be read-only for + security reasons. + +The Milter application must respond to indicate: + +- Desired protocol flags by the app. + E.g. 
which callbacks to perform for this app, whether or not the app could send a + reply for a command, whether or not to include the leading space for headers, etc. +- Actions it may perform on messages. + E.g. hint the MTA the Milter may add headers to the message (but not modify the body). +- Optionally, and only if supported by the MTA as indicated by a flag, the list of + desired macros that the MTA should include per protocol stage (from a list defined + separately as macro stages). + +### Macro (`SMFIC_MACRO`) + +Defines a (list of) macro(s). + +Usually called prior to every other regular command (except Options negotiate) to +provide more context. + +### Connection established (`SMFIC_CONNECT`) + +This callback will provide early SMTP connection level details such as the remote IP +address connecting to the MTA. + +Note that this is prior to SMTP application-level negotiation such as upgrading the +connection security with e.g. StartTLS. + +### HELO (`SMFIC_HELO`) + +This callback will provide the HELO/EHLO name. + +Macros may provide more SMTP connection level details such as TLS versions used (only in +case of plain TLS and not StartTLS). + +### Envelope sender address (`SMFIC_MAIL`, SMTP: `MAIL FROM`) + +This callback will provide the sender address (envelope-from). + +Macros will provide more SMTP authentication level details and TLS connection details +may appear the first time in this stage if StartTLS is used. + +### Recipients (`SMFIC_RCPT`, SMTP: `RCPT TO`) + +Called for every recipient. +May also include recipients rejected by the MTA for other reasons (protocol flag +`SMFIP_RCPT_REJ`). + +### DATA (`SMFIC_DATA`, SMTP: `DATA`) + +Starting from this stage, the Postfix queue ID will be available. + +### Header (`SMFIC_HEADER`) + +Called for every header given in the DATA stage, providing the header name and value +(folded). + +### End of headers (`SMFIC_EOH`) + +Empty callback just called before the body is sent. 
+ +### Body (chunked, `SMFIC_BODY`) + +Called multiple times if the body is larger than the chunk size. + +### End of body/message (`SMFIC_BODYEOB`) + +Empty callback after the body. + +Most of the message manipulation actions can only take place in response to this +command. + +### Unknown command (`SMFIC_UNKNOWN`) + +Whenever an unrecognized SMTP command is sent to the MTA by a client. + +### Abort (`SMFIC_ABORT`) + +The MTA may want to abort the milter for reasons of an event that led to a final state +such as a connection error or a rejection that was effectuated by other means than this +Milter app. + +Postfix appears to send this callback twice after responding with *Continue* at *End of +body*, the reason being unclear. + +### Quit (`SMFIC_QUIT`) + +When the SMTP command `QUIT` is sent by the client. + +## Actions (responses) available + +General actions: +- Continue (`SMFIR_CONTINUE`): neutral; proceed processing as normal. +- Accept (`SMFIR_ACCEPT`): positive final verdict; no further callbacks will happen. +- Reject (`SMFIR_REJECT`, optionally with a custom status code `SMFIR_REPLYCODE`): + - on a connection: negative final verdict; reject/reset the connection + - on a recipient command: reject the recipient (not the message) + - on a message: negative final verdict; reject the message +- Tempfail (case of `SMFIR_REPLYCODE`): like Reject, but with a temporary failure code + indicating that the client can try again later. +- Discard (`SMFIR_DISCARD`): pretends to the client that the message is accepted by the + MTA, but it will actually *silently* drop it. + Use with caution. + Invalid on connections. +- Connection fail (`SMFIR_CONN_FAIL`): cause an SMTP connection failure. +- Progress (`SMFIR_PROGRESS`): a 'keepalive' response to inform the MTA the Milter app + is still processing to prevent a reset by timeout from the MTA. + May be sent multiple times. 
+ +On *End of message* callback: +- Change sender address (`SMFIR_CHGFROM`) +- Add recipient (`SMFIR_ADDRCPT`, variant: with ESMTP arguments `SMFIR_ADDRCPT_PAR`) +- Remove recipient (`SMFIR_DELRCPT`) +- Quarantine (`SMFIR_QUARANTINE`): puts the message in the hold queue. + +On Body chunk commands: +- Replace body chunk (`SMFIR_REPLBODY`): replaces the chunk with the one in the response + argument. +- Skip (`SMFIR_SKIP`): to signal the MTA to not call more commands for subsequent + chunks; skip ahead to the *End of message* callback. + +## Limitations & Caveats + +### End of message is special + +#### Manipulations only at End of message + +Most of the message manipulation actions can only be sent in response to an End of +message command. + +#### End of message cannot be disabled + +There's no flag to disable the End of message command and a response by the Milter is +mandatory. + +### No MTA-Milter connection until SMTP connected + +This means that you should not expect to see a connection from the MTA to the Milter(s) prior +to the MTA receiving a connection. +As a result, any potential MTA-Milter connection issues will not appear before the first SMTP +connection/message is handled by the MTA. + +### Intermediate replies are unavailable + +SMTP reply codes consist of a basic three-digit code, optionally along with an *enhanced* +(or sometimes called *extended*) reply code and an optional textual description. + +It appears that Milters are limited to replies with basic codes indicating negative +*completion* (first digit starting with 4 or 5) and intermediate replies are +unavailable. + +References: +- [RFC 3463][rfc3463] "Enhanced Mail System Status Codes" +- RFC 5321 Simple Mail Transfer Protocol, sections [4.2][rfc5321-s4.2] and + [4.3][rfc5321-s4.3] +- Wikipedia: [List of SMTP server return codes][wikipedia-smtp-returncodes] + +### Slow Milter replies + +If your Milter app takes long to decide, e.g.
when doing an external processing task +that may take a long time, you can use the `Action.PROGRESS` as a periodic keep-alive +reply to prevent the MTA-Milter connection from timing out. + +For example, at *End of body*, run an external virus checking service. +It may take 65 seconds, but the MTA-Milter timeout is 30s: + +1. *time passes, less than MTA-Milter timeout setting* +1. Action.PROGRESS +1. *time passes, less than MTA-Milter timeout setting* +1. Action.CONTINUE + +### Availability of commands and implicit requirements + +- A Reject or Tempfail action may be preceded by a Reply code action (along with + optional extended code and text description). + If a (custom) reply status is not provided, it's up to the MTA what code/text to send + to the client (unspecified?). + +- The Quarantine action is only available at End of message stage and the reason + parameter is required. + +### Non-SMTP mail submissions + +A Milter app may be involved for mail that is not submitted over SMTP. + +Postfix emulates an SMTP session for Milter applications when it's configured as one of +the [non_smtpd_milters][postconf-non_smtpd_milters]. +If so, then: + +- Client must not send Reject/Tempfail action as reply to RCPT commands. + + > When a non_smtpd_milters application REJECTs or TEMPFAILs a recipient, Postfix will + > report a configuration error, and mail will stay in the queue. + +- > When new mail arrives via the sendmail(1) command line, the Postfix cleanup(8) + > server pretends that the mail arrives with ESMTP from "localhost" with IP address + > "127.0.0.1". + +- > When new mail arrives via the qmqpd(8) server, the Postfix cleanup(8) server + > pretends that the mail arrives with ESMTP, and uses the QMQPD client hostname and IP + > address. + +- > When old mail is re-injected into the queue with "postsuper -r", the Postfix + > cleanup(8) server uses the same client information that was used when the mail + > arrived as new mail.
+ +### Inconsistent encoding of arrays in commands/responses + +A single argument containing an array of strings is not encoded consistently and depends +on the command/response. +One example of using spaces as separator and a NULL as terminating character is the +requested list of symbols in Options negotiate. +In other cases the array may be encoded with a NULL separator; examples are the Mail +From / Rcpt To ESMTP arguments. +Note that the latter is space-separated on the SMTP-level. 🤯 + +## More on Macros and symbols + +An MTA may send DEFINE_MACRO commands several times, each with multiple symbols. +The first byte of the argument data indicates the Milter command (stage) to which the +macros apply. +All bytes after that are the encoded key/value pairs. +Each pair is NULL-separated and NULL-terminated, passed as argument. +A symbol longer than a single character is wrapped in braces. + +Macros may be defined just before sending the command at the following stages: +- `SMFIM_CONNECT` / `SMFIC_CONNECT` +- `SMFIM_HELO` / `SMFIC_HELO` +- `SMFIM_ENVFROM` / `SMFIC_MAIL` +- `SMFIM_ENVRCPT` / `SMFIC_RCPT` +- `SMFIM_DATA` / `SMFIC_DATA` & `SMFIC_HEADER` +- `SMFIM_EOH` / `SMFIC_EOH` +- `SMFIM_EOM` / `SMFIC_BODY` +- `SMFIM_EOM` / `SMFIC_BODYEOB` + +Example: + +``` +C{mysymbol}NULLmyvalueNULL{othersymbol}NULLothervalueNULLiNULLABCD1234NULL +``` + +will be decoded as + +- `C` applies to *Connection established* stage referring to `Command.CONNECTION_INFO`. +- `mysymbol` = `myvalue` +- `othersymbol` = `othervalue` +- `i` = `ABCD1234` + +Sometimes command data arguments and macros appear to be redundant. + +### Macro availability at stages + +See your MTA's documentation on what macros are available at what stage for the Milter +apps. + +It may be required to omit an opt-out for a command in the protocol flags for your app +even though you don't need to perform an action at that point.
+For example, Postfix only exposes `{client_connections}` at the *Connection established* +stage, so if you want to vary on that in your action at a different stage, you must not +opt-out with the `NO_CONNECT` protocol flag, and save the macro value in your app's +state. + +For optimization, the MTA can be requested to only send specific macros the Milter app +is interested in and is part of the *Options negotiate* response (see above). + +### Options negotiate and symbols list (macros) + +Sendmail's libmilter documentation and header files suggest that a symbols list can be +set using a response with code `l` as defined by `SMFIR_SETSYMLIST` in a list of +definitions titled `/* actions (replies) */`,... but that appears to be rather different +in practice. +In reality, the payload of the Options negotiate response is extended to include a +structure of `<4-byte macro stage ID>NULL`. + +Example: + +- HELO macro stage, request symbols `j` and `{my}`, and +- RCPT TO macro stage, request symbols `k` and `{other}`. + +will be encoded as + +``` +\x00\x00\x00\x01j {my}\x00\x00\x00\x00\x00\x03k {other}\x00\x00 +``` + +It appears there's no use of the defined `SMFIR_SETSYMLIST` response code in actual +implementations. 🤷 + +### Inability to disable the Define macro command + +It seems there's no way to instruct the MTA to disable sending *Define macro* commands +completely. + +When requesting for an empty set of symbols for a stage, and with the callback for a +stage disabled, Postfix seems to send a full *Define macro* command with the default +macros. 🤷 + +If a Milter would request a non-existent symbol, Postfix still calls the *Define macro* +command, but with an empty set. 😒 + +### MTA-Milter connection reuse + +Have a look at the +[`SMFIC_QUIT_NC` command definition in libmilter][sendmail-libmilter-quit-nc], which +suggests that an MTA can reuse the existing connection to start a new 'session'. 
+ +``` +#define SMFIC_QUIT_NC 'K' /* QUIT but new connection follows */ +``` + +Sendmail's libmilter seems to have this defined as a separate state and does not close the +connection (but clears other state such as macros) as you'd expect. + +However, there seems to be no use of this command in either Sendmail MTA or Postfix. 😕 + +... Yet, there's a complaint in Postfix's source code that it's unable to reuse an +existing connection with a milter. 🤪 + +> XXX Sendmail 8 libmilter automatically closes the MTA-to-filter socket when it finds +> out that the SMTP client has disconnected. Because of this behavior, Postfix has to +> open a new MTA-to-filter socket each time an SMTP client connects. +> *[(source)][postfix-milter8c-comment-socket]* + + +[postfix-milter-readme]: https://www.postfix.org/MILTER_README.html +[rfc3463]: https://datatracker.ietf.org/doc/html/rfc3463 +[rfc5321-s4.2]: https://datatracker.ietf.org/doc/html/rfc5321#section-4.2 +[rfc5321-s4.3]: https://datatracker.ietf.org/doc/html/rfc5321#section-4.3 +[wikipedia-smtp-returncodes]: https://en.wikipedia.org/wiki/List_of_SMTP_server_return_codes +[postconf-non_smtpd_milters]: https://www.postfix.org/postconf.5.html#non_smtpd_milters +[postfix-milter8c-comment-socket]: https://github.com/vdukhovni/postfix/blob/fe4e81b23b3ee76c64de73d7cb250882fbaaacb9/postfix/src/milter/milter8.c#L387-L390 +[sendmail-libmilter-quit-nc]: https://salsa.debian.org/debian/sendmail/-/blob/0ad6934dd77ca9ef1e2a64a9862ceb9b56a7d3f8/include/libmilter/mfdef.h#L54 + +[^connection-reuse]: See the section "MTA-Milter connection reuse" below. diff --git a/docs/releasing.md b/docs/releasing.md new file mode 100644 index 0000000..dbce727 --- /dev/null +++ b/docs/releasing.md @@ -0,0 +1,38 @@ +# Releasing Purepythonmilter + +1. Create an annotated and signed git tag. + Make sure the commit lives on a branch available in the GitHub repository and the + working tree is clean.
+ + ```console + $ git status # Should show 'nothing to commit, working tree clean' + $ git branch --all --contains HEAD + $ git tag --annotate --sign --message="" + ``` + +1. Build a Python source distribution file and a wheel using `build`. + + ```console + $ rm -rf dist/ + $ python -m build + $ ls -l dist/ + ``` + +1. Upload to PyPI testing and verify the release. + + *The magic username `__token__` is a literal that enables the use of an HTTP API key + which is needed for accounts with 2FA enabled.* + + When asked for the password, enter the HTTPS API token key with the account on + test-PyPI for uploading packages to the project. + + ```console + $ twine check --strict dist/* + $ TWINE_USERNAME=__token__ twine upload --repository testpypi dist/* + ``` + +1. Upload to regular PyPI. + + ```console + $ TWINE_USERNAME=__token__ twine upload dist/* + ``` diff --git a/docs/smtp-recap.md b/docs/smtp-recap.md new file mode 100644 index 0000000..d46e180 --- /dev/null +++ b/docs/smtp-recap.md @@ -0,0 +1,36 @@ + + +# SMTP protocol recap + +An SMTP conversation would typically look like this: + +1. Client initiates a TCP connection to the server. + - The MTA should talk first (client should wait!) and print a line with some basic + indicators, e.g.: + ``` + 220 myhost.g3rt.nl ESMTP Postfix (Debian/GNU) + ``` + where 2xx code indicates 'OK'. + - The MTA may already reject the client at this point (e.g. IP blocklisted in RBLs). + - Note that Postfix servers sending mail out are also SMTP clients. +1. Negotiate capabilities, e.g. upgrade connection security with StartTLS. +1. say `HELO mysmtpserver.g3rt.nl` (or `EHLO` instead of `HELO`) + - The MTA may reject the connection already at this point (e.g. invalid HELO name). +1. say `MAIL FROM:` to indicate the envelope sender (Return-Path). + - The MTA may reject the sender address at this point (e.g. prohibit impersonation, + sender address non-existence). +1. say `RCPT TO:` for every recipient. 
+ - The MTA may reject the recipients (e.g. to deny relay access). +1. say `DATA` to proceed to sending headers. +1. say `HeaderName: header value` for every header. +1. say *empty newline* to proceed to sending the body. +1. Send the body as given a negotiated transfer encoding and policy, typically + MIME-encoded with lines wrapped up to 72 characters. +1. say `.` on a line of its own to indicate the end of the message. + +At any stage the MTA can reply to the client with a reply code, indicating extra +information, a successful completion or an error. diff --git a/postfixtest/Dockerfile b/postfixtest/Dockerfile new file mode 100644 index 0000000..61e894b --- /dev/null +++ b/postfixtest/Dockerfile @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +# syntax=docker/dockerfile:1.3 +ARG FROM_IMAGE=debian:bullseye +FROM $FROM_IMAGE + +COPY files/apt-install-clean.sh /usr/local/bin/apt-install-clean + +RUN apt-install-clean \ + 'postfix=3.*' \ + && cp -a /etc/postfix /etc/postfix-default + +COPY files/entrypoint.sh /usr/local/sbin/postfix-entrypoint.sh + +# Docker default network, IP of host. +ENV POSTFIX_MILTER_HOST=172.17.0.1 +ENV POSTFIX_MILTER_PORT=9000 + +ENTRYPOINT ["/usr/local/sbin/postfix-entrypoint.sh"] diff --git a/postfixtest/Makefile b/postfixtest/Makefile new file mode 100644 index 0000000..5d408df --- /dev/null +++ b/postfixtest/Makefile @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +DOCKER_BUILD_OPTS?= +FROM_IMAGE=debian:bullseye +CONTAINER_NAME=purepythonmilter-postfixtest + +.NOTPARALLEL: + +.PHONY: default +default: build-postfix run-postfix + +.PHONY: build-postfix +build-postfix: + docker pull "$(FROM_IMAGE)" + docker build \ + $(DOCKER_BUILD_OPTS) \ + --build-arg=FROM_IMAGE="$(FROM_IMAGE)" \ + -t purepythonmilter-postfix:latest \ + . 
+ +.PHONY: run-postfix +run-postfix: + docker run \ + --name $(CONTAINER_NAME) \ + --rm \ + --interactive \ + --tty \ + purepythonmilter-postfix:latest + +.PHONY: shell +shell: + docker run \ + --name $(CONTAINER_NAME) \ + --rm \ + --interactive \ + --tty \ + --entrypoint /bin/bash \ + purepythonmilter-postfix:latest + +.PHONY: get-ipv4 +get-ipv4: + docker inspect -f \ + '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' \ + $(CONTAINER_NAME) diff --git a/postfixtest/files/apt-install-clean.sh b/postfixtest/files/apt-install-clean.sh new file mode 100755 index 0000000..80f338c --- /dev/null +++ b/postfixtest/files/apt-install-clean.sh @@ -0,0 +1,20 @@ +#!/bin/sh -ex + +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +apt-get update + +if [ "$1" = "--dist-upgrade" ]; then + apt-get dist-upgrade -y + shift +fi + +# Relying on APT configuration that --no-install-recommends is not necessary. +apt-get install -y "$@" + +# Relying on APT configuration that apt-get clean is not necessary. + +# APT lists are not removed automatically. +rm -rf /var/lib/apt/lists/* diff --git a/postfixtest/files/entrypoint.sh b/postfixtest/files/entrypoint.sh new file mode 100755 index 0000000..a8b10c2 --- /dev/null +++ b/postfixtest/files/entrypoint.sh @@ -0,0 +1,48 @@ +#!/bin/bash -e + +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +echo unconfigured.docker.container.local > /etc/mailname + +# Copy the /etc/services file in the spool/queue directory as Postfix expects +# them to be there. See also +# https://serverfault.com/a/655127 +mkdir -p /var/spool/postfix/etc +cp /etc/services /var/spool/postfix/etc/services +# Same goes for the trusted CA certs bundle. Still needs a configuration option +# smtp_tls_CAfile to point to it. 
+mkdir -p /var/spool/postfix/etc/ssl/certs/ +cp /etc/ssl/certs/ca-certificates.crt /var/spool/postfix/etc/ssl/certs/ca-certificates.crt +# Also copy the resolv.conf, nsswitch.conf files along with the NSS shared +# libraries to the spool/queue directory as Postfix won't be able to resolve +# hostnames otherwise. +# See also https://askubuntu.com/a/155937 +cp /etc/resolv.conf /etc/nsswitch.conf /var/spool/postfix/etc/ +mkdir -p /var/spool/postfix/lib/x86_64-linux-gnu +cp /lib/x86_64-linux-gnu/libnss_* /var/spool/postfix/lib/x86_64-linux-gnu + +postconf maillog_file=/dev/stdout + +# Rely on the Postfix 3.4+ default master.cf containing the line +# 'postlog unix-dgram [...]' + +postconf "smtpd_milters=inet:${POSTFIX_MILTER_HOST}:${POSTFIX_MILTER_PORT}" + +# The default timeout for a milter connection is 30s, which is rather long when developing. +postconf "milter_command_timeout=2s" +postconf "milter_content_timeout=2s" + +# Accept mail for domain test.local as if it's a real mail server. +postconf "relay_domains=test.local" +# In order to actually queue mails for this imaginary (non-existent) domain, enable a +# transport for it which is unreachable. E.g.: +# postconf "transport_maps=inline:{test.local=[172.17.0.1]}" + +# Specify your client IP/network here if you want more debugging from Postfix for +# connections made from these IPs. Very useful to debug the milter protocol +# implementation too.
+# postconf "debug_peer_list=172.17.0.0/16" + +exec /usr/sbin/postfix start-fg "$@" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7959f56 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +[build-system] +requires = [ + "setuptools>=61.0.0", # For reading all configuration from pyproject.toml + "setuptools-scm[toml]>=6.2", +] +build-backend = "setuptools.build_meta" + +[project] +name = "purepythonmilter" +authors = [ + { name = "Gert van Dijk", email = "github@gertvandijk.nl" }, +] +description = "A Milter library and app framework in pure-Python with asyncio" +license = { text = "Apache License 2.0" } +dynamic = ["version", "readme"] +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Topic :: Communications :: Email", + "Topic :: Communications :: Email :: Filters", + "Topic :: Communications :: Email :: Mail Transport Agents", + "Topic :: Software Development :: Libraries", + "Typing :: Typed", +] +dependencies = [ + "attrs", +] + +[project.optional-dependencies] +development = [ + "black", + "build", + "flake8", + "isort", + "mypy", + "pytest-asyncio", + "pytest-cov", + "pytest", + "pyupgrade", + "reuse", + "ruff", + "setuptools-scm[toml]>=6.2", + "twine", + "validate-pyproject[all]", +] +examples = [ + "click", +] + +[project.urls] +homepage = "https://github.com/gertvdijk/purepythonmilter" +repository = "https://github.com/gertvdijk/purepythonmilter.git" + +[tool.setuptools] +license-files = ["LICENSES/*.txt"] +include-package-data = false + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.dynamic] +readme = { file = ["README.md"], content-type = "text/markdown" } + +[tool.setuptools_scm] +version_scheme = "guess-next-dev" + +[tool.black] +# 
This option takes *multiple* Python versions and should include all supported Python +# versions, but this may change in the future. https://github.com/psf/black/issues/751 +target-version = ["py310", "py311"] + +[tool.isort] +profile = "black" +add_imports = [ + # PEP-563 (new semantics for annotations) is gated behind + # 'from __future__ import annotations', including Python 3.10 & 3.11. + # https://docs.python.org/3/library/__future__.html#id1 + "from __future__ import annotations", +] + +[tool.mypy] +strict = true +namespace_packages = true +show_error_codes = true +show_column_numbers = true +show_error_context = true + +[tool.pytest.ini_options] +# Already move to future-default asyncio strict mode. +# https://github.com/pytest-dev/pytest-asyncio#modes +asyncio_mode = "strict" +addopts = [ + "--capture=no", # Do not capture stdout, may hide output in some cases + "--durations=5", # Get attention to slow tests + "--cov=purepythonmilter", + "--cov-report=term-missing", + "--cov-report=xml:.pytest-cov/coverage.xml", + "--cov-report=html:.pytest-cov/html/", +] +log_level = "DEBUG" diff --git a/run-all-linters b/run-all-linters new file mode 100755 index 0000000..732fc35 --- /dev/null +++ b/run-all-linters @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: CC0-1.0 + +# Stop at first error. +set -e + +# Allow to override path to python interpreter in order to run this from a +# non-virtualenv aware application like VS Code. +PYTHON="${PYTHON_INTERPRETER:-python}" +echo -n "Using Python interpreter at location: $PYTHON " +echo "(to override specify \$PYTHON_INTERPRETER)" + +HADOLINT="${HADOLINT_PATH:-hadolint}" +echo "Using Hadolint at location: $HADOLINT (to override specify \$HADOLINT_PATH)" + +SHELLCHECK="${SHELLCHECK_PATH:-shellcheck}" +echo "Using Shellcheck at location: $SHELLCHECK (to override specify \$SHELLCHECK_PATH)" + +PYTHON_SOURCES_DIRS=(src/ tests/) + +echo "ruff..." 
+"$PYTHON" -m ruff --diff "${PYTHON_SOURCES_DIRS[@]}" || \ + (echo "Run 'ruff --fix ${PYTHON_SOURCES_DIRS[*]}' to fix auto-fixable."; exit 1) +# Also lint for non-auto-fixables - requires a separate invocation apparently. +# Disabled for now due to missing support for structural pattern matching: +# https://github.com/charliermarsh/ruff/issues/282 +# "$PYTHON" -m ruff --show-source ${PYTHON_SOURCES_DIRS} +echo "OK!" + +# Pyupgrade will never provide a check-mode in itself unfortunately. 😥 +# For check runs it mandates the use of the author's pre-commit framework. +# https://github.com/asottile/pyupgrade/issues/595#issuecomment-1016484032 +echo "pyupgrade (editing in-place!)..." +find "${PYTHON_SOURCES_DIRS[@]}" -name "*.py" \ + -exec "$PYTHON" -m pyupgrade --py310-plus {} \; +echo "OK!" + +# Black options are specified in pyproject.toml. +echo "black..." +"$PYTHON" -m black --check --diff . || (echo "Run 'black .' to fix."; exit 1) +echo "OK!" + +# isort options are specified in pyproject.toml. +echo "isort..." +"$PYTHON" -m isort --check --diff . || (echo "Run 'isort .' to fix."; exit 1) +echo "OK!" + +# flake8 options via .flake8 configuration file. +echo "flake8..." +"$PYTHON" -m flake8 +echo "OK!" + +# Other than '--cache-dir=/dev/null', mypy options are specified in pyproject.toml. +# Keep in sync with /.vscode/settings.json, key 'python.linting.mypyArgs', except for +# the '--cache-dir' option. +# Observed weird inconsistent results with default --cache-dir enabled (mypy 0.971); +# disable cache explicitly for this script. +echo "mypy (purepythonmilter package)..." +"$PYTHON" -m mypy --cache-dir=/dev/null --package purepythonmilter +echo "OK!" +echo "mypy (purepythonmilter tests folder)..." +"$PYTHON" -m mypy --cache-dir=/dev/null ./tests + +echo "REUSE lint..." +"$PYTHON" -m reuse lint -q 2>/dev/null \ + || (echo "Run 'reuse lint' to view licensing issues."; exit 1) +echo "OK!" + +echo "hadolint..." 
+HADOLINT_FILES=(Dockerfile) +"$HADOLINT" --version > /dev/null \ + || (echo "Hadolint not found; please install this on your system."; exit 1) +"$HADOLINT" "${HADOLINT_FILES[@]}" +echo "OK!" + +# Keep shellcheck calling arguments in sync with /.vscode/settings.json key +# 'shellcheck.customArgs', except for the '--format' option. +echo "shellcheck..." +SHELLCHECK_FILES=(run-all-linters postfixtest/files/entrypoint.sh) +"$SHELLCHECK" --version > /dev/null \ + || (echo "Shellcheck not found; please install this on your system."; exit 1) +"$SHELLCHECK" --norc --format=gcc "${SHELLCHECK_FILES[@]}" +echo "OK!" + +echo "validate-pyproject..." +"$PYTHON" -m validate_pyproject pyproject.toml +echo "OK!" + +echo "Everything looks OK! 🎉" diff --git a/src/purepythonmilter/__init__.py b/src/purepythonmilter/__init__.py new file mode 100644 index 0000000..6722237 --- /dev/null +++ b/src/purepythonmilter/__init__.py @@ -0,0 +1,94 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 +# +# pyright: reportUnusedImport=false + +from __future__ import annotations + +from ._version import __version__ +from .api.application import PurePythonMilter # noqa: F401 +from .api.models import ( # noqa: F401 + ConnectionInfoArgsIPv4, + ConnectionInfoArgsIPv6, + ConnectionInfoArgsUnixSocket, + ConnectionInfoUnknown, +) +from .protocol.commands import ( # noqa: F401 + Abort, + BodyChunk, + Connect, + Data, + EndOfHeaders, + EndOfMessage, + Header, + Helo, + MailFrom, + Quit, + RcptTo, + Unknown, +) +from .protocol.responses import ( # noqa: F401 + Accept, + AddRecipient, + AddRecipientWithEsmtpArgs, + AppendHeader, + CauseConnectionFail, + ChangeHeader, + ChangeMailFrom, + Continue, + DiscardMessage, + InsertHeader, + Progress, + Quarantine, + Reject, + RejectWithCode, + RemoveRecipient, + ReplaceBodyChunk, + SkipToNextStage, + TempFailWithCode, + VerdictOrContinue, +) + +__all__ = [ + "__version__", + "Abort", + "Accept", + "AddRecipient", + 
"AddRecipientWithEsmtpArgs", + "AppendHeader", + "BodyChunk", + "CauseConnectionFail", + "ChangeHeader", + "ChangeMailFrom", + "Connect", + "ConnectionInfoArgsIPv4", + "ConnectionInfoArgsIPv6", + "ConnectionInfoArgsUnixSocket", + "ConnectionInfoUnknown", + "Continue", + "Data", + "DiscardMessage", + "EndOfHeaders", + "EndOfMessage", + "Header", + "Helo", + "InsertHeader", + "MailFrom", + "Progress", + "PurePythonMilter", + "Quarantine", + "Quit", + "RcptTo", + "Reject", + "RejectWithCode", + "RemoveRecipient", + "ReplaceBodyChunk", + "SkipToNextStage", + "TempFailWithCode", + "Unknown", + "VerdictOrContinue", +] + +DEFAULT_LISTENING_TCP_IP = "127.0.0.1" +DEFAULT_LISTENING_TCP_PORT = 9000 diff --git a/src/purepythonmilter/_version.py b/src/purepythonmilter/_version.py new file mode 100644 index 0000000..d571495 --- /dev/null +++ b/src/purepythonmilter/_version.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import importlib.metadata + +__version__: str = importlib.metadata.version("purepythonmilter") diff --git a/src/purepythonmilter/api/__init__.py b/src/purepythonmilter/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/api/application.py b/src/purepythonmilter/api/application.py new file mode 100644 index 0000000..f81fd22 --- /dev/null +++ b/src/purepythonmilter/api/application.py @@ -0,0 +1,355 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import asyncio +import logging +import types +import typing +from collections.abc import Callable, Coroutine +from typing import Any, ClassVar + +import attrs + +from ..protocol import commands, definitions, responses +from ..server import milterserver +from . import interfaces, logger, models + + +class ProgrammingError(BaseException): + ... 
def symbols_dict_empty_factory() -> dict[definitions.MacroStage, set[str]]:
    """
    Build the default macro-symbol restriction: an empty symbol set per stage.

    Only members of `definitions.MacroStage` whose value is at most 6 are
    included; each included stage is mapped to its own new, empty set.
    """
    empty_per_stage: dict[definitions.MacroStage, set[str]] = {}
    for macro_stage in definitions.MacroStage:
        if macro_stage.value > 6:
            continue
        empty_per_stage[macro_stage] = set()
    return empty_per_stage
hook_on_end_of_message: None | Callable[ + [commands.EndOfMessage], Coroutine[Any, Any, None] + ] | Callable[ + [commands.EndOfMessage], Coroutine[Any, Any, responses.AbstractResponse] + ] = None + hook_on_abort: None | Callable[[commands.Abort], Coroutine[Any, Any, None]] = None + hook_on_quit: None | Callable[[commands.Quit], Coroutine[Any, Any, None]] = None + hook_on_unknown: None | Callable[ + [commands.Unknown], Coroutine[Any, Any, None] + ] | Callable[ + [commands.Unknown], Coroutine[Any, Any, responses.VerdictOrContinue] + ] = None + + # An empty set of symbols for a stage disables all macros sent for it. + # If a stage is not included, the default set by the MTA will be set (=all). + # So, to receive all the symbols, use `restrict_symbols=None`. + restrict_symbols: dict[definitions.MacroStage, set[str]] | None = attrs.field( + factory=symbols_dict_empty_factory + ) + headers_with_leading_space: bool = False + _milterserver: milterserver.MilterServer | None = attrs.field( + init=False, default=None + ) + + # Manipulation flags + can_add_headers: bool = False + can_add_recipients: bool = False + can_add_recipients_with_esmtp_args: bool = False + can_change_body: bool = False + can_change_headers: bool = False + can_change_mail_from: bool = False + can_remove_recipients: bool = False + can_quarantine: bool = False + + _request_proto_flags: models.RequestProtocolFlags = attrs.field( + init=False, factory=models.RequestProtocolFlags + ) + + def __attrs_post_init__(self) -> None: + self.logger = logger.ConnectionContextLogger().get(self.name) + if self.hook_on_connect is not None: + self._request_proto_flags.call_connect = True + self._request_proto_flags.reply_connect = self._hook_needs_reply( + self.hook_on_connect + ) + if self.hook_on_helo is not None: + self._request_proto_flags.call_helo = True + self._request_proto_flags.reply_helo = self._hook_needs_reply( + self.hook_on_helo + ) + if self.hook_on_mail_from is not None: + 
self._request_proto_flags.call_mail_from = True + self._request_proto_flags.reply_mail_from = self._hook_needs_reply( + self.hook_on_mail_from + ) + if self.hook_on_rcpt_to is not None: + self._request_proto_flags.call_rcpt_to = True + self._request_proto_flags.reply_rcpt_to = self._hook_needs_reply( + self.hook_on_rcpt_to + ) + self._request_proto_flags.call_rcpt_to_rejected = ( + self.on_rcpt_to_include_rejected + ) + if self.hook_on_data is not None: + self._request_proto_flags.call_data = True + self._request_proto_flags.reply_data = self._hook_needs_reply( + self.hook_on_data + ) + if self.hook_on_header is not None: + self._request_proto_flags.call_headers = True + self._request_proto_flags.reply_headers = self._hook_needs_reply( + self.hook_on_header + ) + if self.hook_on_end_of_headers is not None: + self._request_proto_flags.call_end_of_headers = True + self._request_proto_flags.reply_end_of_headers = self._hook_needs_reply( + self.hook_on_end_of_headers + ) + if self.hook_on_body_chunk is not None: + self._request_proto_flags.call_body_chunk = True + self._request_proto_flags.reply_body_chunk = self._hook_needs_reply( + self.hook_on_body_chunk + ) + # Note: responses cannot be disabled/enabled to End of message (always enabled), + # Abort (always disabled) and Quit (always disabled). 
+ if self.hook_on_unknown is not None: + self._request_proto_flags.call_unknown = True + self._request_proto_flags.reply_unknown = self._hook_needs_reply( + self.hook_on_unknown + ) + self._request_proto_flags.can_specify_macros = bool(self.restrict_symbols) + if self.restrict_symbols is None: + self.restrict_symbols = {} + self._request_proto_flags.headers_with_leading_space = ( + self.headers_with_leading_space + ) + self._request_proto_flags.can_add_headers = self.can_add_headers + self._request_proto_flags.can_add_recipients = self.can_add_recipients + self._request_proto_flags.can_add_recipients_with_esmtp_args = ( + self.can_add_recipients_with_esmtp_args + ) + self._request_proto_flags.can_change_body = self.can_change_body + self._request_proto_flags.can_change_headers = self.can_change_headers + self._request_proto_flags.can_change_mail_from = self.can_change_mail_from + self._request_proto_flags.can_remove_recipients = self.can_remove_recipients + self._request_proto_flags.can_quarantine = self.can_quarantine + + def _hook_needs_reply( + self, + hook: Callable[[Any], Any], + ) -> bool: + hints = typing.get_type_hints(hook) + if "return" not in hints: + raise ProgrammingError( + f"Please annotate the return type for hook {hook.__name__}()." + ) + # Fails flake8 check, but isinstance check on NoneType is not working. + return hints.get("return") is not types.NoneType # noqa: E721 + + def _get_factory(self) -> interfaces.MilterAppFactory: + """ + Create a factory for the connection handler to call on every new connection. + Instead of this being the factory, the "factory of factory" pattern allows for + passing parameters not known / not relevant at this level. 
+ """ + hook_on_connect = self.hook_on_connect + hook_on_helo = self.hook_on_helo + hook_on_mail_from = self.hook_on_mail_from + hook_on_rcpt_to = self.hook_on_rcpt_to + hook_on_data = self.hook_on_data + hook_on_header = self.hook_on_header + hook_on_end_of_headers = self.hook_on_end_of_headers + hook_on_body_chunk = self.hook_on_body_chunk + hook_on_end_of_message = self.hook_on_end_of_message + hook_on_abort = self.hook_on_abort + hook_on_quit = self.hook_on_quit + hook_on_unknown = self.hook_on_unknown + logger_name_ = self.name + request_proto_flags = self._request_proto_flags + assert self.restrict_symbols is not None + symbols_ = self.restrict_symbols + + @attrs.define() + class BaseMilter(interfaces.AbstractMilterApp): + _session: interfaces.AbstractMtaMilterSession = attrs.field() + logger_name: ClassVar[str] = logger_name_ + protocol_flags: ClassVar[models.RequestProtocolFlags] = request_proto_flags + symbols: ClassVar[dict[definitions.MacroStage, set[str]]] = symbols_ + + def __attrs_post_init__(self) -> None: + self.logger = logger.ConnectionContextLogger().get(self.logger_name) + + async def on_connect( + self, command: commands.Connect + ) -> responses.VerdictOrContinue | None: + if hook_on_connect is None: + return None + # Have to add specific type in assignment here or else reveal_type() + # shows this as Any? 
🤔 + ret: responses.VerdictOrContinue | None = await hook_on_connect(command) + return ret + + async def on_helo( + self, command: commands.Helo + ) -> responses.VerdictOrContinue | None: + if hook_on_helo is None: + return None + ret: responses.VerdictOrContinue | None = await hook_on_helo(command) + return ret + + async def on_mail_from( + self, command: commands.MailFrom + ) -> responses.VerdictOrContinue | None: + if hook_on_mail_from is None: + return None + ret: responses.VerdictOrContinue | None = await hook_on_mail_from( + command + ) + return ret + + async def on_rcpt_to( + self, command: commands.RcptTo + ) -> responses.VerdictOrContinue | None: + if hook_on_rcpt_to is None: + return None + ret: responses.VerdictOrContinue | None = await hook_on_rcpt_to(command) + return ret + + async def on_data( + self, command: commands.Data + ) -> responses.VerdictOrContinue | None: + if hook_on_data is None: + return None + ret: responses.VerdictOrContinue | None = await hook_on_data(command) + return ret + + async def on_header( + self, command: commands.Header + ) -> responses.VerdictOrContinue | None: + if hook_on_header is None: + return None + ret: responses.VerdictOrContinue | None = await hook_on_header(command) + return ret + + async def on_end_of_headers( + self, command: commands.EndOfHeaders + ) -> responses.VerdictOrContinue | None: + if hook_on_end_of_headers is None: + return None + ret: responses.VerdictOrContinue | None = await hook_on_end_of_headers( + command + ) + return ret + + async def on_body_chunk( + self, command: commands.BodyChunk + ) -> responses.VerdictOrContinue | responses.SkipToNextStage | None: + if hook_on_body_chunk is None: + return None + ret: responses.VerdictOrContinue | responses.SkipToNextStage | None = ( + await hook_on_body_chunk(command) + ) + return ret + + async def on_end_of_message( + self, command: commands.EndOfMessage + ) -> responses.AbstractResponse: + # Note: ensures that a None-response by the app gets translated 
async def start_server(self, *, host: str, port: int) -> None:
    """
    Create a MilterServer for this app and await its start_server().

    The server is remembered on the instance while the call is in progress, so
    that a second concurrent start on the same app can be refused.

    Args:
        host: passed through to MilterServer.start_server().
        port: passed through to MilterServer.start_server().

    Raises:
        RuntimeError: when this app instance already has a server registered.
    """
    if self._milterserver:
        raise RuntimeError("You can only start this app once.")
    srv = milterserver.MilterServer(
        app_factory=self._get_factory()  # pyright: ignore PylancereportGeneralTypeIssues # noqa: E501
    )
    self._milterserver = srv
    try:
        await srv.start_server(host=host, port=port)
    finally:
        # Also clear the guard when the awaited call raises or gets cancelled;
        # otherwise a single failed start would make every later call on this
        # instance fail with the RuntimeError above.
        self._milterserver = None
@attrs.define(kw_only=True)
class QueueEntry:
    """
    A command paired with an event object belonging to that command.

    NOTE(review): presumably `done_event` is the event that
    AbstractMtaMilterSession.queue_command() hands back to signal completion;
    confirm against the session implementation.
    """

    command: commands.BaseCommand
    # Each entry gets its own new asyncio.Event unless one is passed in.
    done_event: asyncio.Event = attrs.Factory(asyncio.Event)
+ + done_event = queue_command(cmd) + await done_event.wait() + """ + ... # pragma: nocover + + @abc.abstractmethod + async def _commands_consumer(self) -> None: + ... # pragma: nocover + + @abc.abstractmethod + async def _send_response(self, response: AbstractResponse) -> None: + ... # pragma: nocover + + @abc.abstractmethod + async def close_bottom_up(self) -> None: + ... # pragma: nocover + + @abc.abstractmethod + async def close_top_down(self) -> None: + ... # pragma: nocover + + +class AbstractMilterApp(abc.ABC): + _session: AbstractMtaMilterSession + protocol_flags: typing.ClassVar[RequestProtocolFlags] + symbols: typing.ClassVar[dict[definitions.MacroStage, set[str]]] + + @abc.abstractmethod + async def on_connect( + self, command: commands.Connect + ) -> responses.VerdictOrContinue | None: + ... # pragma: nocover + + @abc.abstractmethod + async def on_helo( + self, command: commands.Helo + ) -> responses.VerdictOrContinue | None: + ... # pragma: nocover + + @abc.abstractmethod + async def on_mail_from( + self, command: commands.MailFrom + ) -> responses.VerdictOrContinue | None: + ... # pragma: nocover + + @abc.abstractmethod + async def on_rcpt_to( + self, command: commands.RcptTo + ) -> responses.VerdictOrContinue | None: + ... # pragma: nocover + + @abc.abstractmethod + async def on_data( + self, command: commands.Data + ) -> responses.VerdictOrContinue | None: + ... # pragma: nocover + + @abc.abstractmethod + async def on_header( + self, command: commands.Header + ) -> responses.VerdictOrContinue | None: + ... # pragma: nocover + + @abc.abstractmethod + async def on_end_of_headers( + self, command: commands.EndOfHeaders + ) -> responses.VerdictOrContinue | None: + ... # pragma: nocover + + @abc.abstractmethod + async def on_body_chunk( + self, command: commands.BodyChunk + ) -> responses.VerdictOrContinue | responses.SkipToNextStage | None: + ... 
class MilterAppFactory(typing.Protocol):
    """
    Structural type for a callable that creates a Milter app.

    The call accepts the session as a keyword-only argument and returns an
    AbstractMilterApp.
    """

    @staticmethod
    def __call__(*, session: AbstractMtaMilterSession) -> AbstractMilterApp:
        """Create the app for the given session."""
        ...  # pragma: nocover
def _format_context_trailer(self) -> str:
    """
    Render the extra logging contexts as a trailer for a log message.

    Returns " [key=value, ...]" built from every entry in `self.extra` except
    "connection_id", or an empty string when there are no other entries.
    """
    assert hasattr(self, "extra") and self.extra is not None
    # Filter "connection_id" out while building instead of deleting it from a
    # copy: this also works when the key is absent, a case process() tolerates
    # as well by reading the key with .get().
    keyvalues: list[str] = [
        f"{key}={value!s}"
        for key, value in self.extra.items()
        if key != "connection_id"
    ]
    if not keyvalues:
        return ""
    keyvalues_str = ", ".join(keyvalues)
    return f" [{keyvalues_str}]"
class MilterServerConnectionID(uuid.UUID):
    """A UUID identifying a Milter server connection, with a short printable form."""

    @property
    def short(self) -> str:
        """The abbreviation at shorten()'s default length (8 characters)."""
        return self.shorten()

    def shorten(self, length: int = 8) -> str:
        """Return the first `length` characters of the UUID's string form."""
        return str(self)[:length]

    @classmethod
    def generate(cls) -> MilterServerConnectionID:
        """
        Create a new connection ID from the bytes of a random uuid.uuid4().

        Instantiates `cls`, so a subclass calling generate() gets an instance of
        itself rather than of this base class.
        """
        return cls(bytes=uuid.uuid4().bytes)
+ """ + + call_connect: bool = False + call_helo: bool = False + call_mail_from: bool = False + call_rcpt_to: bool = False + call_rcpt_to_rejected: bool = False + call_data: bool = False + call_headers: bool = False + call_end_of_headers: bool = False + call_body_chunk: bool = False + call_unknown: bool = False + + reply_connect: bool = False + reply_helo: bool = False + reply_mail_from: bool = False + reply_rcpt_to: bool = False + reply_data: bool = False + reply_headers: bool = False + reply_end_of_headers: bool = False + reply_body_chunk: bool = False + reply_unknown: bool = False + + can_change_mail_from: bool = False + can_add_headers: bool = False + can_change_headers: bool = False + can_change_body: bool = False + can_add_recipients: bool = False + can_add_recipients_with_esmtp_args: bool = False + can_remove_recipients: bool = False + can_quarantine: bool = False + can_specify_macros: bool = True + can_skip_body_chunks: bool = True + + headers_with_leading_space: bool = False + + def encode_to_flags_bitmask(self) -> tuple[int, int]: + protocol_flags, action_flags = 0, 0 + if not self.call_connect: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.CONNECT.value + if not self.call_helo: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.HELO.value + if not self.call_mail_from: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.MAIL_FROM.value + if not self.call_rcpt_to: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.RCPT_TO.value + if self.call_rcpt_to_rejected: + protocol_flags |= ( + definitions.ProtocolFlagsOther.SEND_REJECTED_RCPT_TOS.value + ) + if not self.call_data: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.DATA.value + if not self.call_headers: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.HEADERS.value + if not self.call_end_of_headers: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.END_OF_HEADERS.value + ) + if not self.call_body_chunk: + protocol_flags 
|= definitions.ProtocolFlagsDisableCallback.BODY.value + if not self.call_unknown: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.UNKNOWN.value + + if not self.reply_connect: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.REPLY_CONNECTION.value + ) + if not self.reply_helo: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.REPLY_HELO.value + if not self.reply_mail_from: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.REPLY_MAIL_FROM.value + ) + if not self.reply_rcpt_to: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.REPLY_RCPT_TO.value + ) + if not self.reply_data: + protocol_flags |= definitions.ProtocolFlagsDisableCallback.REPLY_DATA.value + if not self.reply_headers: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.REPLY_HEADERS.value + ) + if not self.reply_end_of_headers: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.REPLY_END_OF_HEADERS.value + ) + if not self.reply_body_chunk: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.REPLY_BODY_CHUNK.value + ) + if not self.reply_unknown: + protocol_flags |= ( + definitions.ProtocolFlagsDisableCallback.REPLY_UNKNOWN.value + ) + + if self.can_change_mail_from: + action_flags |= definitions.ActionFlags.CHANGE_ENVELOPE_FROM.value + if self.can_add_headers: + action_flags |= definitions.ActionFlags.ADD_HEADERS.value + if self.can_change_headers: + action_flags |= definitions.ActionFlags.CHANGE_HEADERS.value + if self.can_change_body: + action_flags |= definitions.ActionFlags.CHANGE_BODY.value + if self.can_add_recipients: + action_flags |= definitions.ActionFlags.ADD_RECIPIENTS.value + if self.can_add_recipients_with_esmtp_args: + action_flags |= definitions.ActionFlags.ADD_RECIPIENT_ESMTP_ARGS.value + if self.can_remove_recipients: + action_flags |= definitions.ActionFlags.REMOVE_RECIPIENTS.value + if self.can_quarantine: + action_flags |= definitions.ActionFlags.QUARANTINE.value + if 
@classmethod
def from_binary_flags(
    cls, *, protocol_flags: int, action_flags: int
) -> MtaSupportsProtocolFlags:
    """
    Decode the two flag bitmasks into one boolean attribute per flag.

    Every attribute is True exactly when the corresponding bit (the `.value` of
    the matching member in `definitions`) is set in the relevant bitmask.

    Args:
        protocol_flags: bitmask tested against ProtocolFlagsDisableCallback and
            ProtocolFlagsOther members.
        action_flags: bitmask tested against ActionFlags members.

    Returns:
        An instance of `cls`, so subclasses get an instance of themselves.
    """
    disable = definitions.ProtocolFlagsDisableCallback
    other = definitions.ProtocolFlagsOther
    action = definitions.ActionFlags

    def in_protocol(
        flag: definitions.ProtocolFlagsDisableCallback
        | definitions.ProtocolFlagsOther,
    ) -> bool:
        # True when the flag's bit is present in the protocol bitmask.
        return bool(protocol_flags & flag.value)

    def in_action(flag: definitions.ActionFlags) -> bool:
        # True when the flag's bit is present in the action bitmask.
        return bool(action_flags & flag.value)

    return cls(
        disable_call_connect=in_protocol(disable.CONNECT),
        disable_call_helo=in_protocol(disable.HELO),
        disable_call_mail_from=in_protocol(disable.MAIL_FROM),
        disable_call_rcpt_to=in_protocol(disable.RCPT_TO),
        # NOTE(review): this "disable_..." attribute is True when the
        # SEND_REJECTED_RCPT_TOS bit is set; the name looks inverted relative to
        # the flag name. Confirm the intended meaning before relying on it.
        disable_call_rcpt_to_rejected=in_protocol(other.SEND_REJECTED_RCPT_TOS),
        disable_call_data=in_protocol(disable.DATA),
        disable_call_headers=in_protocol(disable.HEADERS),
        disable_call_end_of_headers=in_protocol(disable.END_OF_HEADERS),
        disable_call_body_chunk=in_protocol(disable.BODY),
        disable_call_unknown=in_protocol(disable.UNKNOWN),
        disable_reply_connect=in_protocol(disable.REPLY_CONNECTION),
        disable_reply_helo=in_protocol(disable.REPLY_HELO),
        disable_reply_mail_from=in_protocol(disable.REPLY_MAIL_FROM),
        disable_reply_rcpt_to=in_protocol(disable.REPLY_RCPT_TO),
        disable_reply_data=in_protocol(disable.REPLY_DATA),
        disable_reply_headers=in_protocol(disable.REPLY_HEADERS),
        disable_reply_end_of_headers=in_protocol(disable.REPLY_END_OF_HEADERS),
        disable_reply_body_chunk=in_protocol(disable.REPLY_BODY_CHUNK),
        disable_reply_unknown=in_protocol(disable.REPLY_UNKNOWN),
        allows_change_mail_from=in_action(action.CHANGE_ENVELOPE_FROM),
        allows_add_headers=in_action(action.ADD_HEADERS),
        allows_change_headers=in_action(action.CHANGE_HEADERS),
        allows_change_body=in_action(action.CHANGE_BODY),
        allows_add_recipients=in_action(action.ADD_RECIPIENTS),
        allows_add_recipients_with_esmtp_args=in_action(
            action.ADD_RECIPIENT_ESMTP_ARGS
        ),
        allows_remove_recipients=in_action(action.REMOVE_RECIPIENTS),
        allows_quarantine=in_action(action.QUARANTINE),
        allows_specify_macros=in_action(action.SET_MACROS_LIST),
        # SKIP and HEADER_VALUE_LEADING_SPACE are carried in the protocol bitmask.
        allows_skip_body_chunks=in_protocol(other.SKIP),
        headers_with_leading_space=in_protocol(other.HEADER_VALUE_LEADING_SPACE),
    )
+ - Alternatively, obtain a modern Python and follow the steps below from a virtualenv + using Pyenv+Direnv with the supplied `.envrc`; that's up to you. +- Docker installed with privileges to build and run containers as the current user on + the system. +- Internet access for building the container that pulls in the base layer and downloads + packages. +- Assumption of a default Docker network set up at `172.17.0.1/16` (for the steps below, + or else adjust accordingly). +- No firewall in the way blocking connections from the Postfix container to your host. +- Have GNU Make (`make`) installed. + +## Steps + +1. Install Purepythonmilter, e.g. like this from sources: + + ```console + $ git clone https://github.com/gertvdijk/purepythonmilter.git + $ cd purepythonmilter + + # N.B. You may want to create and activate a Python virtualenv at this point. + + # This installs the package at the current location (`.`) with the `examples` option + # to indicate extra dependencies to be installed. + $ python -m pip install -e .[examples] + ``` + +1. Run an example Milter app, bound to the Docker default network bridge interface with + INFO-level logging enabled. + + ```console + $ python -m purepythonmilter.examples.debug_log_all \ + --bind-host 172.17.0.1 \ + --log-level=INFO + ``` + + 💡 Change `--log-level=INFO` to `--log-level=DEBUG` and be ready to get a lot of + output, perhaps relevant when testing. + +1. Use the helpers in [`postfixtest/`](../../../postfixtest/) to build and run a Postfix + instance in Docker. + This will start a Postfix container named `purepythonmilter-postfixtest` and run it + in the foreground. + + ```console + $ make -C postfixtest + ``` + + Wait until Postfix is ready, e.g. when a line like this is printed: + + ``` + postfix/master[1]: daemon started -- version 3.5.6, configuration /etc/postfix + ``` + +1. Send an email to `user@test.local` using your mail client (e.g.
Thunderbird as SMTP + outgoing server or [SWAKS][github-swaks]) to submit email to this Postfix instance. + + To find out the IP address of the container, you can use the following command: + + ```console + $ make --silent -C postfixtest get-ipv4 + ``` + + One-liner to use SWAKS: + + ```console + $ swaks --to user@test.local --server $(make --silent -C postfixtest get-ipv4) + ``` + +1. Observe the output in the terminal where you started the Milter in step 2 as well as + the output of Postfix. + + The Milter app should now print something like: + + ``` + INFO:purepythonmilter.server.milterserver:Milter server started, awaiting connections... + INFO:debug_log_all:bb5bec76: On connect: args=ConnectionInfoArgsIPv4(hostname='[172.17.0.1]', + addr=IPv4Address('172.17.0.1'), port=53286), macros={'j': ... + INFO:debug_log_all:bb5bec76: On HELO: hostname=[172.17.0.1], macros={} + INFO:debug_log_all:bb5bec76: On MAIL FROM: address=github@gertvandijk.nl, + esmtp_args={'BODY': '8BITMIME', 'SIZE': ... + INFO:debug_log_all:bb5bec76: On RCPT TO: address=user@test.local, esmtp_args={}, ... + [...] + INFO:debug_log_all:bb5bec76: On abort + INFO:debug_log_all:bb5bec76: On quit + ``` + + You will still see errors on relaying the mail to `test.local`, but that domain does not + exist, so you can safely ignore them. + It does not matter for the Milter. + +1. Both the Postfix container and the Milter app can be stopped by pressing + `Ctrl` + `C` in the terminal where they're running. + +You have now seen a demonstration of the hooks that the Milter gives you with the +'debug_log_all' example app. 🎉 + +If you don't have a mail client at hand, you could use plain 'Old School' *telnet* too: + +1. In another terminal, initiate a connection to Postfix using `telnet`. + Note that the container has been given a dynamic IP address by Docker. + ```console + $ telnet "$(make --silent -C postfixtest get-ipv4)" 25 + ``` +1. Observe the output in the terminal... (as above). +1.
Back in the terminal where you started `telnet`, stop the connection to Postfix by + either: + - Typing `QUIT` followed by pressing `Enter`, or + - Quitting the telnet client using the printed escape sequence (e.g. `Ctrl` + `]`), + followed by typing `quit` and pressing `Enter`. + + +[github-swaks]: https://github.com/jetmore/swaks diff --git a/src/purepythonmilter/examples/__init__.py b/src/purepythonmilter/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/examples/append_header_ip/__init__.py b/src/purepythonmilter/examples/append_header_ip/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/examples/append_header_ip/__main__.py b/src/purepythonmilter/examples/append_header_ip/__main__.py new file mode 100644 index 0000000..f715ce5 --- /dev/null +++ b/src/purepythonmilter/examples/append_header_ip/__main__.py @@ -0,0 +1,90 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging + +import click + +from purepythonmilter import ( + DEFAULT_LISTENING_TCP_IP, + DEFAULT_LISTENING_TCP_PORT, + AppendHeader, + Connect, + ConnectionInfoArgsIPv4, + ConnectionInfoArgsIPv6, + Continue, + PurePythonMilter, +) + +logger: logging.LoggerAdapter[logging.Logger] # assigned below +_headername: str = "X-UNSET" # global overridden in commandline parsing + + +async def on_connect(cmd: Connect) -> Continue: + """ + Demonstration: add a header without the need to implement an end_of_message + callback, which would be required normally as mandated by the protocol. + The MtaMilterSession will keep track of desired message manipulations and apply them + at the later appropriate end_of_message stage for you.
+ """ + global _headername, logger + match cmd.connection_info_args: + case ConnectionInfoArgsIPv4() | ConnectionInfoArgsIPv6(): + ip = str(cmd.connection_info_args.addr) + logger.info(f"on_connect(): adding header '{_headername}: {ip}'") + return Continue( + manipulations=[AppendHeader(headername=_headername, headertext=ip)] + ) + case _: + logger.warning( + "on_connect(): connection socket family is not IP, skip adding header " + f"{cmd.connection_info_args}" + ) + return Continue() + + +append_header_ip_milter = PurePythonMilter( + name="append_header_ip", + hook_on_connect=on_connect, + can_add_headers=True, +) +logger = append_header_ip_milter.logger + + +# Below is just mostly boilerplate for command line parsing. +@click.command( + context_settings=dict( + show_default=True, + max_content_width=200, + auto_envvar_prefix="PUREPYTHONMILTER", + ) +) +@click.option("--bind-host", default=DEFAULT_LISTENING_TCP_IP, show_envvar=True) +@click.option("--bind-port", default=DEFAULT_LISTENING_TCP_PORT, show_envvar=True) +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + default="INFO", + show_envvar=True, +) +@click.version_option(package_name="purepythonmilter", message="%(version)s") +@click.option("--headername", default="X-MilterExample-Connect-IP", show_envvar=True) +def main(*, bind_host: str, bind_port: int, log_level: str, headername: str) -> None: + """ + This Milter app appends a header with the value of the connecting IP. 
+ + \b + By default it adds it like this: + X-MilterExample-Connect-IP: 1.2.3.4 + """ + global _headername + logging.basicConfig(level=getattr(logging, log_level)) + _headername = headername + append_header_ip_milter.run_server(host=bind_host, port=bind_port) + + +if __name__ == "__main__": + main() diff --git a/src/purepythonmilter/examples/change_body/__init__.py b/src/purepythonmilter/examples/change_body/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/examples/change_body/__main__.py b/src/purepythonmilter/examples/change_body/__main__.py new file mode 100644 index 0000000..3358f9c --- /dev/null +++ b/src/purepythonmilter/examples/change_body/__main__.py @@ -0,0 +1,66 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging + +import click + +from purepythonmilter import ( + DEFAULT_LISTENING_TCP_IP, + DEFAULT_LISTENING_TCP_PORT, + Continue, + EndOfMessage, + PurePythonMilter, + ReplaceBodyChunk, +) + +logger: logging.LoggerAdapter[logging.Logger] # assigned below +_newbody: str = "foobar" # global overridden in commandline parsing + + +async def on_end_of_message(cmd: EndOfMessage) -> Continue: + global _newbody + return Continue(manipulations=[ReplaceBodyChunk(chunk=_newbody.encode())]) + + +change_body_milter = PurePythonMilter( + name="change_body", + hook_on_end_of_message=on_end_of_message, + can_change_body=True, +) +logger = change_body_milter.logger + + +# Below is just mostly boilerplate for command line parsing. 
+@click.command( + context_settings=dict( + show_default=True, + max_content_width=200, + auto_envvar_prefix="PUREPYTHONMILTER", + ) +) +@click.option("--bind-host", default=DEFAULT_LISTENING_TCP_IP, show_envvar=True) +@click.option("--bind-port", default=DEFAULT_LISTENING_TCP_PORT, show_envvar=True) +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + default="INFO", + show_envvar=True, +) +@click.version_option(package_name="purepythonmilter", message="%(version)s") +@click.option("--newbody", default="foobar", show_envvar=True) +def main(*, bind_host: str, bind_port: int, log_level: str, newbody: str) -> None: + """ + This Milter replaces the body with the value given in the `--newbody` parameter. + """ + global _newbody + logging.basicConfig(level=getattr(logging, log_level)) + _newbody = newbody + change_body_milter.run_server(host=bind_host, port=bind_port) + + +if __name__ == "__main__": + main() diff --git a/src/purepythonmilter/examples/debug_log_all/__init__.py b/src/purepythonmilter/examples/debug_log_all/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/examples/debug_log_all/__main__.py b/src/purepythonmilter/examples/debug_log_all/__main__.py new file mode 100644 index 0000000..49f9abd --- /dev/null +++ b/src/purepythonmilter/examples/debug_log_all/__main__.py @@ -0,0 +1,120 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging + +import click + +import purepythonmilter +from purepythonmilter import PurePythonMilter + +logger: logging.LoggerAdapter[logging.Logger] + + +async def on_connect(cmd: purepythonmilter.Connect) -> None: + logger.info(f"On connect: args={cmd.connection_info_args}, macros={cmd.macros}") + + +async def on_helo(cmd: purepythonmilter.Helo) -> None: + logger.info(f"On HELO: hostname={cmd.hostname}, macros={cmd.macros}") + + +async def 
on_mail_from(cmd: purepythonmilter.MailFrom) -> None: + logger.info( + f"On MAIL FROM: address={cmd.address}, esmtp_args={cmd.esmtp_args}, " + f"macros={cmd.macros}" + ) + + +async def on_rcpt_to(cmd: purepythonmilter.RcptTo) -> None: + logger.info( + f"On RCPT TO: address={cmd.address}, esmtp_args={cmd.esmtp_args}, " + f"macros={cmd.macros}" + ) + + +async def on_data(cmd: purepythonmilter.Data) -> None: + logger.info(f"On DATA: macros={cmd.macros}") + + +async def on_header(cmd: purepythonmilter.Header) -> None: + logger.info(f"On header: name={cmd.name} text={cmd.text!r}, macros={cmd.macros}") + + +async def on_end_of_headers(cmd: purepythonmilter.EndOfHeaders) -> None: + logger.info(f"On end of headers: macros={cmd.macros}") + + +async def on_body_chunk(cmd: purepythonmilter.BodyChunk) -> None: + logger.info(f"On body chunk: length={len(cmd.data_raw)}, macros={cmd.macros}") + + +async def on_end_of_message(cmd: purepythonmilter.EndOfMessage) -> None: + logger.info(f"On end of message: macros={cmd.macros}") + + +async def on_abort(cmd: purepythonmilter.Abort) -> None: + logger.info("On abort") + + +async def on_quit(cmd: purepythonmilter.Quit) -> None: + logger.info("On quit") + + +async def on_unknown(cmd: purepythonmilter.Unknown) -> None: + logger.info(f"On unknown command: data_raw={cmd.data_raw!r}") + + +debug_log_all_milter = PurePythonMilter( + name="debug_log_all", + hook_on_connect=on_connect, + hook_on_helo=on_helo, + hook_on_mail_from=on_mail_from, + hook_on_rcpt_to=on_rcpt_to, + hook_on_data=on_data, + hook_on_header=on_header, + hook_on_end_of_headers=on_end_of_headers, + hook_on_body_chunk=on_body_chunk, + hook_on_end_of_message=on_end_of_message, + hook_on_abort=on_abort, + hook_on_quit=on_quit, + hook_on_unknown=on_unknown, + on_rcpt_to_include_rejected=True, + restrict_symbols=None, +) +logger = debug_log_all_milter.logger + + +@click.command( + context_settings=dict( + show_default=True, + max_content_width=200, + 
auto_envvar_prefix="PUREPYTHONMILTER", + ) +) +@click.option( + "--bind-host", default=purepythonmilter.DEFAULT_LISTENING_TCP_IP, show_envvar=True +) +@click.option( + "--bind-port", default=purepythonmilter.DEFAULT_LISTENING_TCP_PORT, show_envvar=True +) +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False), + default="INFO", + show_envvar=True, +) +@click.version_option(package_name="purepythonmilter", message="%(version)s") +def main(*, bind_host: str, bind_port: int, log_level: str) -> None: + """ + This Milter app only logs all events for debugging purposes. + """ + logging.basicConfig(level=getattr(logging, log_level)) + debug_log_all_milter.run_server(host=bind_host, port=bind_port) + + +if __name__ == "__main__": + main() diff --git a/src/purepythonmilter/protocol/__init__.py b/src/purepythonmilter/protocol/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/protocol/commands.py b/src/purepythonmilter/protocol/commands.py new file mode 100644 index 0000000..8a48e34 --- /dev/null +++ b/src/purepythonmilter/protocol/commands.py @@ -0,0 +1,520 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import abc +import ipaddress +import logging +import struct +from collections.abc import Mapping +from typing import Any, ClassVar, Literal, TypeAlias + +import attrs + +from ..api import logger, models +from . 
import definitions +from .exceptions import ProtocolViolationCommandData + +CommandDataRaw: TypeAlias = bytes + +# Filled by BaseCommand.__init_subclass__() +chars_to_command_registry: dict[bytes, type[BaseCommand]] = {} + + +_logger = logging.getLogger(__name__) + + +def _decode_array(data: bytes) -> list[bytes]: + if not data: + return [] + return data.removesuffix(b"\x00").split(b"\x00") + + +# slots=False for any subclass of this, because attrs doesn't play nice with +# __init_subclass__ when enabled. +@attrs.define(auto_attribs=False) +class BaseCommand(abc.ABC): + command_char: ClassVar[bytes] + logger: logging.LoggerAdapter[logging.Logger] = attrs.field( + init=False, eq=False, repr=False + ) + # Attribute 'data_raw' is here to handle any (invalid) data even for commands that + # should not have any and the PacketDecoder does not have to guard about that. + data_raw: CommandDataRaw = attrs.field(default=None) + + def __attrs_post_init__(self) -> None: + self.logger = logger.ConnectionContextLogger().get(__name__) + if self.data_raw is not None and self.data_raw != b"": + raise ProtocolViolationCommandData( + f"Expected no data for command {self.__class__.__name__}" + ) + + def __str__(self) -> str: + return f"{self.__class__.__name__} command [nodata]" + + def __init_subclass__(cls, *args: Any, **kwargs: Any) -> None: + super().__init_subclass__(*args, **kwargs) + if hasattr(cls, "command_char"): + if cmd := chars_to_command_registry.get(cls.command_char): + raise ValueError( + f"Command registration for {cls=!r} failed; command char " + f"{cls.command_char!r} is already registered to {cmd=!r}." + ) + if len(cls.command_char) != 1: + raise ValueError( + f"Command registration for {cls=!r} failed; command char " + f"must be exactly one byte." 
+ ) + _logger.debug(f"registered {cls.command_char!r} to {cmd}") + chars_to_command_registry[cls.command_char] = cls + + +@attrs.define(auto_attribs=False, slots=False) +class BaseCommandWithData(BaseCommand): + data_raw: CommandDataRaw = attrs.field() + + def __attrs_post_init__(self) -> None: + self.logger = logger.ConnectionContextLogger().get(__name__) + self._decode() + + @abc.abstractmethod + def _decode(self) -> None: + ... # pragma: nocover + + def __str__(self) -> str: + return f"{self.__class__.__name__} command [data=<{len(self.data_raw)} bytes>]" + + +@attrs.define(auto_attribs=False, slots=False) +class OptionsNegotiate(BaseCommandWithData): + command_char: ClassVar[bytes] = b"O" # SMFIC_OPTNEG + flags: models.MtaSupportsProtocolFlags = attrs.field(init=False) + + def _decode(self) -> None: + self.logger.debug(f"decoding options negotiate {self.data_raw.hex()=}") + expected_data_length = definitions.BASE_LEN_BYTES * 3 # MILTER_OPTLEN + if len(self.data_raw) != expected_data_length: + raise ProtocolViolationCommandData( + "Length of options negotiate request data is not valid. Got " + f"{len(self.data_raw)}, expected {expected_data_length}." + ) + milter_protocol_version, action_flags, protocol_flags = struct.unpack( + "!III", self.data_raw + ) + self.logger.debug( + f"MTA: {milter_protocol_version=:#08x} {action_flags=:#08x} " + f"{protocol_flags=:#08x}" + ) + if milter_protocol_version != definitions.VERSION: + raise ProtocolViolationCommandData( + f"Unexpected Milter protocol version. Got " + f"{milter_protocol_version}, expected {definitions.VERSION}" + ) + if protocol_flags != definitions.PROTOCOL_FLAGS_ALL: + self.logger.warning( + "This MTA connection does not support all protocol flags. Are " + "you using a modern Postfix? Milter may misbehave. 
" + f"[{protocol_flags=:#08x}]", + ) + + self.flags = models.MtaSupportsProtocolFlags.from_binary_flags( + protocol_flags=protocol_flags, action_flags=action_flags + ) + self.logger.debug(f"{self.flags=}") + + +@attrs.define(auto_attribs=False, slots=False) +class Connect(BaseCommandWithData): + command_char: ClassVar[bytes] = b"C" # SMFIC_CONNECT + connection_info_args: models.ConnectionInfoArgs = attrs.field(init=False) + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + def _decode(self) -> None: + self.connection_info_args = self._decode_connection_info() + + def _decode_connection_info(self) -> models.ConnectionInfoArgs: + # Example data: + # b'[172.17.0.1]\x004\xc36172.17.0.1\x00' + # b'ignored_hostname\x00L\x00\x00/run/mysock\x00' + items = self.data_raw.split(b"\x00", maxsplit=1) + if len(items) != 2: + raise ProtocolViolationCommandData( + "Connection info data does not contain expected number of NULLs to " + "split into hostname, socket family and host address." + ) + hostname_bin, socket_data = items + try: + hostname = hostname_bin.decode("utf-8") + except ValueError as e: + raise ProtocolViolationCommandData( + f"Could not decode hostname in socket data {hostname_bin=!r}" + ) from e + family, hostaddr_port, hostaddr_str = self._decode_socket_tuple( + socket_data.rstrip(b"\x00") + ) + + self.logger.debug( + f"_decode_connection_info {hostname=} " + f"family={family.name} {hostaddr_port=} {hostaddr_str=}" + ) + # We could do a nice pattern matching here extracting the tuple etc, but mypy + # fails: https://github.com/python/mypy/issues/12533#issuecomment-1162496540 + match family: + case definitions.AddressFamily.IPV4: + try: + addr = ipaddress.IPv4Address(hostaddr_str) + except ipaddress.AddressValueError: + raise ProtocolViolationCommandData( + f"Unsupported socket data hostaddr value {hostaddr_str!r} for " + f"family={family.name}" + ) + assert isinstance(hostaddr_port, int) # Have to help mypy here? 
😕 + return models.ConnectionInfoArgsIPv4( + hostname=hostname, addr=addr, port=hostaddr_port + ) + case definitions.AddressFamily.IPV6: + try: + addr6 = ipaddress.IPv6Address(hostaddr_str) + except ipaddress.AddressValueError: + raise ProtocolViolationCommandData( + f"Unsupported socket data hostaddr value {hostaddr_str!r} for " + f"family={family.name}" + ) + assert isinstance(hostaddr_port, int) # Have to help mypy here? 😕 + return models.ConnectionInfoArgsIPv6( + hostname=hostname, addr=addr6, port=hostaddr_port + ) + case definitions.AddressFamily.UNIX_SOCKET: + assert isinstance(hostaddr_str, str) # Have to help mypy here? 😕 + return models.ConnectionInfoArgsUnixSocket(path=hostaddr_str) + case definitions.AddressFamily.UNKNOWN: + # This can happen when Postfix is unable to obtain the client IP from + # the kernel for whatever is the reason. Shows up like + # postfix/smtpd[...]: connect from unknown[unknown] + # in Postfix smtpd logs. + return models.ConnectionInfoUnknown(description=hostname) + + def _decode_socket_tuple( + self, socket_data: bytes + ) -> ( + tuple[ + Literal[definitions.AddressFamily.IPV4, definitions.AddressFamily.IPV6], + int, + str, + ] + | tuple[Literal[definitions.AddressFamily.UNIX_SOCKET], None, str] + | tuple[Literal[definitions.AddressFamily.UNKNOWN], None, None] + ): + # Example data: + # b'4\xc36172.17.0.1' + if not socket_data: + raise ProtocolViolationCommandData("Socket data empty") + family_bin: int = struct.unpack("c", socket_data[0:1])[0] + self.logger.debug(f"Decoded socket data {family_bin=!r}") + try: + family = definitions.AddressFamily(family_bin) + except ValueError: + raise ProtocolViolationCommandData( + f"Unsupported socket family {family_bin!r} in connection socket info." + ) + + match family: + case definitions.AddressFamily.IPV4 | definitions.AddressFamily.IPV6: + if not len(socket_data) >= 6: + raise ProtocolViolationCommandData( + "Socket data should contain more than six bytes for IPv4/IPv6." 
+ ) + port: int = struct.unpack("!H", socket_data[1:3])[0] + try: + address = socket_data[3:].decode("ascii") + except ValueError as e: + raise ProtocolViolationCommandData( + f"Could not decode IP address in socket data {socket_data=!r}" + ) from e + else: + return family, port, address + case definitions.AddressFamily.UNIX_SOCKET: + try: + socketpath = socket_data[3:].decode("utf-8") + except ValueError as e: + raise ProtocolViolationCommandData( + f"Could not decode socket path in socket data {socket_data=!r}" + ) from e + else: + return family, None, socketpath + case definitions.AddressFamily.UNKNOWN: + return family, None, None + + +@attrs.define(auto_attribs=False, slots=False) +class Helo(BaseCommandWithData): + command_char: ClassVar[bytes] = b"H" # SMFIC_HELO + hostname: str = attrs.field(init=False) + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + def _decode(self) -> None: + if not self.data_raw or self.data_raw[-1:] != b"\x00": + raise ProtocolViolationCommandData( + f"Helo hostname should be NULL-terminated. [data={self.data_raw!r}]" + ) + # HELO/EHLO data can't be UTF-8, because it's this very stage in which SMTPUTF8 + # awareness is negotiated. + # https://datatracker.ietf.org/doc/html/rfc6531#section-3.7.1 + self.hostname = self.data_raw.rstrip(b"\x00").decode( + "ascii", "backslashreplace" + ) + + +@attrs.define(auto_attribs=False, slots=False) +class BaseMailFromAndRcptTo(BaseCommandWithData): + """ + Given the data for a 'MAIL FROM' or 'RCPT TO' command, decode to an address and the + ESMTP parameters. A value is optional, which results in a None value in the + esmtp_args dict. E.g. 
with input: + b'\x00BODY=8BITMIME\x00FOO\x00' + this should be decoded to: + address='' + esmtp_args={'BODY': '8BITMIME', 'FOO': None} + """ + + address: str = attrs.field(init=False) + esmtp_args: models.EsmtpArgsType = attrs.field(init=False, factory=dict) + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + def _decode(self) -> None: + data = self.data_raw + if not data or data[-1:] != b"\x00": + raise ProtocolViolationCommandData( + f"Mail From / Rcpt To address should be NULL-terminated. [{data=!r}]" + ) + data_stripped = data.rstrip(b"\x00") + if not data_stripped: + raise ProtocolViolationCommandData( + f"Mail From / Rcpt To address seems empty. [{data=!r}]" + ) + address_data, esmtp_args_data = data.split(b"\x00", maxsplit=1) + address = address_data.decode("utf-8", "backslashreplace") + + if not address.startswith("<") or not address.endswith(">"): + self.logger.warning( + f"Address in Mail From / Rcpt To {address!r} appears not enclosed in " + "angle brackets." + ) + self.address = address + else: + self.address = address[1:-1] + + if not esmtp_args_data: + return + + esmtp_args_items = _decode_array(esmtp_args_data) + self.esmtp_args: models.EsmtpArgsType = {} + for esmtp_data_item_raw in esmtp_args_items: + if b"=" not in esmtp_data_item_raw[1:]: + # keyword-only case. + keyword_raw, value_raw = esmtp_data_item_raw, None + else: + try: + keyword_raw, value_raw = esmtp_data_item_raw.split(b"=") + except ValueError as e: + raise ProtocolViolationCommandData( + "Could not decode ESMTP keyword/value pair in " + f"{esmtp_data_item_raw=!r}" + ) from e + # Note that esmtp-keyword is not UTF-8 with SMTPUTF8 extension, only the + # esmtp-value is. 
+ # https://datatracker.ietf.org/doc/html/rfc6531#section-3.3 + try: + keyword = keyword_raw.decode("ascii") + except ValueError as e: + raise ProtocolViolationCommandData( + f"Could not decode ESMTP keyword {keyword_raw=!r}" + ) from e + + if value_raw is not None: + value = value_raw.decode("utf-8", "backslashreplace") + else: + value = None + + if ( + former_value := self.esmtp_args.get(keyword) + ) is not None and value != former_value: + self.logger.debug( + "ESMTP keyword already seen for this command, overriding former " + f"value {keyword=} {former_value=} {value=}", + ) + self.esmtp_args[keyword] = value + + +class MailFrom(BaseMailFromAndRcptTo): + command_char: ClassVar[bytes] = b"M" # SMFIC_MAIL + + +class RcptTo(BaseMailFromAndRcptTo): + """Called on each recipient individually.""" + + command_char: ClassVar[bytes] = b"R" # SMFIC_RCPT + + +@attrs.define(auto_attribs=False, slots=False) +class Data(BaseCommand): + command_char: ClassVar[bytes] = b"T" # SMFIC_DATA + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + +@attrs.define(auto_attribs=False, slots=False) +class Header(BaseCommandWithData): + """Called on each header individually.""" + + command_char: ClassVar[bytes] = b"L" # SMFIC_HEADER + name: str = attrs.field(init=False) + text: str = attrs.field(init=False) + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + def _decode(self) -> None: + # Example data: + # b'From\x00Display Name \x00' + if not self.data_raw or self.data_raw[-1:] != b"\x00": + raise ProtocolViolationCommandData( + f"Header data should be NULL-terminated. 
[data={self.data_raw!r}]" + ) + items = _decode_array(self.data_raw) + if len(items) != 2: + raise ProtocolViolationCommandData( + f"Could not decode the header data={self.data_raw!r}" + ) + name_raw, value_raw = items + self.name, self.text = name_raw.decode( + "ascii", "backslashreplace" + ), value_raw.decode("utf-8", "backslashreplace") + + +@attrs.define(auto_attribs=False, slots=False) +class EndOfHeaders(BaseCommand): + command_char: ClassVar[bytes] = b"N" # SMFIC_EOH + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + +@attrs.define(auto_attribs=False, slots=False) +class BodyChunk(BaseCommandWithData): + command_char: ClassVar[bytes] = b"B" # SMFIC_BODY + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + def _decode(self) -> None: + pass + + +@attrs.define(auto_attribs=False, slots=False) +class EndOfMessage(BaseCommand): + command_char: ClassVar[bytes] = b"E" # SMFIC_BODYEOB + macros: Mapping[str, str] = attrs.field(init=False, factory=dict) + + +@attrs.define(auto_attribs=False, slots=False) +class Abort(BaseCommand): + command_char: ClassVar[bytes] = b"A" # SMFIC_ABORT + + +@attrs.define(auto_attribs=False, slots=False) +class Quit(BaseCommand): + command_char: ClassVar[bytes] = b"Q" # SMFIC_QUIT + + +@attrs.define(auto_attribs=False, slots=False) +class QuitNoClose(BaseCommand): + """Like Quit, but new connection follows.""" + + command_char: ClassVar[bytes] = b"K" # SMFIC_QUIT_NC + + +@attrs.define(auto_attribs=False, slots=False) +class Unknown(BaseCommandWithData): + """ + Unrecognized or unimplemented SMTP command. As this is completely unspecified; the + 'data_raw' attribute contains the raw value passed from the MTA with the + NULL-termination removed. 
@attrs.define(auto_attribs=False, slots=False)
class DefineMacro(BaseCommandWithData):
    """
    Macro definitions sent by the MTA for a given command (stage).

    The first byte of the data is the command character of the stage the macros
    apply to. The remainder, when present, is a NULL-terminated sequence of
    alternating symbol and value items.
    """

    command_char: ClassVar[bytes] = b"D"  # SMFIC_MACRO
    stage: definitions.MacroStage = attrs.field(init=False)
    macros: dict[str, str] = attrs.field(factory=dict[str, str], init=False)
    # Maps the command character found in the first data byte to its macro stage.
    command_char_to_stage: ClassVar[Mapping[bytes, definitions.MacroStage]] = {
        Connect.command_char: definitions.MacroStage.CONNECT,
        Helo.command_char: definitions.MacroStage.HELO,
        MailFrom.command_char: definitions.MacroStage.MAIL_FROM,
        RcptTo.command_char: definitions.MacroStage.RCPT_TO,
        Data.command_char: definitions.MacroStage.DATA,
        Header.command_char: definitions.MacroStage.HEADER,
        EndOfHeaders.command_char: definitions.MacroStage.END_OF_HEADERS,
        BodyChunk.command_char: definitions.MacroStage.BODY,
        EndOfMessage.command_char: definitions.MacroStage.END_OF_MESSAGE,
        Unknown.command_char: definitions.MacroStage.UNKNOWN,
    }

    def _decode(self) -> None:
        """
        Populate 'stage' and 'macros' from 'data_raw'.

        Raises ProtocolViolationCommandData when the data is empty, names an unknown
        stage, is not NULL-terminated, does not split into symbol/value pairs, or
        contains a symbol or value that is not valid UTF-8.
        """
        # Example data:
        #   b'Cj\x00myhost.sub.example.com\x00{daemon_addr}\x00172.17.0.2\x00'
        # Should decode to:
        #   stage=MacroStage.CONNECT
        #   macros={'j': 'myhost.sub.example.com', 'daemon_addr': '172.17.0.2'}
        # NOTE(review): this method stores each symbol exactly as decoded; confirm
        # whether the braces around '{daemon_addr}' are stripped elsewhere (for
        # example in _decode_array) or whether the example above is inaccurate.
        self.logger.debug(f"decoding DefineMacro {self.data_raw.hex()=}")

        if not self.data_raw:
            raise ProtocolViolationCommandData(
                "DefineMacro command data must define a command (stage) for which they "
                "apply to."
            )

        # Slice instead of index so the lookup key stays of type bytes.
        stage = self.command_char_to_stage.get(self.data_raw[0:1])
        if stage is None:
            raise ProtocolViolationCommandData(
                f"Unknown command (stage) {self.data_raw[0:1]!r} for which macros "
                "apply to."
            )
        self.stage = stage

        macro_data_raw = self.data_raw[1:]

        if not macro_data_raw:
            self.logger.debug(f"No macros in DefineMacro for {stage=}")
            return

        if macro_data_raw[-1:] != b"\x00":
            raise ProtocolViolationCommandData(
                "DefineMacro command data must be NULL-terminated. "
                f"[data={self.data_raw!r}]"
            )

        items = _decode_array(macro_data_raw)
        if len(items) % 2 != 0:
            raise ProtocolViolationCommandData(
                "Macro data does not contain expected number of NULLs to split into "
                "symbol/value pairs"
            )

        # Items alternate symbol, value, symbol, value, ...
        for symbol_raw, value_raw in zip(items[::2], items[1::2]):
            try:
                symbol, value = symbol_raw.decode("utf-8"), value_raw.decode("utf-8")
            except ValueError as e:
                # Label both parts so the offending item is identifiable in the log.
                raise ProtocolViolationCommandData(
                    f"Unable to decode macro: {symbol_raw=!r} {value_raw=!r}",
                ) from e
            else:
                self.macros[symbol] = value

        self.logger.debug(f"Decoded macros: {self.macros}")
@enum.unique
class ActionFlags(enum.Enum):
    """
    Action flag bit values.

    Every member is a distinct single-bit value (uniqueness is enforced by
    enum.unique). The trailing comment on each member names the corresponding
    libmilter SMFIF_* definition.
    """

    ADD_HEADERS = 0x00000001  # SMFIF_ADDHDRS
    CHANGE_BODY = 0x00000002  # SMFIF_CHGBODY
    ADD_RECIPIENTS = 0x00000004  # SMFIF_ADDRCPT
    # Listed next to ADD_RECIPIENTS rather than in numeric order.
    ADD_RECIPIENT_ESMTP_ARGS = 0x00000080  # SMFIF_ADDRCPT_PAR
    REMOVE_RECIPIENTS = 0x00000008  # SMFIF_DELRCPT
    CHANGE_HEADERS = 0x00000010  # SMFIF_CHGHDRS
    QUARANTINE = 0x00000020  # SMFIF_QUARANTINE
    CHANGE_ENVELOPE_FROM = 0x00000040  # SMFIF_CHGFROM
    SET_MACROS_LIST = 0x00000100  # SMFIF_SETSYMLIST
class ProtocolViolation(BaseException):
    """
    Base class for errors raised when the MTA sends data that violates the protocol.

    NOTE(review): this derives from BaseException rather than Exception, so a plain
    'except Exception:' handler does not catch it (nor its subclasses). Confirm that
    this is intentional before relying on broad exception handlers.
    """

    ...
@attrs.define(auto_attribs=False, kw_only=True)
class PacketDecoder:
    """
    Reassemble length-prefixed payloads from the chunks of data read off the socket.

    Data is buffered across calls to decode(), so a payload may arrive split over
    several chunks and a single chunk may carry several payloads.
    """

    logger: logging.LoggerAdapter[logging.Logger] = attrs.field(init=False)
    _connection_id: models.MilterServerConnectionID = attrs.field()
    # Bytes received so far that have not been handed out as a payload yet.
    _data_so_far: bytes = attrs.field(default=b"")

    def __attrs_post_init__(self) -> None:
        self.logger = logger.ConnectionContextLogger().get(__name__)

    def decode(self, packet: Packet) -> Generator[Payload, None, None]:
        """
        May be called multiple times with (incomplete) Packets and generates assembled
        Payloads.

        Raises ProtocolViolationPacket when a length prefix is zero or exceeds
        MAX_DATA_SIZE.
        """
        self._data_so_far += packet
        while self._data_so_far:
            if len(self._data_so_far) < BASE_LEN_BYTES:
                # The length of the payload data is indicated by an unsigned int
                # encoded as BASE_LEN_BYTES bytes. Wait until we have received at
                # least that number of bytes.
                return
            claimed_payload_length = self._parse_payload_length()
            pos_beyond = BASE_LEN_BYTES + claimed_payload_length
            if len(self._data_so_far) < pos_beyond:
                # Payload is not complete yet; keep buffering.
                return
            assembled_payload = self._data_so_far[BASE_LEN_BYTES:pos_beyond]
            self._data_so_far = self._data_so_far[pos_beyond:]
            self.logger.debug(
                f"{claimed_payload_length=} {len(assembled_payload)=} "
                f"{len(self._data_so_far)=}"
            )
            yield assembled_payload

    def _parse_payload_length(self) -> int:
        """
        Return the payload length claimed by the length prefix of the buffered data.

        Raises ProtocolViolationPacket when the length is zero or larger than
        MAX_DATA_SIZE.
        """
        (payload_length_unpacked_any,) = struct.unpack(
            "!I", self._data_so_far[:BASE_LEN_BYTES]
        )
        payload_length = int(payload_length_unpacked_any)

        self.logger.debug(f"MTA sent packet claiming {payload_length=} byte(s).")
        if payload_length == 0 or payload_length > MAX_DATA_SIZE:
            # A length equal to MAX_DATA_SIZE is accepted, hence '<=' in the message.
            raise ProtocolViolationPacket(
                f"Invalid packet data length: {payload_length=} [boundaries: > 0, "
                f"<= {MAX_DATA_SIZE}, "
                f"connection_id={self._connection_id.short}]"
            )
        return payload_length
@attrs.define(auto_attribs=False, kw_only=True)
class PayloadDecoder:
    """
    Map an assembled payload onto the command class registered for its first byte.
    """

    logger: logging.LoggerAdapter[logging.Logger] = attrs.field(init=False)
    _connection_id: models.MilterServerConnectionID = attrs.field()

    def __attrs_post_init__(self) -> None:
        context_logger = logger.ConnectionContextLogger()
        self.logger = context_logger.get(__name__)

    def decode(
        self, payload: Payload
    ) -> tuple[type[commands.BaseCommand], commands.CommandDataRaw]:
        """
        Return the command class for the payload together with the remaining data.

        Raises RuntimeError for an empty payload and ProtocolViolationPayload when the
        first byte is not a registered command character.
        """
        if not payload:
            raise RuntimeError("Payload was empty")

        # The lookup key is a one-byte slice, so it stays of type bytes.
        command_type = commands.chars_to_command_registry.get(payload[:1])
        if command_type is None:
            raise ProtocolViolationPayload(
                f"Received unknown Milter command, char={payload[:1]=!r} is not "
                "understood."
            )

        self.logger.debug(f"Got command {command_type}")
        command_data = payload[1:]
        return command_type, command_data
@attrs.define(auto_attribs=False, kw_only=True)
class OptionsNegotiateResponse(AbstractResponse):
    """
    Only during options negotiation you can send the requested symbols for each command.
    """

    # No SMFIR_* definition available; seems to be the same as with command from server.
    response_char: ClassVar[bytes] = b"O"
    protocol_version = definitions.VERSION
    protocol_flags: models.RequestProtocolFlags = attrs.field()
    symbols_for_stage: dict[definitions.MacroStage, set[str]] = attrs.field(
        factory=dict
    )

    def __str__(self) -> str:
        return f"{self.__class__.__name__}"

    @classmethod
    def _encode_symbols_list(
        cls,
        stage: definitions.MacroStage,
        symbols: Iterable[str],
    ) -> bytes:
        """
        Encode the symbols requested for one stage.

        The result is the stage value as a 32-bit network byte order unsigned int,
        followed by the sorted symbols joined by spaces and a terminating NULL.
        """
        return (
            struct.pack("!I", stage.value)
            + b" ".join([(s.encode("utf-8")) for s in sorted(symbols)])
            + b"\x00"
        )

    def _log_bit_table(
        self,
        bitmask: int,
        *flag_enums: Iterable[
            definitions.ActionFlags
            | definitions.ProtocolFlagsDisableCallback
            | definitions.ProtocolFlagsOther
        ],
    ) -> None:
        """
        Log one debug line per bit, up to the highest bit defined in 'flag_enums'.

        Each line shows the bit, the name of the flag with that value and whether the
        bit is set in 'bitmask'.
        """
        names = {
            member.value: member.name
            for flag_enum in flag_enums
            for member in flag_enum
        }
        highest_defined = max(names, default=0)
        for i in range(0, 32):
            int_value = 2**i
            if int_value > highest_defined:
                break
            # '!s' keeps the line printable for a bit without a name; formatting
            # None with a width specification would raise TypeError.
            self.logger.debug(
                f"{(int_value):#034b} {names.get(int_value)!s:<30} "
                f"{str(bool(bitmask & (int_value))):8}"
            )

    def _log_flags_table(self, *, protocol_flags: int, action_flags: int) -> None:
        """Log the encoded protocol and action flags as per-bit debug tables."""
        self.logger.debug(
            "Encoded options negotiate response flags "
            f"[version={self.protocol_version:#08x} action={action_flags:#08x} "
            f"protocol={protocol_flags:#08x}]"
        )
        self._log_bit_table(
            protocol_flags,
            definitions.ProtocolFlagsDisableCallback,
            definitions.ProtocolFlagsOther,
        )
        self._log_bit_table(action_flags, definitions.ActionFlags)

    def _log_symbols_table(self) -> None:
        """Log the requested symbols per stage, including their encoded form."""
        for stage, symbols in self.symbols_for_stage.items():
            self.logger.debug(
                f"{stage!r:<32} {', '.join(symbols) if symbols else ''} "
                f"(encoded as {self._encode_symbols_list(stage, symbols)!r})"
            )

    def encode(self) -> Payload:
        """
        Encode the response as the response character, three 32-bit network byte
        order unsigned ints (version, action flags, protocol flags) and the encoded
        symbol lists of all stages.
        """
        protocol_flags, action_flags = self.protocol_flags.encode_to_flags_bitmask()
        # Only build the (relatively costly) debug tables when they will be emitted.
        if self.logger.isEnabledFor(logging.DEBUG):
            self._log_flags_table(
                protocol_flags=protocol_flags,
                action_flags=action_flags,
            )
            self._log_symbols_table()

        # Sendmail's libmilter documentation and header files suggests that a list of
        # symbols can be set is using a response with code SMFIR_SETSYMLIST, but that
        # appears to be a lie. Instead, it's appended to the payload of the Options
        # negotiate response.
        symbols_bytes = b"".join(
            [
                self._encode_symbols_list(stage, symbols)
                for stage, symbols in self.symbols_for_stage.items()
            ]
        )
        self.logger.debug(f"{symbols_bytes=}")
        return Payload(
            self.response_char
            + struct.pack("!III", self.protocol_version, action_flags, protocol_flags)
            + symbols_bytes
        )
@attrs.define(auto_attribs=False, kw_only=True)
class Quarantine(AbstractVerdict):
    """
    Put the message in the hold queue. Only valid at End of Message stage.
    The reason text is ignored by Postfix at the time of writing.
    https://github.com/vdukhovni/postfix/blob/fe4e81b23b3ee76c64de73d7cb250882fbaaacb9/postfix/src/milter/milter8.c#L1336
    """

    response_char: ClassVar[bytes] = b"q"  # SMFIR_QUARANTINE
    # Must be declared with attrs.field(): with auto_attribs=False a bare annotation
    # does not become an attribute that can be passed to the initializer.
    reason: str = attrs.field()

    def encode(self) -> Payload:
        """Encode as the response character followed by the NULL-terminated reason."""
        return Payload(self.response_char + self.reason.encode() + b"\x00")
@attrs.define(auto_attribs=False, kw_only=True)
class AddRecipientWithEsmtpArgs(BaseChangeRecipient):
    """
    Add a recipient together with ESMTP arguments.

    Encoded as the response character, the NULL-terminated recipient and the
    NULL-terminated, space-separated ESMTP arguments.
    """

    response_char: ClassVar[bytes] = b"2"  # SMFIR_ADDRCPT_PAR
    esmtp_args: models.EsmtpArgsType = attrs.field()

    def encode(self) -> Payload:
        rendered_args: list[str] = []
        for keyword, argument in self.esmtp_args.items():
            if argument is None:
                # Keyword without a value is sent bare.
                rendered_args.append(f"{keyword}")
            else:
                rendered_args.append(f"{keyword}={argument}")

        segments = (
            self.response_char,
            self.recipient.encode(),
            b"\x00",
            " ".join(rendered_args).encode(),
            b"\x00",
        )
        return Payload(b"".join(segments))
def validate_headername_rfc5322(headername: str) -> None:
    """
    Validate a header field name.

    Raises ValueError when the name is empty, contains a character outside the
    US-ASCII range 33 up to and including 126, or contains a colon.
    """
    if len(headername) == 0:
        raise ValueError("Header field name cannot be empty.")

    # Code points 33..126 are exactly the printable US-ASCII characters without space.
    only_visible_ascii = all(33 <= ord(char) <= 126 for char in headername)
    if not only_visible_ascii:
        raise ValueError(
            "Header field names must contain only US-ASCII printable characters "
            "with values between 33 and 126 (RFC5322)"
        )

    if headername.find(":") != -1:
        raise ValueError("Header field names must not contain a colon (RFC5322)")
@attrs.define(auto_attribs=False, kw_only=True)
class ChangeHeader(BaseHeaderManipulation):
    """
    Replace the header by the provided headername. If multiple are present, indicate the
    occurrence by its index within the set with the name.

    Provide an empty headertext to delete the header.
    """

    response_char: ClassVar[bytes] = b"m"  # SMFIR_CHGHEADER
    nth_occurrence: int = attrs.field(default=0)

    @nth_occurrence.validator  # pyright: ignore PylancereportUnknownMemberType
    def check_nth_occurrence(
        self, attribute: attrs.Attribute[ChangeHeader], value: int
    ) -> None:
        # Only negative values are rejected; 0 (the default) is accepted even though
        # the message says "positive".
        if bool(value < 0):
            raise ValueError("Header index (nth_occurrence) must be positive.")

    def encode(self) -> Payload:
        # The occurrence index is always included in the encoded payload.
        return super()._encode(index=self.nth_occurrence)
+ + Useful in case this involves many calls and bandwidth/latency, to indicate enough + information has been received to make a decision (in a next stage!). + + On Sendmail this is only valid as a response to the Body chunk commands to skip + further chunks and move to the End of message stage. + """ + + response_char: ClassVar[bytes] = b"s" # SMFIR_SKIP + + +@attrs.define(auto_attribs=False, kw_only=True) +class Progress(BaseResponseNoData): + """ + Inform the MTA that the Milter is still processing and that it's still alive (resets + connection timeout). + + TODO: adjust the API to allow for sending this response multiple times before + sending a verdict response. + """ + + response_char: ClassVar[bytes] = b"p" # SMFIR_PROGRESS + + +VerdictOrContinue: TypeAlias = AbstractVerdict | Continue diff --git a/src/purepythonmilter/py.typed b/src/purepythonmilter/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/server/__init__.py b/src/purepythonmilter/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purepythonmilter/server/connectionhandler.py b/src/purepythonmilter/server/connectionhandler.py new file mode 100644 index 0000000..410ee27 --- /dev/null +++ b/src/purepythonmilter/server/connectionhandler.py @@ -0,0 +1,204 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import asyncio +import logging +from collections.abc import Callable +from typing import Any, ClassVar + +import attrs + +from ..api import logger, models +from ..api.interfaces import ( + AbstractMtaMilterConnectionHandler, + AbstractMtaMilterSession, + MilterAppFactory, +) +from ..protocol import definitions +from ..protocol.exceptions import ProtocolViolation +from ..protocol.packet import Packet, PacketDecoder, encode_payload +from ..protocol.payload import Payload, PayloadDecoder +from .session import MtaMilterSession + + +class 
MtaMilterConnectionHandlerClosed(BaseException): + ... + + +@attrs.define(auto_attribs=False) +class MtaMilterConnectionHandler(AbstractMtaMilterConnectionHandler): + _connection_id: models.MilterServerConnectionID = attrs.field() + _reader: asyncio.StreamReader = attrs.field() + _writer: asyncio.StreamWriter = attrs.field() + app_factory: MilterAppFactory = attrs.field() + _server_on_close_cb: Callable[ + [models.MilterServerConnectionID], None + ] = attrs.field() + _keep_reading_packets_task: asyncio.Task[Any] = attrs.field(init=False) + _session: AbstractMtaMilterSession = attrs.field(init=False) + _closed: bool = attrs.field(init=False) + logger: logging.LoggerAdapter[logging.Logger] = attrs.field(init=False) + READER_CHUNK_SIZE: ClassVar[int] = ( + definitions.MAX_DATA_SIZE + definitions.BASE_LEN_BYTES + ) + + def __attrs_post_init__(self) -> None: + self._closed = False + self.logger = logger.ConnectionContextLogger().get(__name__) + self._session = MtaMilterSession( + socket_connection=self, # pyright: ignore PylancereportGeneralTypeIssues + ) + self._keep_reading_packets_task = asyncio.create_task( + self.keep_reading_packets(), name=f"keep_reading_packets-{self.id.short}" + ) + self._keep_reading_packets_task.add_done_callback( + self._keep_reading_packets_task_done, + ) + + def _keep_reading_packets_task_done(self, _: asyncio.Future[Any]) -> None: + writer, reader = self._writer, self._reader + if not writer.is_closing(): + self.logger.warning( + f"Reading packets task is done without having writer closed. {writer=}" + ) + if not reader.at_eof(): + self.logger.warning( + f"Reading packets task is done without reader at at_eof. 
{reader=}" + ) + if reader.exception(): + self.logger.warning( + "Reading packets task is done with reader " + f"exception={reader.exception=!r}" + ) + self.logger.debug(f"DISCONNECTED {reader=} {writer=}") + + task = self._keep_reading_packets_task + if not task.cancelled() and (exception := task.exception()) is not None: + self.logger.exception( + "_keep_reading_packets_task_done: " + "Got an exception in the connection keep_reading_packets task. " + f"[task={task.get_name()}, {exception=}, cancelled={task.cancelled()}]", + exc_info=exception, + ) + + def _cancel_reader_task(self) -> None: + if self._keep_reading_packets_task.done(): + exception = self._keep_reading_packets_task.exception() + else: + exception = None + self.logger.debug( + "_cancel_reader_task: " + f"{self._keep_reading_packets_task.done()=} " + f"{self._keep_reading_packets_task.cancelled()=} " + f"{exception=}" + ) + if not self._keep_reading_packets_task.cancelled(): + self._keep_reading_packets_task.cancel() + + def session_error_callback(self, *, exception: BaseException) -> None: + self.logger.exception( + "Error callback in in MtaMilterSession", + exc_info=exception, + ) + self.logger.debug("_hl_error_callback: Cancelling the socket reader task") + self._cancel_reader_task() + + @property + def id(self) -> models.MilterServerConnectionID: + return self._connection_id + + async def keep_reading_packets(self) -> None: + assert not self._closed + packet_decoder = PacketDecoder( + connection_id=self.id, # pyright: ignore PylancereportGeneralTypeIssues + ) + payload_decoder = PayloadDecoder( + connection_id=self.id, # pyright: ignore PylancereportGeneralTypeIssues + ) + while True: + try: + self.logger.debug(f"request to read {self.READER_CHUNK_SIZE} bytes") + packet: Packet = await self._reader.read(self.READER_CHUNK_SIZE) + if not len(packet): + if self._reader.at_eof(): + raise MtaMilterConnectionHandlerClosed() + else: + raise RuntimeError( + "Should not reach here; reading 0 bytes with " + 
f"{self._reader.at_eof()=}" + ) + self.logger.debug(f"got {len(packet)=} bytes [{packet=!r}]") + for payload in packet_decoder.decode(packet=packet): + command_class, command_data = payload_decoder.decode( + payload=payload + ) + self.logger.debug(f"{command_class=} {command_data=}") + command = command_class(data_raw=command_data) + self._session.queue_command(command) + else: + self.logger.debug("No payload from packet (yet)") + except ProtocolViolation: + self.logger.error( + "Protocol violation, going to close the connection.", + exc_info=True, + ) + await self.close_bottom_up() + break + except MtaMilterConnectionHandlerClosed: + self.logger.debug("Milter-MTA connection closed") + await self.close_bottom_up() + break + except ConnectionResetError: + self.logger.error( + "Milter-MTA connection reset unexpectedly. This may indicate a " + "protocol violation as observed from the MTA." + ) + await self.close_bottom_up() + break + except asyncio.CancelledError: + await self._close() + break + + async def write_response(self, payload: Payload, *, drain: bool = False) -> None: + packet = encode_payload(payload) + self.logger.debug(f"writing packet len={len(packet)} {packet=!r}") + self._writer.write(packet) + if drain: + await self._writer.drain() + + async def _close(self, *, cancel_reader_task: bool = True) -> None: + self.logger.debug(f"close_top_down; going to {cancel_reader_task=}") + if cancel_reader_task: + self._cancel_reader_task() + self.logger.debug("close_top_down; _cancel_reader_task done") + if self._closed: + self.logger.debug("close_top_down; Already closed this connection?") + return + + self._closed = True + self._server_on_close_cb(self.id) + self.logger.debug(f"writing EOF if {self._writer.can_write_eof()=}") + try: + if self._writer.can_write_eof(): + self._writer.can_write_eof() + self._writer.write_eof() + await self._writer.drain() + if not self._writer.is_closing(): + self._writer.close() + await self._writer.wait_closed() + else: + 
async def start_server(self, *, host: str, port: int) -> None:
    """
    Start listening for MTA connections and serve until cancelled.

    Installs SIGINT/SIGTERM handlers that cancel all tasks of the running loop;
    the resulting cancellation of ``serve_forever()`` triggers ``shutdown()``.

    Args:
        host: Address to bind the listening socket to.
        port: TCP port to bind the listening socket to.
    """
    logger.info(f"Purepythonmilter version {purepythonmilter_version} starting...")

    assert self._state in (
        MilterServerState.INITIALIZING,
        MilterServerState.STOPPED,
    )
    self._state = MilterServerState.STARTING

    def cancel_tasks_handler(signum: signal.Signals) -> None:
        # The parameter is not called `signal` so it does not shadow the module.
        logger.debug(f"Got signal={signum!r}!")
        logger.info("Shutting down milter on shutdown signal...")
        tasks = asyncio.all_tasks()
        logger.debug(f"Cancelling {len(tasks)} task(s).")
        for task in tasks:
            task.cancel()

    loop = asyncio.get_running_loop()
    for shutdown_signal in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(
            shutdown_signal,
            functools.partial(cancel_tasks_handler, shutdown_signal),
        )

    async with (
        srv := await asyncio.start_server(self.handle_connection, host, port)
    ):
        self._state = MilterServerState.STARTED
        # IPv4 sockets report (host, port) but IPv6 sockets report
        # (host, port, flowinfo, scope_id); only the first two are needed.
        _host, _port = srv.sockets[0].getsockname()[:2]
        logger.info(f"Server started, awaiting connections on {_host}:{_port}...")
        try:
            await srv.serve_forever()
        except asyncio.CancelledError:
            await self.shutdown()
async def shutdown(self) -> None:
    """
    Close all open connections and mark the server as stopped.

    Connections are closed one by one. A connection that fails to close is
    logged and skipped, so the remaining connections are still closed and the
    server still reaches the STOPPED state.
    """
    self._state = MilterServerState.STOPPING
    # Take a snapshot: closing a connection removes it from self._connections,
    # which must not happen while iterating over the dict itself.
    connections = list(self._connections.values())
    logger.debug(
        f"Shutting down, closing {len(connections)} connections. "
        f"[{self._connections=}]"
    )
    # TODO: let current connections finish gracefully first.
    # TODO: make this run in parallel in case there are many connections?
    for conn in connections:
        try:
            await conn.close_bottom_up()
        except Exception:
            logger.exception(f"Error closing connection {conn.id}, ignoring.")

    # Poll with an increasing delay until every connection has removed itself
    # from self._connections through its on-close callback.
    for i in range(1, 51):
        if n_connections := len(self._connections):
            if i % 5 == 0:
                logger.warning(f"Still {n_connections} pending connections...")
                logger.debug(f"{connections=}")
            await asyncio.sleep(0.001 * i)
        else:
            break

    logger.info("Milter shutdown complete.")
    self._state = MilterServerState.STOPPED
def commands_consumer_task_done(self, future: asyncio.Future[Any]) -> None:
    """
    Done-callback of the commands consumer task.

    Logs the final state of the task and, when the task ended with an
    exception, reports that exception to the socket connection through
    ``session_error_callback``. A cancelled task is not treated as an error.

    Args:
        future: The finished future as passed by asyncio; the task stored on
            the session is inspected instead.
    """
    task = self._commands_consumer_task
    self.logger.debug(
        "task done! "
        f"[task={task.get_name()}, "
        f"done={task.done()}, "
        f"cancelled={task.cancelled()}]"
    )
    if task.cancelled():
        # Task.exception() raises CancelledError for a cancelled task, which
        # would escape from this callback instead of being handled.
        return
    if (exception := task.exception()) is not None:
        self.logger.error(
            "Got an exception in the commands consumer task. "
            f"[task={task.get_name()}, "
            f"exception={exception}, "
            f"cancelled={task.cancelled()}]"
        )
        self._socket_connection.session_error_callback(exception=exception)
" + f"[task={self._commands_consumer_task.get_name()}, " + f"exception={self._commands_consumer_task.exception()}, " + f"cancelled={self._commands_consumer_task.cancelled()}]" + ) + self.logger.debug(f"queue_command: {command=}") + self._incoming_command_queue.put_nowait(entry := QueueEntry(command=command)) + self.logger.debug( + f"incoming_command_queue size={self._incoming_command_queue.qsize()}" + ) + return entry.done_event + + async def _commands_consumer(self) -> None: + last_macro_command: commands.DefineMacro | None = None + while True: + self.logger.debug( + f"commands_consumer: going to read the queue {last_macro_command=}" + ) + had_timeout = False + read_queue_wrapped_coro_task = asyncio.Task( + self._incoming_command_queue.get(), + name=f"read_queue_wrapped_coro_task-{self._socket_connection.id.short}", + ) + try: + queue_item = await asyncio.wait_for( + read_queue_wrapped_coro_task, + timeout=self.queue_reader_timeout_seconds, + ) + except asyncio.TimeoutError: + self.logger.debug("timeout reading the command queue") + had_timeout = True + read_queue_wrapped_coro_task.cancel() + continue + except asyncio.CancelledError: + self.logger.debug( + "commands_consumer task cancelled! " + f"{read_queue_wrapped_coro_task.cancelled()=}" + ) + if not read_queue_wrapped_coro_task.cancelled(): + self._incoming_command_queue.put_nowait(None) + read_queue_wrapped_coro_task.cancel() + self.logger.debug(f"{read_queue_wrapped_coro_task.cancelled()=}") + return + + self.logger.debug(f"commands_consumer: got {queue_item=}") + if queue_item is None: + self.logger.debug( + f"Got None on the incoming command queue. 
{had_timeout=}" + ) + if had_timeout: + continue + else: + return + + with DoneEventContextManager( + event=queue_item.done_event, logger=self.logger + ): + await self._process_queue_item(queue_item) + + async def _process_queue_item(self, queue_item: QueueEntry) -> None: + match queue_item.command: + case commands.OptionsNegotiate(): + # This one is an exception to the rule; implemented here. + await self.on_options_negotiate(queue_item.command) + case commands.DefineMacro(): + # A second exception; let's save the macro data to attach later when the + # actual command is seen (see below) and keep track of all macros seen + # during the session. + self.on_define_macro(queue_item.command) + self._last_macro_command = queue_item.command + case _: + if self._last_macro_command is not None: + self._attach_macros_to_command( + command=queue_item.command, + last_macro_command=self._last_macro_command, + ) + self._last_macro_command = None + response = await self.handle_command_in_app(command=queue_item.command) + if response is not None: + self.save_manipulations(manipulations=response.manipulations) + + # If it's End of Message, we have to send pending manipulations first. + if isinstance(queue_item.command, commands.EndOfMessage): + assert response is not None + self._manipulations_sent = True + self.logger.debug( + f"Sending {len(self._pending_manipulations)} manipulations " + "before end_of_message response." 
async def _stop_commands_consumer(self) -> None:
    """
    Cancel the commands consumer task and give it a short time to finish.

    Does nothing besides logging when the task has already finished. The
    outcome of the task itself (result, exception or cancellation) is never
    raised from here.
    """
    task = self._commands_consumer_task
    # Task.exception() raises CancelledError for a cancelled task and
    # InvalidStateError for a pending one; only ask when neither applies.
    if task.done() and not task.cancelled():
        exception = task.exception()
    else:
        exception = None
    self.logger.debug(
        f"_stop_commands_consumer [task={task.get_name()}, exception={exception}, "
        f"cancelled={task.cancelled()}]"
    )
    if task.done():
        return
    task.cancel()
    # asyncio.wait() reports completion without re-raising what the task ended
    # with, so a failed or cancelled consumer cannot abort the close sequence.
    _, pending = await asyncio.wait([task], timeout=0.1)
    if pending:
        task.cancel()
def on_define_macro(self, command: commands.DefineMacro) -> None:
    """
    Remember the macros carried by a DefineMacro command.

    A copy of the macros is stored under the stage of the command, replacing
    what was stored for that stage before, and every macro is merged into the
    session-wide ``all_macros`` mapping, overwriting equal names.
    """
    self.logger.debug(f"on_define_macro {command.macros=}")
    stage_macros = command.macros
    self.all_macros.update(stage_macros)
    self.macros_per_stage[command.stage] = stage_macros.copy()
index 0000000..e69de29 diff --git a/tests/api/__init__.py b/tests/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/api/test_application.py b/tests/api/test_application.py new file mode 100644 index 0000000..66ac075 --- /dev/null +++ b/tests/api/test_application.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +# pyright: reportPrivateUsage=false +from __future__ import annotations + +import pytest + +from purepythonmilter.api.application import ProgrammingError, PurePythonMilter +from purepythonmilter.protocol import commands, responses + +from ..conftest import FakeMtaMilterSession + + +def test_fails_no_annotation_return_type() -> None: + async def myhook(cmd: commands.Connect): # type: ignore[no-untyped-def] + return None + + with pytest.raises(ProgrammingError): + PurePythonMilter(hook_on_connect=myhook) + + +def test_set_noreturn_callback_by_annotation() -> None: + async def myhook(cmd: commands.Connect) -> None: + return None + + ppm = PurePythonMilter(hook_on_connect=myhook) + assert not ppm._request_proto_flags.reply_connect + + +def test_set_return_callback_by_annotation() -> None: + async def myhook(cmd: commands.Connect) -> responses.BaseVerdictNoData: + return responses.Accept() + + ppm = PurePythonMilter(hook_on_connect=myhook) + assert ppm._request_proto_flags.reply_connect + + +@pytest.mark.asyncio +async def test_basemilter_end_of_message_none_to_continue( + fake_session: FakeMtaMilterSession, +) -> None: + async def myhook(cmd: commands.EndOfMessage) -> None: + return None + + ppm = PurePythonMilter(hook_on_end_of_message=myhook) + basemilter = ppm._get_factory()(session=fake_session) + ret = await basemilter.on_end_of_message( + commands.EndOfMessage(data_raw=commands.CommandDataRaw(b"")) + ) + assert isinstance(ret, responses.Continue) + + +@pytest.mark.asyncio +async def test_basemilter_end_of_message_not_none_kept( + fake_session: FakeMtaMilterSession, +) 
-> None: + async def myhook(cmd: commands.EndOfMessage) -> responses.Accept: + return responses.Accept() + + ppm = PurePythonMilter(hook_on_end_of_message=myhook) + basemilter = ppm._get_factory()(session=fake_session) + ret = await basemilter.on_end_of_message( + commands.EndOfMessage(data_raw=commands.CommandDataRaw(b"")) + ) + assert isinstance(ret, responses.Accept) diff --git a/tests/api/test_logger.py b/tests/api/test_logger.py new file mode 100644 index 0000000..fa49aae --- /dev/null +++ b/tests/api/test_logger.py @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging + +import pytest + +from purepythonmilter.api.logger import ConnectionContextLogger +from purepythonmilter.api.models import MilterServerConnectionID, connection_id_context + + +def test_connectioncontext_logger_no_connection_id_no_extra( + caplog: pytest.LogCaptureFixture, +) -> None: + logger = ConnectionContextLogger().get("mylogger") + with caplog.at_level(logging.ERROR): + logger.error("foo") + messages = [rec.message for rec in caplog.records if rec.levelno >= logging.ERROR] + assert messages == ["NONE: foo"] + + +def test_connectioncontext_logger_no_connection_id_but_extras( + caplog: pytest.LogCaptureFixture, +) -> None: + logger = ConnectionContextLogger().get( + "mylogger", + extra_contexts={"myctx": "myval", "myint": 123}, + ) + with caplog.at_level(logging.ERROR): + logger.error("foo") + messages = [rec.message for rec in caplog.records if rec.levelno >= logging.ERROR] + assert messages == ["NONE: foo [myctx=myval, myint=123]"] + + +def test_connectioncontext_logger_connection_id_at_start_extras( + caplog: pytest.LogCaptureFixture, +) -> None: + token = connection_id_context.set(MilterServerConnectionID(bytes=b"\x02" * 16)) + logger = ConnectionContextLogger().get( + "mylogger", + extra_contexts={"myctx": "myval", "myint": 123}, + ) + with caplog.at_level(logging.ERROR): + 
logger.error("foo") + connection_id_context.reset(token) + messages = [rec.message for rec in caplog.records if rec.levelno >= logging.ERROR] + assert messages == ["02020202: foo [myctx=myval, myint=123]"] + + +def test_connectioncontext_logger_connection_id_later_extras( + caplog: pytest.LogCaptureFixture, +) -> None: + logger = ConnectionContextLogger().get( + "mylogger", + extra_contexts={"myctx": "myval", "myint": 123}, + ) + with caplog.at_level(logging.ERROR): + token = connection_id_context.set(MilterServerConnectionID(bytes=b"\x04" * 16)) + logger.error("foo") + connection_id_context.reset(token) + messages = [rec.message for rec in caplog.records if rec.levelno >= logging.ERROR] + assert messages == ["04040404: foo [myctx=myval, myint=123]"] diff --git a/tests/api/test_models.py b/tests/api/test_models.py new file mode 100644 index 0000000..5f80135 --- /dev/null +++ b/tests/api/test_models.py @@ -0,0 +1,126 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pytest + +from purepythonmilter.api.models import RequestProtocolFlags + + +def test_request_protocol_flags_default() -> None: + r = RequestProtocolFlags() + pf, af = r.encode_to_flags_bitmask() + expected_pf = ( + 0x00000001 # SMFIP_NOCONNECT / call_connect: bool = False + | 0x00000002 # SMFIP_NOHELO / call_helo: bool = False + | 0x00000004 # SMFIP_NOMAIL / call_mail_from: bool = False + | 0x00000008 # SMFIP_NORCPT / call_rcpt_to: bool = False + # | 0x00000800 # SMFIP_RCPT_REJ / call_rcpt_to_rejected: bool = False + | 0x00000200 # SMFIP_NODATA / call_data: bool = False + | 0x00000020 # SMFIP_NOHDRS / call_headers: bool = False + | 0x00000040 # SMFIP_NOEOH / call_end_of_headers: bool = False + | 0x00000010 # SMFIP_NOBODY / call_body_chunk: bool = False + | 0x00000100 # SMFIP_NOUNKNOWN / call_unknown: bool = False + | 0x00001000 # SMFIP_NR_CONN / reply_connect: bool = False + | 0x00002000 # SMFIP_NR_HELO / reply_helo: 
bool = False + | 0x00004000 # SMFIP_NR_MAIL / reply_mail_from: bool = False + | 0x00008000 # SMFIP_NR_RCPT / reply_rcpt_to: bool = False + | 0x00010000 # SMFIP_NR_DATA / reply_data: bool = False + | 0x00020000 # SMFIP_NR_UNKN / reply_unknown: bool = False + | 0x00000080 # SMFIP_NR_HDR / SMFIP_NOHREPL / reply_headers: bool = False + | 0x00040000 # SMFIP_NR_EOH / reply_end_of_headers: bool = False + | 0x00080000 # SMFIP_NR_BODY / reply_body_chunk: bool = False + | 0x00000400 # SMFIP_SKIP / can_skip_body_chunks: bool = True + # | 0x00100000 # SMFIP_HDR_LEADSPC / headers_with_leading_space: bool = False + ) + expected_af = ( + 0x00000000 # Dummy for next lines as comments preserving '|' + # | 0x00000001 # SMFIF_ADDHDRS + # | 0x00000002 # SMFIF_CHGBODY + # | 0x00000004 # SMFIF_ADDRCPT + # | 0x00000080 # SMFIF_ADDRCPT_PAR + # | 0x00000008 # SMFIF_DELRCPT + # | 0x00000010 # SMFIF_CHGHDRS + # | 0x00000020 # SMFIF_QUARANTINE + # | 0x00000040 # SMFIF_CHGFROM + | 0x00000100 # SMFIF_SETSYMLIST + ) + + assert f"{pf:#08x}" == f"{expected_pf:#08x}" + assert f"{af:#08x}" == f"{expected_af:#08x}" + + +@pytest.mark.parametrize( + "request_obj,flag_test", + [ + pytest.param( + RequestProtocolFlags(call_rcpt_to_rejected=True), + 0x00000800, # SMFIP_RCPT_REJ + id="call_rcpt_to_rejected", + ), + pytest.param( + RequestProtocolFlags(headers_with_leading_space=True), + 0x00100000, # SMFIP_HDR_LEADSPC + id="headers_with_leading_space", + ), + ], +) +def test_request_protocol_flags_non_default_protocol( + request_obj: RequestProtocolFlags, flag_test: int +) -> None: + pf, _ = request_obj.encode_to_flags_bitmask() + assert pf & flag_test + + +@pytest.mark.parametrize( + "request_obj,flag_test", + [ + pytest.param( + RequestProtocolFlags(can_add_headers=True), + 0x00000001, # SMFIF_ADDHDRS + id="can_add_headers", + ), + pytest.param( + RequestProtocolFlags(can_change_body=True), + 0x00000002, # SMFIF_CHGBODY + id="can_change_body", + ), + pytest.param( + 
RequestProtocolFlags(can_add_recipients=True), + 0x00000004, # SMFIF_ADDRCPT + id="can_add_recipients", + ), + pytest.param( + RequestProtocolFlags(can_add_recipients_with_esmtp_args=True), + 0x00000080, # SMFIF_ADDRCPT_PAR + id="can_add_recipients_with_esmtp_args", + ), + pytest.param( + RequestProtocolFlags(can_remove_recipients=True), + 0x00000008, # SMFIF_DELRCPT + id="can_remove_recipients", + ), + pytest.param( + RequestProtocolFlags(can_change_headers=True), + 0x00000010, # SMFIF_CHGHDRS + id="can_change_headers", + ), + pytest.param( + RequestProtocolFlags(can_quarantine=True), + 0x00000020, # SMFIF_QUARANTINE + id="can_quarantine", + ), + pytest.param( + RequestProtocolFlags(can_change_mail_from=True), + 0x00000040, # SMFIF_CHGFROM + id="can_change_mail_from", + ), + ], +) +def test_request_protocol_flags_non_default_actions( + request_obj: RequestProtocolFlags, flag_test: int +) -> None: + _, af = request_obj.encode_to_flags_bitmask() + assert af & flag_test diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..dd55089 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,259 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +# pyright: reportPrivateUsage=false +from __future__ import annotations + +import asyncio +import logging +from collections.abc import AsyncGenerator, Callable, Generator +from typing import TYPE_CHECKING, Any +from unittest.mock import AsyncMock + +import attrs +import pytest +import pytest_asyncio + +from purepythonmilter.api import models +from purepythonmilter.api.application import PurePythonMilter +from purepythonmilter.api.interfaces import ( + AbstractMilterApp, + AbstractMtaMilterConnectionHandler, + AbstractMtaMilterSession, + MilterAppFactory, +) +from purepythonmilter.protocol import payload, responses +from purepythonmilter.server import session + +logger = logging.getLogger(__name__) + + +@pytest.fixture() +def event_loop() -> 
Generator[asyncio.AbstractEventLoop, None, None]: + """ + Same reason as in https://stackoverflow.com/a/72104554, we need to override the + built-in fixture properly close the loop in teardown. Not before all tasks have been + awaited to finish. Similar to high-level functions as asyncio.run() do. + """ + loop = asyncio.get_event_loop_policy().new_event_loop() + yield loop + for i in range(1, 51): + pending_tasks = [task for task in asyncio.all_tasks(loop) if not task.done()] + if n_tasks := len(pending_tasks): + if i % 5 == 0: + logger.warning(f"Still {n_tasks} pending tasks...") + logger.debug(f"{pending_tasks=}") + loop.run_until_complete(asyncio.sleep(0.001 * i)) + else: + break + else: + pending_tasks = [task for task in asyncio.all_tasks(loop) if not task.done()] + raise Exception( + f"Still having { len(pending_tasks)} pending tasks... {pending_tasks=}" + ) + loop.close() + + +@pytest.fixture() +def full_conversation_packets() -> list[bytes]: + return [ + # 0: Options negotiate + b"\x00\x00\x00\rO\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff", + # 1: Connect + b"\x00\x00\x00\x1cC[172.17.0.1]\x004\x81|172.17.0.1\x00", + # 2: Helo + b"\x00\x00\x00\x0eH[172.17.0.1]\x00", + # 3: Mail From + b"\x00\x00\x00:M\x00BODY=8BITMIME\x00" + b"SIZE=466\x00", + # 4: Rcpt To + b"\x00\x00\x00\x10R\x00", + # 5: Data + b"\x00\x00\x00\x01T", + # 6, 7: Header, another Header + b"\x00\x00\x00BLMessage-ID\x00<5037ef9b-0616-86fd-0561-b0f3c198edc4" + b"@gertvandijk.nl>\x00", + b"\x00\x00\x00YLUser-Agent\x00Mozilla/5.0 (X11; Linux x86_64; rv:91.0) " + b"Gecko/20100101\n Thunderbird/91.10.0\x00", + # 8: End of Headers + b"\x00\x00\x00\x01N", + # 9: Body Chunk + b"\x00\x00\x00\x19Btest\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n", + # 10: End of Message + b"\x00\x00\x00\x01E", + # 11: Some random Unknown command + b"\x00\x00\x00\x06UHELP\x00", + # 12: Abort, another Abort + b"\x00\x00\x00\x01A", + b"\x00\x00\x00\x01A", + # 13: Quit + b"\x00\x00\x00\x01Q", + ] + + +class 
MilterAppFactoryFixtureParams: + return_on_connect: Any = None + return_on_end_of_message: Any = responses.Continue() + return_on_unknown: Any = None + + +if TYPE_CHECKING: + + class FixtureRequest: + param: MilterAppFactoryFixtureParams | None + +else: + FixtureRequest = Any + + +@pytest_asyncio.fixture # pyright: ignore reportUntypedFunctionDecorator +async def fake_app_factory( + request: FixtureRequest, # indirect parameter to specify mock return values +) -> MilterAppFactory: + mocked_return_on_connect: Any = None + # application.PurePythonMilter should translate None into responses.Continue(). + mocked_return_on_end_of_message = responses.Continue() + mocked_return_on_unknown: Any = None + if hasattr(request, "param") and request.param is not None: + mocked_return_on_connect = request.param.return_on_connect + mocked_return_on_end_of_message = request.param.return_on_end_of_message + mocked_return_on_unknown = request.param.return_on_unknown + + def app_factory(session: AbstractMtaMilterSession) -> AbstractMilterApp: + ppm = PurePythonMilter() + factory = ppm._get_factory() + app = factory(session=session) + app.on_connect = AsyncMock(return_value=mocked_return_on_connect) # type: ignore[assignment] # noqa: E501 + app.on_helo = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_mail_from = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_rcpt_to = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_data = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_header = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_end_of_headers = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_body_chunk = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_end_of_message = AsyncMock(return_value=mocked_return_on_end_of_message) # type: ignore[assignment] # noqa: E501 + app.on_abort = AsyncMock(return_value=None) # type: ignore[assignment] + app.on_quit = 
AsyncMock(return_value=None) # type: ignore[assignment] + app.on_unknown = AsyncMock(return_value=mocked_return_on_unknown) # type: ignore[assignment] # noqa: E501 + return app + + return app_factory + + +@pytest_asyncio.fixture # pyright: ignore reportUntypedFunctionDecorator +async def fake_socket_connection( + fake_app_factory: MilterAppFactory, +) -> AbstractMtaMilterConnectionHandler: + class FakeStreamWriter: + def write(self, data: bytes) -> None: + pass + + def writelines(self, data: bytes) -> None: + pass + + def write_eof(self) -> None: + pass + + def can_write_eof(self) -> bool: + return True + + def close(self) -> None: + pass + + def is_closing(self) -> bool: + return False + + async def wait_closed(self) -> None: + pass + + def get_extra_info(self, name: str) -> None: + pass + + async def drain(self) -> None: + pass + + def sever_callback(connection_id: models.MilterServerConnectionID) -> None: + pass + + @attrs.define(auto_attribs=False) + class FakeSocketConnection(AbstractMtaMilterConnectionHandler): + _connection_id = models.MilterServerConnectionID.generate() + _reader = asyncio.StreamReader() + _writer = FakeStreamWriter() # type: ignore[assignment] + app_factory: MilterAppFactory = attrs.field() + _server_on_close_cb: Callable[ + [models.MilterServerConnectionID], None + ] = sever_callback + + @property + def id(self) -> models.MilterServerConnectionID: + return self._connection_id + + async def keep_reading_packets(self) -> None: + return None + + async def write_response( + self, payload: payload.Payload, *, drain: bool = False + ) -> None: + return None + + async def close_bottom_up(self) -> None: + return None + + async def close_top_down(self) -> None: + return None + + def session_error_callback(self, *, exception: BaseException) -> None: + return None + + return FakeSocketConnection(app_factory=fake_app_factory) + + +@attrs.define(auto_attribs=False) +class FakeMtaMilterSession(session.MtaMilterSession): + """ + MtaMilterSession that 
does not send responses down to the + MtaMilterConnectionHandler, but saves those which would have been sent. + """ + + responses_written: list[ + (responses.AbstractResponse | responses.AbstractManipulation) + ] = attrs.field(init=False, factory=list) + + async def _send_response( + self, response: responses.AbstractResponse | responses.AbstractManipulation + ) -> None: + self.responses_written.append(response) + + +@pytest_asyncio.fixture # pyright: ignore reportUntypedFunctionDecorator +async def fake_session( + fake_socket_connection: AbstractMtaMilterConnectionHandler, +) -> AsyncGenerator[FakeMtaMilterSession, None]: + mms = FakeMtaMilterSession( + socket_connection=fake_socket_connection, # pyright: ignore PylancereportGeneralTypeIssues # noqa: E501 + # Let's set a very short timeout to not have tests run so long. + queue_reader_timeout_seconds=0.01, + ) + + yield mms + if ( + mms._commands_consumer_task.done() + and (exc := mms._commands_consumer_task.exception()) is not None + ): + raise exc + assert not mms._commands_consumer_task.done() + mms._commands_consumer_task.cancel() + + +@pytest_asyncio.fixture # pyright: ignore reportUntypedFunctionDecorator +async def fake_session_should_fail( + fake_socket_connection: AbstractMtaMilterConnectionHandler, +) -> AsyncGenerator[FakeMtaMilterSession, None]: + mms = FakeMtaMilterSession( + socket_connection=fake_socket_connection # pyright: ignore PylancereportGeneralTypeIssues # noqa: E501 + ) + yield mms + assert mms._commands_consumer_task.done() + assert mms._commands_consumer_task.exception() is not None diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 0000000..99ca68a --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,140 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +# pyright: 
reportPrivateUsage=false +from __future__ import annotations + +import asyncio +import logging +import time +from collections.abc import Generator +from typing import TYPE_CHECKING + +import pytest + +from purepythonmilter.api.application import PurePythonMilter +from purepythonmilter.server.milterserver import MilterServer + +if TYPE_CHECKING: + + class FixtureRequest: + param: PurePythonMilter + +else: + from typing import Any + + FixtureRequest = Any + +pytestmark = pytest.mark.asyncio + +logger = logging.getLogger(__name__) + +HOST_LOCALHOST = "127.0.0.1" + + +async def assert_read( + reader: asyncio.StreamReader, *, until_seen: bytes, timeout_ms: int = 1000 +) -> bytes: + bytes_read = b"" + start = time.time() + for _ in range(0, timeout_ms): + if time.time() - start > (float(timeout_ms) / 1000): + break + try: + bytes_read += await asyncio.wait_for(reader.read(1000), 0.001) + except asyncio.TimeoutError: + continue + if until_seen in bytes_read: + return bytes_read + await asyncio.sleep(0.001) + + raise RuntimeError( + f"Did not read expected {until_seen!r} within {timeout_ms=}, {bytes_read=!r}." + ) + + +async def assert_reader_closed( + reader: asyncio.StreamReader, *, timeout_ms: int = 1000 +) -> bytes: + bytes_read = b"" + start = time.time() + for _ in range(0, timeout_ms): + if time.time() - start > (float(timeout_ms) / 1000): + break + try: + bytes_read += await asyncio.wait_for(reader.read(-1), 0.001) + except asyncio.TimeoutError: + continue + if reader.at_eof(): + return bytes_read + await asyncio.sleep(0.001) + + raise RuntimeError( + f"Did not reach expected reader.at_eof() within {timeout_ms=}, {bytes_read=!r}." + ) + + +async def await_connection_count( + srv: MilterServer, *, count: int, timeout_ms: int = 1000 +) -> None: + for _ in range(0, timeout_ms): + if len(srv._connections) == count: + break + await asyncio.sleep(0.001) + else: + raise RuntimeError( + f"Did not see expected connection {count=} within {timeout_ms=}." 
+ ) + + +async def _await_startup( + port: int, +) -> tuple[asyncio.StreamReader, asyncio.StreamWriter]: + for _ in range(0, 1000): + await asyncio.sleep(0.001) + logger.info("checking if server is accepting connections already...") + try: + reader, writer = await asyncio.open_connection(HOST_LOCALHOST, port) + except ConnectionRefusedError: + logger.info("no, will check again...") + continue + else: + return reader, writer + else: + raise RuntimeError("Server not accepting connections") + + +async def _await_shutdown(server_task: asyncio.Task[None]) -> None: + for _ in range(0, 1000): + await asyncio.sleep(0.001) + logger.info("checking if server is shut down...") + if server_task.done(): + logger.info("yes, done!") + break + else: + logger.info("no, will check again...") + + +@pytest.fixture() +def start_testserver( + request: FixtureRequest, # indirect parameter to specify app factory + event_loop: asyncio.AbstractEventLoop, + unused_tcp_port: int, + caplog: pytest.LogCaptureFixture, +) -> Generator[ + tuple[MilterServer, asyncio.StreamReader, asyncio.StreamWriter], None, None +]: + app = request.param + server_task = asyncio.ensure_future( + app.start_server(host=HOST_LOCALHOST, port=unused_tcp_port), loop=event_loop + ) + reader, writer = event_loop.run_until_complete(_await_startup(unused_tcp_port)) + assert app._milterserver is not None + + try: + yield app._milterserver, reader, writer + finally: + server_task.cancel() + event_loop.run_until_complete(_await_shutdown(server_task)) + assert not [rec for rec in caplog.records if rec.levelno >= logging.WARNING] diff --git a/tests/integration/examples/__init__.py b/tests/integration/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/examples/test_append_header_ip.py b/tests/integration/examples/test_append_header_ip.py new file mode 100644 index 0000000..25a11c9 --- /dev/null +++ b/tests/integration/examples/test_append_header_ip.py @@ -0,0 +1,62 @@ +# 
SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +# pyright: reportPrivateUsage=false +from __future__ import annotations + +import asyncio +import logging + +import pytest + +from purepythonmilter.examples.append_header_ip.__main__ import append_header_ip_milter +from purepythonmilter.server.milterserver import MilterServer + +from ..conftest import assert_read, await_connection_count + +pytestmark = pytest.mark.asyncio + + +@pytest.mark.parametrize( + "start_testserver", [pytest.param(append_header_ip_milter)], indirect=True +) +async def test_append_header_ip( + start_testserver: tuple[MilterServer, asyncio.StreamReader, asyncio.StreamWriter], + caplog: pytest.LogCaptureFixture, + event_loop: asyncio.AbstractEventLoop, + full_conversation_packets: list[bytes], +) -> None: + caplog.set_level(logging.WARNING) + srv, reader, writer = start_testserver + + await await_connection_count(srv, count=1) + + for packet in full_conversation_packets[:2]: + writer.write(packet) + await writer.drain() + + data1 = await assert_read(reader, until_seen=b"\x00\x00\x00\x01c") + assert b"hX-UNSET" not in data1 + + for packet in full_conversation_packets[2:11]: + writer.write(packet) + await writer.drain() + + await assert_read( + reader, + until_seen=b"\x00\x00\x00\x14hX-UNSET\x00172.17.0.1\x00\x00\x00\x00\x01c", + ) + + for packet in full_conversation_packets[11:]: + writer.write(packet) + await writer.drain() + + writer.close() + await writer.wait_closed() + + data_after_eom = await reader.read(-1) + assert not data_after_eom + + await await_connection_count(srv, count=0) + assert not [rec for rec in caplog.records if rec.levelno >= logging.INFO] diff --git a/tests/integration/examples/test_change_body.py b/tests/integration/examples/test_change_body.py new file mode 100644 index 0000000..2eab213 --- /dev/null +++ b/tests/integration/examples/test_change_body.py @@ -0,0 +1,66 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# 
SPDX-License-Identifier: Apache-2.0 + +# pyright: reportPrivateUsage=false +from __future__ import annotations + +import asyncio +import logging + +import pytest + +from purepythonmilter.examples.change_body.__main__ import change_body_milter +from purepythonmilter.server.milterserver import MilterServer + +from ..conftest import assert_read, await_connection_count + +pytestmark = pytest.mark.asyncio + + +@pytest.mark.parametrize( + "start_testserver", [pytest.param(change_body_milter)], indirect=True +) +async def test_change_body( + start_testserver: tuple[MilterServer, asyncio.StreamReader, asyncio.StreamWriter], + caplog: pytest.LogCaptureFixture, + event_loop: asyncio.AbstractEventLoop, + full_conversation_packets: list[bytes], +) -> None: + caplog.set_level(logging.WARNING) + srv, reader, writer = start_testserver + + await await_connection_count(srv, count=1) + + for packet in full_conversation_packets[:10]: + writer.write(packet) + await writer.drain() + + data1 = await assert_read( + reader, + # empty set for MacroStage.END_OF_MESSAGE (5) as part of + # OptionsNegotiateResponse + until_seen=b"\x00\x00\x00\x05\x00", + ) + assert b"foobar" not in data1 + + writer.write(full_conversation_packets[10]) + await writer.drain() + + await assert_read( + reader, + until_seen=b"\x07bfoobar\x00\x00\x00\x01c", + ) + + for packet in full_conversation_packets[11:]: + writer.write(packet) + await writer.drain() + + writer.close() + await writer.wait_closed() + + data_after_eom = await reader.read(-1) + assert not data_after_eom + + await await_connection_count(srv, count=0) + assert not [rec for rec in caplog.records if rec.levelno >= logging.INFO] diff --git a/tests/integration/examples/test_debug_log_all.py b/tests/integration/examples/test_debug_log_all.py new file mode 100644 index 0000000..00e3358 --- /dev/null +++ b/tests/integration/examples/test_debug_log_all.py @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: 
Apache-2.0 + +# pyright: reportPrivateUsage=false +from __future__ import annotations + +import asyncio +import logging + +import pytest + +from purepythonmilter.examples.debug_log_all.__main__ import debug_log_all_milter +from purepythonmilter.server.milterserver import MilterServer + +from ..conftest import assert_read, await_connection_count + +pytestmark = pytest.mark.asyncio + + +@pytest.mark.parametrize( + "start_testserver", [pytest.param(debug_log_all_milter)], indirect=True +) +async def test_debug_log_all( + start_testserver: tuple[MilterServer, asyncio.StreamReader, asyncio.StreamWriter], + caplog: pytest.LogCaptureFixture, + event_loop: asyncio.AbstractEventLoop, + full_conversation_packets: list[bytes], +) -> None: + caplog.set_level(logging.WARNING) + srv, reader, writer = start_testserver + + await await_connection_count(srv, count=1) + + for packet in full_conversation_packets[:1]: + writer.write(packet) + await writer.drain() + + await assert_read(reader, until_seen=b"O\x00\x00\x00\x06") + + for packet in full_conversation_packets[1:11]: + writer.write(packet) + await writer.drain() + + data_connect_to_eom = await assert_read(reader, until_seen=b"\x00\x00\x00\x01c") + assert data_connect_to_eom == b"\x00\x00\x00\x01c" + + for packet in full_conversation_packets[11:]: + writer.write(packet) + await writer.drain() + + writer.close() + await writer.wait_closed() + + data_after_eom = await reader.read(-1) + assert not data_after_eom + + await await_connection_count(srv, count=0) + assert not [rec for rec in caplog.records if rec.levelno >= logging.INFO] diff --git a/tests/integration/test_server.py b/tests/integration/test_server.py new file mode 100644 index 0000000..ce319e2 --- /dev/null +++ b/tests/integration/test_server.py @@ -0,0 +1,141 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +# pyright: reportPrivateUsage=false +from __future__ import annotations + +import asyncio +import logging + +import 
pytest + +from purepythonmilter.api.application import PurePythonMilter +from purepythonmilter.protocol import commands +from purepythonmilter.server.milterserver import MilterServer + +from .conftest import assert_read, assert_reader_closed, await_connection_count + +pytestmark = pytest.mark.asyncio + +logger = logging.getLogger(__name__) +HOST_LOCALHOST = "127.0.0.1" + + +on_connect_call_seen: commands.BaseCommand | None = None + + +async def on_connect(cmd: commands.Connect) -> None: + global on_connect_call_seen + on_connect_call_seen = cmd + + +mytestmilter_on_connect = PurePythonMilter( + name="mytestmilter_on_connect", + hook_on_connect=on_connect, +) + +mytestmilter_no_hooks = PurePythonMilter(name="mytestmilter_no_hooks") + + +@pytest.mark.parametrize( + "start_testserver", [pytest.param(mytestmilter_on_connect)], indirect=True +) +async def test_server_basic( + start_testserver: tuple[MilterServer, asyncio.StreamReader, asyncio.StreamWriter], + caplog: pytest.LogCaptureFixture, + event_loop: asyncio.AbstractEventLoop, + full_conversation_packets: list[bytes], +) -> None: + global on_connect_call_seen + caplog.set_level(logging.WARNING) + srv, reader, writer = start_testserver + await await_connection_count(srv, count=1) + + for packet in full_conversation_packets[:1]: + writer.write(packet) + await writer.drain() + await assert_read(reader, until_seen=b"O") + + for packet in full_conversation_packets[1:11]: + writer.write(packet) + await writer.drain() + data_until_eom = await assert_read(reader, until_seen=b"\x00\x00\x00\x01c") + assert data_until_eom == b"\x00\x00\x00\x01c" + assert isinstance(on_connect_call_seen, commands.Connect) + + for packet in full_conversation_packets[11:]: + writer.write(packet) + await writer.drain() + writer.close() + await writer.wait_closed() + assert not await reader.read(-1) + + await await_connection_count(srv, count=0) + + assert not [rec for rec in caplog.records if rec.levelno >= logging.WARNING] + 
on_connect_call_seen = None + + +@pytest.mark.parametrize( + "start_testserver", [pytest.param(mytestmilter_no_hooks)], indirect=True +) +async def test_server_basic_nohooks( + start_testserver: tuple[MilterServer, asyncio.StreamReader, asyncio.StreamWriter], + caplog: pytest.LogCaptureFixture, + event_loop: asyncio.AbstractEventLoop, + full_conversation_packets: list[bytes], +) -> None: + srv, reader, writer = start_testserver + await await_connection_count(srv, count=1) + + for packet in full_conversation_packets[:1]: + writer.write(packet) + await writer.drain() + await assert_read(reader, until_seen=b"O") + + for packet in full_conversation_packets[1:11]: + writer.write(packet) + await writer.drain() + data_until_eom = await assert_read(reader, until_seen=b"\x00\x00\x00\x01c") + assert data_until_eom == b"\x00\x00\x00\x01c" + + for packet in full_conversation_packets[11:]: + writer.write(packet) + await writer.drain() + writer.close() + await writer.wait_closed() + assert not await reader.read(-1) + + await await_connection_count(srv, count=0) + + assert not [rec for rec in caplog.records if rec.levelno >= logging.WARNING] + + +@pytest.mark.parametrize( + "start_testserver", [pytest.param(mytestmilter_on_connect)], indirect=True +) +async def test_server_protocol_violation_close_connection( + start_testserver: tuple[MilterServer, asyncio.StreamReader, asyncio.StreamWriter], + caplog: pytest.LogCaptureFixture, + event_loop: asyncio.AbstractEventLoop, +) -> None: + global on_connect_call_seen + caplog.set_level(logging.WARNING) + srv, reader, writer = start_testserver + await await_connection_count(srv, count=1) + + options_negotiate_packet = ( + b"\xf0\x00\x00\rO\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff" + ) + writer.write(options_negotiate_packet) + await writer.drain() + data_until_closed = await assert_reader_closed(reader) + assert not data_until_closed + + assert on_connect_call_seen is None + + warnings = [rec for rec in caplog.records if 
rec.levelno >= logging.WARNING] + assert len(warnings) == 1 + assert "Protocol violation, going to close the connection." in warnings[0].message + on_connect_call_seen = None diff --git a/tests/protocol/__init__.py b/tests/protocol/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/protocol/test_commands.py b/tests/protocol/test_commands.py new file mode 100644 index 0000000..7f5589b --- /dev/null +++ b/tests/protocol/test_commands.py @@ -0,0 +1,852 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import ipaddress +import logging +import struct +from collections.abc import Generator, Sequence +from typing import Any, ClassVar, Final + +import attrs +import pytest + +from purepythonmilter.api import models +from purepythonmilter.protocol import definitions +from purepythonmilter.protocol.commands import ( + Abort, + BaseCommand, + BodyChunk, + CommandDataRaw, + Connect, + DefineMacro, + Header, + Helo, + MailFrom, + OptionsNegotiate, + Quit, + RcptTo, + Unknown, + chars_to_command_registry, +) +from purepythonmilter.protocol.exceptions import ProtocolViolationCommandData + +optneg_data_no_flags: Final[CommandDataRaw] = CommandDataRaw( + # Just protocol version 6 and no other capabilities. 
+ b"\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" +) + + +def _make_options_negotiate_data( + flag: definitions.ProtocolFlagsAllType | definitions.ActionFlags | None, + version_mask: int = 0, +) -> CommandDataRaw: + version_int = ( + int.from_bytes(optneg_data_no_flags[0:4], "big", signed=False) | version_mask + ) + action_int = int.from_bytes(optneg_data_no_flags[4:8], "big", signed=False) + proto_int = int.from_bytes(optneg_data_no_flags[8:12], "big", signed=False) + match flag: + case ( + definitions.ProtocolFlagsDisableCallback() + | definitions.ProtocolFlagsOther() + ): + proto_int |= flag.value + case definitions.ActionFlags(): + action_int |= flag.value + case None: + pass + return CommandDataRaw(struct.pack("!III", version_int, action_int, proto_int)) + + +def _assert_nothing_logged(records: Sequence[logging.LogRecord]) -> None: + assert not [rec for rec in records if rec.levelno >= logging.INFO] + + +@pytest.mark.parametrize( + "data,attributes_enabled", + [ + pytest.param( + optneg_data_no_flags, + [], + id="empty", + ), + pytest.param( + _make_options_negotiate_data(definitions.ActionFlags.ADD_HEADERS), + [attrs.fields(models.MtaSupportsProtocolFlags).allows_add_headers], + id="allows-add-headers", + ), + pytest.param( + _make_options_negotiate_data(definitions.ProtocolFlagsDisableCallback.DATA), + [attrs.fields(models.MtaSupportsProtocolFlags).disable_call_data], + id="disable-call-data", + ), + ], +) +def test_options_negotiate_ok_but_warn( + data: CommandDataRaw, + attributes_enabled: Sequence[attrs.Attribute[models.MtaSupportsProtocolFlags]], + caplog: pytest.LogCaptureFixture, +) -> None: + cmd_flags = OptionsNegotiate(data_raw=data).flags + for flags_attribute in attrs.fields(models.MtaSupportsProtocolFlags): + assert getattr(cmd_flags, flags_attribute.name) == ( + flags_attribute in attributes_enabled + ) + # None of the protocol flags sent are normal. 
+ warnings_logged = [rec for rec in caplog.records if rec.levelno >= logging.WARNING] + assert len(warnings_logged) == 1 + assert ( + "This MTA connection does not support all protocol flags. Are you using a " + "modern Postfix? Milter may misbehave." + ) in warnings_logged[0].msg + + +def test_options_negotiate_ok_normal_modern_postfix( + caplog: pytest.LogCaptureFixture, +) -> None: + OptionsNegotiate(data_raw=b"\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff") + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data", + [ + pytest.param( + CommandDataRaw(b"\x00"), + id="empty", + ), + pytest.param( + CommandDataRaw(b""), + id="empty", + ), + pytest.param( + CommandDataRaw(bytes(optneg_data_no_flags) + b"\x00"), + id="too-long", + ), + pytest.param( + CommandDataRaw(_make_options_negotiate_data(None, version_mask=7)), + id="unsupported-protocol-version", + ), + ], +) +def test_options_negotiate_invalid( + data: CommandDataRaw, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ProtocolViolationCommandData): + OptionsNegotiate(data_raw=data) + _assert_nothing_logged(caplog.records) + + +def test_command_str_repr() -> None: + o = OptionsNegotiate(data_raw=b"\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff") + assert str(o) == "OptionsNegotiate command [data=<12 bytes>]" + assert "logger=" not in repr(o) + + +def test_command_eq() -> None: + o1 = OptionsNegotiate(data_raw=b"\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff") + o2 = OptionsNegotiate(data_raw=b"\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff") + # Would fail if logger attribute is not excluded in __eq__. 
+ assert o1 == o2 + + +@pytest.mark.parametrize( + "data,stage,expected_macros", + [ + pytest.param( + CommandDataRaw(b"C"), + definitions.MacroStage.CONNECT, + {}, + id="for-connect-empty", + ), + pytest.param( + CommandDataRaw( + b"Cj\x00myhost.sub.example.com\x00{daemon_addr}\x00172.17.0.2\x00" + ), + definitions.MacroStage.CONNECT, + {"j": "myhost.sub.example.com", "{daemon_addr}": "172.17.0.2"}, + id="for-connect-with-data", + ), + pytest.param( + CommandDataRaw(b"H"), + definitions.MacroStage.HELO, + {}, + id="for-helo-empty", + ), + pytest.param( + CommandDataRaw(b"U"), + definitions.MacroStage.UNKNOWN, + {}, + id="for-unknown-empty", + ), + ], +) +def test_define_macro_ok( + data: CommandDataRaw, + stage: definitions.MacroStage, + expected_macros: dict[str, str], + caplog: pytest.LogCaptureFixture, +) -> None: + dm = DefineMacro(data_raw=data) + assert dm.stage == stage + assert dm.macros == expected_macros + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data", + [ + pytest.param( + CommandDataRaw(b""), + id="empty", + ), + pytest.param( + CommandDataRaw(b"\x01"), + id="invalid-command-without-data", + ), + pytest.param( + CommandDataRaw(b"\x01j\x00myhost.sub.example.com\x00"), + id="invalid-command-with-data", + ), + pytest.param( + CommandDataRaw( + b"Cj\x00myhost.sub.example.com\x00{daemon_addr}\x00172.17.0.2" + ), + id="data-missing-null-termination", + ), + pytest.param( + CommandDataRaw( + b"Cj\x00myhost.sub.example.com{daemon_addr}\x00172.17.0.2\x00" + ), + id="data-invalid-num-separators", + ), + pytest.param( + CommandDataRaw( + b"Cj\x00myhost.sub.example.com\x00{\xffaemon_addr}\x00172.17.0.2\x00" + ), + id="utf8-impossible-byte-3.5.1-symbol", + ), + pytest.param( + CommandDataRaw( + b"Cj\x00myhost.sub.example.com\x00{daemon_addr}\x00172.\xff7.0.2\x00" + ), + id="utf8-impossible-byte-3.5.1-value", + ), + ], +) +def test_define_macro_invalid( + data: CommandDataRaw, + caplog: pytest.LogCaptureFixture, +) -> None: + with 
pytest.raises(ProtocolViolationCommandData): + DefineMacro(data_raw=data) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data,expected_connection_info_args", + [ + pytest.param( + CommandDataRaw(b"[172.17.0.1]\x004\xc36172.17.0.1\x00"), + models.ConnectionInfoArgsIPv4( + hostname="[172.17.0.1]", + addr=ipaddress.IPv4Address("172.17.0.1"), + port=49974, + ), + id="example-ipv4-no-reverse", + ), + pytest.param( + CommandDataRaw(b"myhostname.mydomain.tld\x004\xc36172.17.0.1\x00"), + models.ConnectionInfoArgsIPv4( + hostname="myhostname.mydomain.tld", + addr=ipaddress.IPv4Address("172.17.0.1"), + port=49974, + ), + id="example-ipv4-with-reverse", + ), + pytest.param( + CommandDataRaw( + b"[2607:f8b0:4864:20::748]\x006\xa3\x162607:f8b0:4864:20::748\x00" + ), + models.ConnectionInfoArgsIPv6( + hostname="[2607:f8b0:4864:20::748]", + addr=ipaddress.IPv6Address("2607:f8b0:4864:20::748"), + port=41750, + ), + id="example-ipv6-no-reverse", + ), + pytest.param( + CommandDataRaw( + b"mail-oi1-x234.google.com\x006\x82.2607:f8b0:4864:20::234\x00" + ), + models.ConnectionInfoArgsIPv6( + hostname="mail-oi1-x234.google.com", + addr=ipaddress.IPv6Address("2607:f8b0:4864:20::234"), + port=33326, + ), + id="example-ipv6-with-reverse", + ), + pytest.param( + CommandDataRaw(b"ignored_hostname\x00L\x00\x00/run/mysock\x00"), + models.ConnectionInfoArgsUnixSocket( + path="/run/mysock", + ), + id="example-unix-socket", + ), + pytest.param( + CommandDataRaw(b"ignored_hostname\x00L\x00\x00/run/\xc3\xb1ysock\x00"), + models.ConnectionInfoArgsUnixSocket( + path="/run/ñysock", + ), + id="example-unix-socket-utf8", # TODO: verify this is sent as listed here. 
+ ), + pytest.param( + CommandDataRaw(b"unknown\x00U"), + models.ConnectionInfoUnknown(description="unknown"), + id="unknown", + ), + ], +) +def test_connect_ok( + data: CommandDataRaw, + expected_connection_info_args: models.ConnectionInfoArgs, + caplog: pytest.LogCaptureFixture, +) -> None: + assert Connect(data_raw=data).connection_info_args == expected_connection_info_args + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data", + [ + pytest.param( + CommandDataRaw(b"[172.17.0.1]"), + id="structure", + ), + pytest.param( + CommandDataRaw(b"[172.17.0.1]\x00"), + id="structure", + ), + pytest.param( + CommandDataRaw(b"[172.17.0.1]\x004\xc361\x00"), + id="socket-data-too-short", + ), + pytest.param( + CommandDataRaw(b"[256.17.0.1]\x004\xc36256.17.0.1\x00"), + id="ipv4-invalid", + ), + pytest.param( + CommandDataRaw(b"[172.17.0.1]\x004\xc36172.\xff7.0.1\x00"), + id="ipv4-invalid-char-ip", + ), + pytest.param( + CommandDataRaw(b"[172.\xff.0.1]\x004\xc36172.17.0.1\x00"), + id="ipv4-invalid-char-hostname", + ), + pytest.param( + CommandDataRaw( + b"[2607:f8b0:4864:20::748]\x006\xa3\x162607:f8b0:4864:20:::748\x00" + ), + id="ipv6-invalid", + ), + pytest.param( + CommandDataRaw( + b"[2607:f8b0:4864:20::748]\x006\xa3\x162607:f8b0:4864:20::11748\x00" + ), + id="ipv6-invalid", + ), + pytest.param( + CommandDataRaw( + b"[2607:f8b0:4864:20::748]\x006\xa3\x162607:f8b0:4864:20::\xff\x00" + ), + id="ipv6-invalid-char-ip", + ), + pytest.param( + CommandDataRaw( + b"[2607:f8b0:4864:20::\xff]\x006\xa3\x162607:f8b0:4864:20::748\x00" + ), + id="ipv6-invalid-char-hostname", + ), + pytest.param( + CommandDataRaw(b"[\xff.17.0.1]\x004\xc36172.17.0.1\x00"), + # https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt + id="hostname-utf8-impossible-byte-3.5.1", + ), + pytest.param( + CommandDataRaw(b"[172.17.0.1]\x005\xc36172.17.0.1\x00"), + id="unsupported-socket-family-ipv5", + ), + pytest.param( + 
CommandDataRaw(b"ignored_hostname\x00L\x00\x00/run/\xffysock\x00"), + id="unix-socket-invalid-char", + ), + ], +) +def test_connect_invalid( + data: CommandDataRaw, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ProtocolViolationCommandData): + Connect(data_raw=data) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data,expected_hostname", + [ + pytest.param( + CommandDataRaw(b"\x00"), + "", + id="hostname-empty", + ), + pytest.param( + CommandDataRaw(b"foobar\x00"), + "foobar", + id="hostname-string", + ), + pytest.param( + CommandDataRaw(b"[172.17.0.1]\x00"), + "[172.17.0.1]", + id="hostname-ip", + ), + pytest.param( + CommandDataRaw(b"foo\xe0\xb8\xbfar\x00"), + r"foo\xe0\xb8\xbfar", # not foo฿ar + id="hostname-string-valid-utf8-to-ascii", + ), + pytest.param( + CommandDataRaw(b"foo\xffbar\x00"), + r"foo\xffbar", + # https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt + id="hostname-string-utf8-impossible-byte-backslashreplace", + ), + ], +) +def test_helo_ok( + data: CommandDataRaw, + expected_hostname: str, + caplog: pytest.LogCaptureFixture, +) -> None: + assert Helo(data_raw=data).hostname == expected_hostname + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data", + [ + pytest.param( + CommandDataRaw(b""), + id="hostname-empty-no-null-termination", + ), + pytest.param( + CommandDataRaw(b"foobar"), + id="hostname-string-no-null-termination", + ), + ], +) +def test_helo_invalid( + data: CommandDataRaw, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ProtocolViolationCommandData): + Helo(data_raw=data) + _assert_nothing_logged(caplog.records) + + +# No type annotations for pytest.ParameterSet, sigh. 
😞 +def generate_mail_from_and_rcpt_to_testparams_ok() -> Generator[Any, None, None]: + for cmd_class in (MailFrom, RcptTo): + yield from [ + pytest.param( + cmd_class, + CommandDataRaw(b"\x00"), + "g@g3rt.nl", + {}, + id=f"{cmd_class.__name__}-address-string", + ), + pytest.param( + cmd_class, + CommandDataRaw(b"\x00FOO=BAR\x00SIZE=1234\x00"), + "g@g3rt.nl", + {"FOO": "BAR", "SIZE": "1234"}, + id=f"{cmd_class.__name__}-simple-esmtp-args", + ), + pytest.param( + cmd_class, + CommandDataRaw(b"\x00FOO=BAR\x00FOO=BAZ\x00"), + "g@g3rt.nl", + {"FOO": "BAZ"}, + id=f"{cmd_class.__name__}-esmtp-args-last-key-wins", + ), + pytest.param( + cmd_class, + CommandDataRaw( + b"\x00BODY=8BITMIME" + b"\x00SMTPUTF8\x00" + ), + "bounce+1-local=domain.tld@example.com", + {"BODY": "8BITMIME", "SMTPUTF8": None}, + id=f"{cmd_class.__name__}-esmtp-args-value-is-optional", + ), + pytest.param( + cmd_class, + CommandDataRaw(b"\x00FOO=\xc3\xb1BAR\x00SIZE=1234\x00"), + "g@g3rt.nl", + {"FOO": "ñBAR", "SIZE": "1234"}, + id=f"{cmd_class.__name__}-non-ascii-esmtp-arg-value", + ), + pytest.param( + cmd_class, + CommandDataRaw(b"\x00"), + r"g\xff@g3rt.nl", + {}, + # https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt + id=f"{cmd_class.__name__}-address-utf8-impossible-backslashreplace", + ), + ] + + +@pytest.mark.parametrize( + "cmd_class,data,expected_address,expected_esmtp_args", + generate_mail_from_and_rcpt_to_testparams_ok(), +) +def test_mail_from_and_rcpt_to_ok( + cmd_class: type[MailFrom | RcptTo], + data: CommandDataRaw, + expected_address: str, + expected_esmtp_args: dict[str, str], + caplog: pytest.LogCaptureFixture, +) -> None: + cmd = cmd_class(data_raw=data) + assert cmd.address == expected_address + assert cmd.esmtp_args == expected_esmtp_args + _assert_nothing_logged(caplog.records) + + +def generate_mail_from_and_rcpt_to_testparams_angle_brackets() -> ( + Generator[Any, None, None] +): + for cmd_class in (MailFrom, RcptTo): + yield from [ + pytest.param( + cmd_class, + 
CommandDataRaw(b"g@g3rt.nl\x00"), + "g@g3rt.nl", + {}, + id=f"{cmd_class.__name__}-plain-address", + ), + pytest.param( + cmd_class, + CommandDataRaw(b"g@g3rt.nl\x00FOO=BAR\x00SIZE=1234\x00"), + "g@g3rt.nl", + {"FOO": "BAR", "SIZE": "1234"}, + id=f"{cmd_class.__name__}-plain-address-with-esmtp-args", + ), + pytest.param( + cmd_class, + CommandDataRaw(b"\x00"), + "g@g3rt.nl>", + {}, + id=f"{cmd_class.__name__}-left-angle-bracket-missing", + ), + ] + + +@pytest.mark.parametrize( + "cmd_class,data,expected_address,expected_esmtp_args", + generate_mail_from_and_rcpt_to_testparams_angle_brackets(), +) +def test_mail_from_and_rcpt_to_angle_brackets( + cmd_class: type[MailFrom | RcptTo], + data: CommandDataRaw, + expected_address: str, + expected_esmtp_args: dict[str, str], + caplog: pytest.LogCaptureFixture, +) -> None: + cmd = cmd_class(data_raw=data) + assert cmd.address == expected_address + assert cmd.esmtp_args == expected_esmtp_args + warnings_logged = [rec for rec in caplog.records if rec.levelno == logging.WARNING] + assert len(warnings_logged) == 1 + assert ( + f"Address in Mail From / Rcpt To '{expected_address}' appears not " + "enclosed in angle brackets." 
+ ) in warnings_logged[0].msg + assert not [ + rec + for rec in caplog.records + if rec.levelno >= logging.INFO and rec.levelno != logging.WARNING + ] + + +def generate_mail_from_and_rcpt_to_testparams_invalid() -> Generator[Any, None, None]: + for cmd_class in (MailFrom, RcptTo): + yield from [ + pytest.param( + cmd_class, + CommandDataRaw(b"\x00"), + id=f"{cmd_class.__name__}-address-empty", + ), + pytest.param( + cmd_class, + CommandDataRaw(b""), + id=f"{cmd_class.__name__}-address-empty-no-null-termination", + ), + pytest.param( + cmd_class, + CommandDataRaw(b""), + id=f"{cmd_class.__name__}-address-no-null-termination", + ), + pytest.param( + cmd_class, + CommandDataRaw( + b"\x00F\xc3\xb3\xc3\xb3=BAR\x00SIZE=1234\x00" + ), + id=f"{cmd_class.__name__}-non-ascii-esmtp-arg-name", + ), + pytest.param( + cmd_class, + CommandDataRaw( + CommandDataRaw(b"\x00FOO==BAR\x00SIZE=1234\x00"), + ), + id=f"{cmd_class.__name__}-multiple-key-value-separators", + ), + ] + + +@pytest.mark.parametrize( + "cmd_class,data", + generate_mail_from_and_rcpt_to_testparams_invalid(), +) +def test_mail_from_rcpt_to_invalid( + cmd_class: type[MailFrom | RcptTo], + data: CommandDataRaw, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ProtocolViolationCommandData): + cmd_class(data_raw=data) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data,name,text", + [ + pytest.param( + CommandDataRaw(b"From\x00Display Name \x00"), + "From", + "Display Name ", + id="simple-from-header", + ), + pytest.param( + CommandDataRaw(b"From\x00Display \xe0\xb8\x84ame \x00"), + "From", + "Display คame ", + id="utf8-header-rfc6532", + ), + pytest.param( + CommandDataRaw(b"X-Spam-Level\x00\x00"), + "X-Spam-Level", + "", + id="empty-header-value-is-ok", + ), + pytest.param( + CommandDataRaw(b"From\x00Display Name\xff \x00"), + "From", + r"Display Name\xff ", + # https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt + id="utf8-impossible-byte-backslashreplace", + 
), + pytest.param( + CommandDataRaw( + b"Subject\x00Dit servicebericht bevat essenti\xeble informatie\x00" + ), + "Subject", + r"Dit servicebericht bevat essenti\xeble informatie", + id="utf8-invalid-subject-backslashreplace", + ), + ], +) +def test_header_ok( + data: CommandDataRaw, + name: str, + text: str, + caplog: pytest.LogCaptureFixture, +) -> None: + hdr = Header(data_raw=data) + assert hdr.name == name + assert hdr.text == text + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data", + [ + pytest.param( + CommandDataRaw(b"From\x00Display Name "), + id="structure", + ), + pytest.param( + CommandDataRaw(b"From Display Name \x00"), + id="structure", + ), + pytest.param( + CommandDataRaw(b"From: Display Name \x00"), + id="structure", + ), + pytest.param( + CommandDataRaw(b"From: Display Name "), + id="structure", + ), + ], +) +def test_header_invalid( + data: CommandDataRaw, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ProtocolViolationCommandData): + Header(data_raw=data) + _assert_nothing_logged(caplog.records) + + +test_commands_nodata_params = [ + pytest.param( + CommandDataRaw(b"\x00"), + id="null-byte", + ), + pytest.param( + CommandDataRaw(b"foobar"), + id="something-else", + ), + pytest.param( + CommandDataRaw(b"foobar\x00"), + id="something-else", + ), +] + + +@pytest.mark.parametrize( + "data", + [ + pytest.param( + CommandDataRaw(b""), + id="empty-should-work", + ), + pytest.param( + CommandDataRaw(b"foo"), + id="simple-content", + ), + pytest.param( + CommandDataRaw(b"foo\xffbar"), + id="do-not-care-about-encoding", + ), + ], +) +def test_body_chunk( + data: CommandDataRaw, + caplog: pytest.LogCaptureFixture, +) -> None: + bdc = BodyChunk(data_raw=data) + assert bdc.data_raw == data + _assert_nothing_logged(caplog.records) + + +def test_abort_ok(caplog: pytest.LogCaptureFixture) -> None: + Abort(data_raw=CommandDataRaw(b"")) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + 
"data", + test_commands_nodata_params, +) +def test_abort_invalid(data: CommandDataRaw, caplog: pytest.LogCaptureFixture) -> None: + with pytest.raises(ProtocolViolationCommandData): + Abort(data_raw=CommandDataRaw(data)) + _assert_nothing_logged(caplog.records) + + +def test_quit_ok(caplog: pytest.LogCaptureFixture) -> None: + Quit(data_raw=CommandDataRaw(b"")) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data", + test_commands_nodata_params, +) +def test_quit_invalid(data: CommandDataRaw, caplog: pytest.LogCaptureFixture) -> None: + with pytest.raises(ProtocolViolationCommandData): + Quit(data_raw=CommandDataRaw(data)) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "data", + [ + pytest.param( + CommandDataRaw(b""), + id="empty-should-work", + ), + pytest.param( + CommandDataRaw(b"HELP\x00"), + id="unknown-command-as-data", + ), + pytest.param( + CommandDataRaw(b"HELP"), + id="not-null-terminated", + ), + pytest.param( + CommandDataRaw(b"foo\xffbar\x00"), + id="do-not-care-about-encoding", + ), + ], +) +def test_unknown_ok(data: CommandDataRaw, caplog: pytest.LogCaptureFixture) -> None: + u = Unknown(data_raw=data) + assert u.data_raw == data.removesuffix(b"\x00") + _assert_nothing_logged(caplog.records) + + +def test_command_registry_populated() -> None: + assert len(chars_to_command_registry.keys()) == 15 + assert all(len(char) == 1 for char in chars_to_command_registry.keys()) + + +@pytest.mark.parametrize( + "char", + [ + pytest.param( + b"O", + id="taken-by-OptionsNegotiate", + ), + pytest.param( + b"", + id="length-invalid-zero", + ), + pytest.param( + b"ZZ", + id="length-invalid-more-than-one", + ), + ], +) +def test_command_registry_fails_definition_time(char: bytes) -> None: + with pytest.raises(ValueError): + + class CommandWithCharInvalid( # pyright: ignore PylancereportUnusedClass + BaseCommand + ): + command_char: ClassVar[bytes] = char + + # NOTE: do not attempt to define a class with an actual 
valid/available char as it + # would affect the rest of the test run. 😕 diff --git a/tests/protocol/test_packet.py b/tests/protocol/test_packet.py new file mode 100644 index 0000000..acb8fd1 --- /dev/null +++ b/tests/protocol/test_packet.py @@ -0,0 +1,114 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging +import struct +from collections.abc import Sequence + +import pytest + +from purepythonmilter.api.models import MilterServerConnectionID +from purepythonmilter.protocol.definitions import MAX_DATA_SIZE +from purepythonmilter.protocol.exceptions import ProtocolViolationPacket +from purepythonmilter.protocol.packet import Packet, PacketDecoder +from purepythonmilter.protocol.payload import Payload + + +def _assert_nothing_logged(records: Sequence[logging.LogRecord]) -> None: + assert not [rec for rec in records if rec.levelno >= logging.INFO] + + +@pytest.fixture +def decoder() -> PacketDecoder: + connection_id = MilterServerConnectionID.generate() + return PacketDecoder( + connection_id=connection_id, # pyright: ignore PylancereportGeneralTypeIssues + ) + + +def test_decode_empty(decoder: PacketDecoder, caplog: pytest.LogCaptureFixture) -> None: + payloads = [p for p in decoder.decode(Packet(b""))] + assert payloads == [] + _assert_nothing_logged(caplog.records) + + +def test_decode_simple( + decoder: PacketDecoder, + caplog: pytest.LogCaptureFixture, +) -> None: + packet_bytes = b"\x00\x00\x00\rO\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff" + payloads = [p for p in decoder.decode(Packet(packet_bytes))] + assert payloads == [Payload(packet_bytes[4:])] + _assert_nothing_logged(caplog.records) + + +def test_decode_chunks( + decoder: PacketDecoder, + caplog: pytest.LogCaptureFixture, +) -> None: + packet_bytes = b"\x00\x00\x00\rO\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff" + # Beyond the length field, but before the end. 
+ payloads = [p for p in decoder.decode(Packet(packet_bytes[:10]))] + assert payloads == [] + payloads = [p for p in decoder.decode(Packet(packet_bytes[10:]))] + assert payloads == [Payload(packet_bytes[4:])] + _assert_nothing_logged(caplog.records) + + +def test_decode_chunks_mini( + decoder: PacketDecoder, + caplog: pytest.LogCaptureFixture, +) -> None: + packet_bytes = b"\x00\x00\x00\rO\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff" + # Before the length field finished. + payloads = [p for p in decoder.decode(Packet(packet_bytes[:2]))] + assert payloads == [] + payloads = [p for p in decoder.decode(Packet(packet_bytes[2:]))] + assert payloads == [Payload(packet_bytes[4:])] + _assert_nothing_logged(caplog.records) + + +def construct_bogus_packet(payload_length: int) -> Packet: + return struct.pack("!I", payload_length) + b" " * payload_length + + +@pytest.mark.parametrize( + "packet,valid", + [ + pytest.param( + construct_bogus_packet(0), + False, + id="zero-invalid", + ), + pytest.param( + construct_bogus_packet(1), + True, + id="lower-boundary-one", + ), + pytest.param( + construct_bogus_packet(MAX_DATA_SIZE), + True, + id="upper-boundary", + ), + pytest.param( + construct_bogus_packet(MAX_DATA_SIZE + 1), + False, + id="over-limit", + ), + ], +) +def test_decode_length_boundaries( + decoder: PacketDecoder, + packet: Packet, + valid: bool, + caplog: pytest.LogCaptureFixture, +) -> None: + if not valid: + with pytest.raises(ProtocolViolationPacket): + [p for p in decoder.decode(packet)] + else: + [p for p in decoder.decode(packet)] + _assert_nothing_logged(caplog.records) diff --git a/tests/protocol/test_payload.py b/tests/protocol/test_payload.py new file mode 100644 index 0000000..1c3ac15 --- /dev/null +++ b/tests/protocol/test_payload.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging +from collections.abc import Sequence + +import pytest + +from 
purepythonmilter.api.models import MilterServerConnectionID +from purepythonmilter.protocol.commands import OptionsNegotiate +from purepythonmilter.protocol.exceptions import ProtocolViolationPayload +from purepythonmilter.protocol.payload import Payload, PayloadDecoder + + +def _assert_nothing_logged(records: Sequence[logging.LogRecord]) -> None: + assert not [rec for rec in records if rec.levelno >= logging.INFO] + + +@pytest.fixture +def decoder() -> PayloadDecoder: + connection_id = MilterServerConnectionID.generate() + return PayloadDecoder( + connection_id=connection_id, # pyright: ignore PylancereportGeneralTypeIssues + ) + + +def test_decode_empty( + decoder: PayloadDecoder, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(RuntimeError): + decoder.decode(Payload(b"")) + _assert_nothing_logged(caplog.records) + + +def test_decode_options_negotiate( + decoder: PayloadDecoder, + caplog: pytest.LogCaptureFixture, +) -> None: + payload = Payload(b"O\x00\x00\x00\x06\x00\x00\x01\xff\x00\x1f\xff\xff") + assert decoder.decode(payload) == (OptionsNegotiate, payload[1:]) + _assert_nothing_logged(caplog.records) + + +def test_decode_options_not_implemented( + decoder: PayloadDecoder, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ProtocolViolationPayload): + decoder.decode(Payload(b"\x01\x00")) + _assert_nothing_logged(caplog.records) diff --git a/tests/protocol/test_responses.py b/tests/protocol/test_responses.py new file mode 100644 index 0000000..f0c89cf --- /dev/null +++ b/tests/protocol/test_responses.py @@ -0,0 +1,422 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging +from collections.abc import Sequence + +import pytest + +from purepythonmilter.api.models import EsmtpArgsType, RequestProtocolFlags +from purepythonmilter.protocol.definitions import MacroStage +from purepythonmilter.protocol.payload import Payload +from 
purepythonmilter.protocol.responses import ( + AddRecipient, + AddRecipientWithEsmtpArgs, + AppendHeader, + ChangeHeader, + ChangeMailFrom, + Continue, + InsertHeader, + OptionsNegotiateResponse, + RejectWithCode, + RemoveRecipient, + ReplaceBodyChunk, + TempFailWithCode, + validate_headername_rfc5322, +) + + +def _assert_nothing_logged(records: Sequence[logging.LogRecord]) -> None: + assert not [rec for rec in records if rec.levelno >= logging.INFO] + + +@pytest.mark.parametrize( + "flags,payload", + [ + pytest.param( + RequestProtocolFlags(), + Payload(b"O\x00\x00\x00\x06\x00\x00\x01\x00\x00\x0f\xf7\xff"), + id="default", + ), + pytest.param( + RequestProtocolFlags(call_data=True), + Payload(b"O\x00\x00\x00\x06\x00\x00\x01\x00\x00\x0f\xf5\xff"), + id="enable-callback-data", + ), + pytest.param( + RequestProtocolFlags(can_add_headers=True), + Payload(b"O\x00\x00\x00\x06\x00\x00\x01\x01\x00\x0f\xf7\xff"), + id="enable-action-add-headers", + ), + ], +) +def test_options_negotiate( + flags: RequestProtocolFlags, + payload: Payload, + caplog: pytest.LogCaptureFixture, +) -> None: + assert OptionsNegotiateResponse(protocol_flags=flags).encode() == payload + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "set_symbolslist_commands,payload_expected", + [ + pytest.param( + {MacroStage.END_OF_MESSAGE: {"i"}}, + Payload( + b"O\x00\x00\x00\x06\x00\x00\x01\x00\x00\x0f\xf7\xff" + b"\x00\x00\x00\x05i\x00" + ), + id="singular-request-symbol-i-on-end-of-message", + ), + pytest.param( + {MacroStage.CONNECT: {"j", "{my}"}}, + Payload( + b"O\x00\x00\x00\x06\x00\x00\x01\x00\x00\x0f\xf7\xff" + b"\x00\x00\x00\x00j {my}\x00" + ), + id="singular-request-two-symbols-on-connect", + ), + pytest.param( + {MacroStage.CONNECT: {"j"}, MacroStage.HELO: {"{my}"}}, + Payload( + b"O\x00\x00\x00\x06\x00\x00\x01\x00\x00\x0f\xf7\xff" + b"\x00\x00\x00\x00j\x00" + b"\x00\x00\x00\x01{my}\x00" + ), + id="multiple-request-symbols-on-connect-and-helo", + ), + ], +) +def 
test_options_negotiate_with_symbolslist( + set_symbolslist_commands: dict[MacroStage, set[str]], + payload_expected: Payload, + caplog: pytest.LogCaptureFixture, +) -> None: + r = OptionsNegotiateResponse( + protocol_flags=RequestProtocolFlags(), + symbols_for_stage=set_symbolslist_commands, + ) + assert r.encode() == payload_expected + _assert_nothing_logged(caplog.records) + + +def test_response_str_repr() -> None: + o = OptionsNegotiateResponse(protocol_flags=RequestProtocolFlags()) + assert str(o) == "OptionsNegotiateResponse" + assert "logger=" not in repr(o) + + +def test_response_eq() -> None: + o1 = OptionsNegotiateResponse(protocol_flags=RequestProtocolFlags()) + o2 = OptionsNegotiateResponse(protocol_flags=RequestProtocolFlags()) + # Would fail if logger attribute is not excluded in __eq__. + assert o1 == o2 + + +def test_continue(caplog: pytest.LogCaptureFixture) -> None: + assert Continue().encode() == Payload(b"c") + _assert_nothing_logged(caplog.records) + + +def test_reply_with_code_primary_only(caplog: pytest.LogCaptureFixture) -> None: + assert TempFailWithCode(primary_code=(4, 7, 1)).encode() == Payload(b"y471\x00") + assert RejectWithCode(primary_code=(5, 7, 1)).encode() == Payload(b"y571\x00") + _assert_nothing_logged(caplog.records) + + +def test_reply_with_code_enhanced(caplog: pytest.LogCaptureFixture) -> None: + assert TempFailWithCode( + primary_code=(4, 7, 1), enhanced_code=(4, 7, 1) + ).encode() == Payload(b"y471 4.7.1\x00") + assert RejectWithCode( + primary_code=(5, 7, 1), enhanced_code=(5, 7, 1) + ).encode() == Payload(b"y571 5.7.1\x00") + _assert_nothing_logged(caplog.records) + + +def test_reply_with_code_text(caplog: pytest.LogCaptureFixture) -> None: + assert TempFailWithCode(primary_code=(4, 7, 1), text="foobar").encode() == Payload( + b"y471 foobar\x00" + ) + assert RejectWithCode(primary_code=(5, 7, 1), text="foobar").encode() == Payload( + b"y571 foobar\x00" + ) + _assert_nothing_logged(caplog.records) + + +def 
test_reply_with_code_text_and_enhanced(caplog: pytest.LogCaptureFixture) -> None: + assert TempFailWithCode( + primary_code=(4, 7, 1), enhanced_code=(4, 7, 1), text="foobar" + ).encode() == Payload(b"y471 4.7.1 foobar\x00") + assert RejectWithCode( + primary_code=(5, 7, 1), enhanced_code=(5, 7, 1), text="foobar" + ).encode() == Payload(b"y571 5.7.1 foobar\x00") + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "name_text_tuple,expected_payload", + [ + pytest.param( + ("Foo", "Bar"), + Payload(b"hFoo\x00Bar\x00"), + id="simple", + ), + pytest.param( + ("Foo", ""), + Payload(b"hFoo\x00\x00"), + id="empty-headertext", + ), + pytest.param( + ("Foo", "Bárม"), + Payload(b"hFoo\x00B\xc3\xa1r\xe0\xb8\xa1\x00"), + id="unciode-headertext", + ), + pytest.param( + ("Fo~o", "Bar"), + Payload(b"hFo~o\x00Bar\x00"), + id="us-ascii-126-inclusive", + ), + pytest.param( + ("Fo!o", "Bar"), + Payload(b"hFo!o\x00Bar\x00"), + id="us-ascii-33-inclusive", + ), + ], +) +def test_append_header_ok( + name_text_tuple: tuple[str, str], + expected_payload: Payload, + caplog: pytest.LogCaptureFixture, +) -> None: + name, text = name_text_tuple + appendheader = AppendHeader(headername=name, headertext=text) + assert appendheader.encode() == expected_payload + _assert_nothing_logged(caplog.records) + + +def test_headername_validator_usascii_rfc5322() -> None: + for c in range(0, 128): + if c == 58 or c < 33 or c > 126: + # Colon (58) and < 33 and > 126 should not be accepted + with pytest.raises(ValueError): + validate_headername_rfc5322(chr(c)) + else: + validate_headername_rfc5322(chr(c)) + + +def test_headername_validator_extended_ascii_rfc5322() -> None: + for c in range(128, 256): + with pytest.raises(ValueError): + validate_headername_rfc5322(chr(c)) + + +invalid_headernames = [ + pytest.param("", id="empty-headername"), + pytest.param("X:ColonIllegal", id="colon-not-allowed"), + pytest.param("X No-Space", id="space-not-allowed"), +] + + 
+@pytest.mark.parametrize("headername", invalid_headernames) +def test_append_header_name_invalid( + headername: str, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ValueError): + AppendHeader(headername=headername, headertext="Foo") + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "insertheader,expected_payload", + [ + pytest.param( + InsertHeader(headername="Foo", headertext="Bar", index=123), + Payload(b"i\x00\x00\x00\x7bFoo\x00Bar\x00"), + id="simple", + ), + pytest.param( + InsertHeader(headername="Foo", headertext="", index=123), + Payload(b"i\x00\x00\x00\x7bFoo\x00\x00"), + id="empty-headertext", + ), + pytest.param( + InsertHeader(headername="Foo", headertext="Bar", index=0), + Payload(b"i\x00\x00\x00\x00Foo\x00Bar\x00"), + id="simple-at-zero", + ), + pytest.param( + InsertHeader(headername="Foo", headertext="Bárม", index=123), + Payload(b"i\x00\x00\x00\x7bFoo\x00B\xc3\xa1r\xe0\xb8\xa1\x00"), + id="unciode-headertext", + ), + ], +) +def test_insert_header_ok( + insertheader: InsertHeader, + expected_payload: Payload, + caplog: pytest.LogCaptureFixture, +) -> None: + assert insertheader.encode() == expected_payload + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "headername,headertext,index", + [ + pytest.param("", "Bar", 123, id="empty-headername"), + pytest.param("Foo", "Bar", -1, id="index-negative"), + ], +) +def test_insert_header_invalid( + headername: str, + headertext: str, + index: int, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ValueError): + InsertHeader(headername=headername, headertext=headertext, index=index) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize("headername", invalid_headernames) +def test_insert_header_name_invalid( + headername: str, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ValueError): + InsertHeader(headername=headername, headertext="Foo", index=1) + 
_assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "changeheader,expected_payload", + [ + pytest.param( + ChangeHeader(headername="Foo", headertext="Bar", nth_occurrence=123), + Payload(b"m\x00\x00\x00\x7bFoo\x00Bar\x00"), + id="simple", + ), + pytest.param( + ChangeHeader(headername="Foo", headertext="", nth_occurrence=123), + Payload(b"m\x00\x00\x00\x7bFoo\x00\x00"), + id="empty-headertext", + ), + pytest.param( + ChangeHeader(headername="Foo", headertext="Bar", nth_occurrence=0), + Payload(b"m\x00\x00\x00\x00Foo\x00Bar\x00"), + id="simple-at-zero", + ), + pytest.param( + ChangeHeader(headername="Foo", headertext="Bárม", nth_occurrence=123), + Payload(b"m\x00\x00\x00\x7bFoo\x00B\xc3\xa1r\xe0\xb8\xa1\x00"), + id="unciode-headertext", + ), + ], +) +def test_change_header_ok( + changeheader: ChangeHeader, + expected_payload: Payload, + caplog: pytest.LogCaptureFixture, +) -> None: + assert changeheader.encode() == expected_payload + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "headername,headertext,nth_occurrence", + [ + pytest.param("", "Bar", 123, id="empty-headername"), + pytest.param("Foo", "Bar", -1, id="nth_occurrence-negative"), + ], +) +def test_change_header_invalid( + headername: str, + headertext: str, + nth_occurrence: int, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ValueError): + ChangeHeader( + headername=headername, headertext=headertext, nth_occurrence=nth_occurrence + ) + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize("headername", invalid_headernames) +def test_change_header_name_invalid( + headername: str, + caplog: pytest.LogCaptureFixture, +) -> None: + with pytest.raises(ValueError): + ChangeHeader(headername=headername, headertext="Foo", nth_occurrence=1) + _assert_nothing_logged(caplog.records) + + +def test_add_recipient_ok() -> None: + assert AddRecipient( + recipient="purepythonmilter@gertvandijk.nl" + ).encode() == 
Payload(b"+purepythonmilter@gertvandijk.nl\x00") + + +esmtp_args_params = [ + pytest.param( + {"FOO": "BAR", "LOREM": "IPSUM"}, + b"FOO=BAR LOREM=IPSUM\x00", + id="simple", + ), + pytest.param( + {"FOO": "BAR", "LOREM": None}, + b"FOO=BAR LOREM\x00", + id="esmtp-key-only", + ), +] + + +@pytest.mark.parametrize("esmtp_args,expected_partial_payload", esmtp_args_params) +def test_add_recipient_with_esmtp_args_ok( + esmtp_args: EsmtpArgsType, expected_partial_payload: Payload +) -> None: + assert AddRecipientWithEsmtpArgs( + recipient="purepythonmilter@gertvandijk.nl", esmtp_args=esmtp_args + ).encode() == Payload( + b"2purepythonmilter@gertvandijk.nl\x00" + expected_partial_payload + ) + + +def test_remove_recipient_ok() -> None: + assert RemoveRecipient( + recipient="purepythonmilter@gertvandijk.nl" + ).encode() == Payload(b"-purepythonmilter@gertvandijk.nl\x00") + + +def test_change_mail_from_ok() -> None: + assert ChangeMailFrom( + mail_from="purepythonmilter@gertvandijk.nl" + ).encode() == Payload(b"epurepythonmilter@gertvandijk.nl\x00") + + +@pytest.mark.parametrize("esmtp_args,expected_partial_payload", esmtp_args_params) +def test_change_mail_from_with_esmtp_args_ok( + esmtp_args: EsmtpArgsType, expected_partial_payload: Payload +) -> None: + assert ChangeMailFrom( + mail_from="purepythonmilter@gertvandijk.nl", esmtp_args=esmtp_args + ).encode() == Payload( + b"epurepythonmilter@gertvandijk.nl\x00" + expected_partial_payload + ) + + +def test_replace_body_chunk_ok() -> None: + assert ReplaceBodyChunk(chunk=b"foobar").encode() == Payload(b"bfoobar") + + +def test_replace_body_chunk_too_large() -> None: + with pytest.raises(ValueError): + ReplaceBodyChunk(chunk=b"1" * 65536).encode() diff --git a/tests/server/__init__.py b/tests/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/server/test_session.py b/tests/server/test_session.py new file mode 100644 index 0000000..5c4dece --- /dev/null +++ b/tests/server/test_session.py @@ -0,0 
+1,326 @@ +# SPDX-FileCopyrightText: 2023 Gert van Dijk +# +# SPDX-License-Identifier: Apache-2.0 + +# pyright: reportPrivateUsage=false +from __future__ import annotations + +import asyncio +import logging +from collections.abc import Sequence +from typing import Any + +import pytest + +from purepythonmilter.protocol import commands, responses + +from ..conftest import FakeMtaMilterSession, MilterAppFactoryFixtureParams + +pytestmark = pytest.mark.asyncio + + +def _assert_nothing_logged(records: Sequence[logging.LogRecord]) -> None: + assert not [rec for rec in records if rec.levelno >= logging.INFO] + + +@pytest.mark.parametrize( + "cmd,app_method", + [ + pytest.param( + commands.Connect( + data_raw=commands.CommandDataRaw( + b"[172.17.0.1]\x004\xc36172.17.0.1\x00" + ) + ), + "on_connect", + id="connect", + ), + pytest.param( + commands.Helo(data_raw=commands.CommandDataRaw(b"[172.17.0.1]\x00")), + "on_helo", + id="helo", + ), + pytest.param( + commands.MailFrom(data_raw=commands.CommandDataRaw(b"\x00")), + "on_mail_from", + id="mail-from", + ), + pytest.param( + commands.RcptTo(data_raw=commands.CommandDataRaw(b"\x00")), + "on_rcpt_to", + id="rcpt-to", + ), + pytest.param( + commands.Data(data_raw=commands.CommandDataRaw(b"")), + "on_data", + id="data", + ), + pytest.param( + commands.Header( + data_raw=commands.CommandDataRaw( + b"From\x00Display Name \x00" + ) + ), + "on_header", + id="header", + ), + pytest.param( + commands.EndOfHeaders(data_raw=commands.CommandDataRaw(b"")), + "on_end_of_headers", + id="end-of-headers", + ), + pytest.param( + commands.BodyChunk(data_raw=commands.CommandDataRaw(b"foo")), + "on_body_chunk", + id="body-chunk", + ), + pytest.param( + commands.EndOfMessage(data_raw=commands.CommandDataRaw(b"")), + "on_end_of_message", + id="end-of-message", + ), + pytest.param( + commands.Unknown(data_raw=commands.CommandDataRaw(b"HELP\x00")), + "on_unknown", + id="unknown-command", + ), + ], +) +async def 
test_session_command_queue_no_macros_to_app( + cmd: ( + commands.Connect + | commands.Helo + | commands.MailFrom + | commands.RcptTo + | commands.Data + | commands.Header + | commands.EndOfHeaders + | commands.BodyChunk + | commands.EndOfMessage + | commands.Unknown + ), + app_method: str, + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + done_event = fake_session.queue_command(cmd) + await done_event.wait() + method = getattr(fake_session._app, app_method) + method.assert_called() + assert not cmd.macros + _assert_nothing_logged(caplog.records) + + +@pytest.mark.parametrize( + "cmd,app_method", + [ + pytest.param( + commands.Abort(data_raw=commands.CommandDataRaw(b"")), + "on_abort", + id="abort", + ), + pytest.param( + commands.Quit(data_raw=commands.CommandDataRaw(b"")), + "on_quit", + id="quit", + ), + ], +) +async def test_session_command_queue_commands_without_macro( + cmd: commands.BaseCommand, + app_method: str, + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + done_event = fake_session.queue_command(cmd) + await done_event.wait() + method = getattr(fake_session._app, app_method) + method.assert_called() + _assert_nothing_logged(caplog.records) + + +async def test_session_command_queue_not_implemented( + fake_session_should_fail: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + class NonExistingCommand(commands.BaseCommand): + pass + + done_event = fake_session_should_fail.queue_command(NonExistingCommand()) + await asyncio.wait_for(done_event.wait(), 1) + assert fake_session_should_fail._commands_consumer_task.done() + assert isinstance( + fake_session_should_fail._commands_consumer_task.exception(), + NotImplementedError, + ) + errors_logged = [rec for rec in caplog.records if rec.levelno >= logging.ERROR] + assert len(errors_logged) == 1 + assert ("Got an exception in the commands consumer task.") in errors_logged[0].msg + assert not [ + rec + for rec in 
caplog.records + if rec.levelno >= logging.INFO and rec.levelno != logging.ERROR + ] + + +async def test_session_command_queue_macro_attached( + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + c1 = commands.DefineMacro( + data_raw=commands.CommandDataRaw(b"Cj\x00myhost.sub.example.com\x00") + ) + fake_session.queue_command(c1) + c2 = commands.Connect( + data_raw=commands.CommandDataRaw(b"[172.17.0.1]\x004\xc36172.17.0.1\x00") + ) + e2 = fake_session.queue_command(c2) + await e2.wait() + assert c2.macros == {"j": "myhost.sub.example.com"} + _assert_nothing_logged(caplog.records) + + +async def test_session_command_queue_macro_attached_wrong_stage_ignored( + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + c1 = commands.DefineMacro( + # Macro for Connect stage,... + data_raw=commands.CommandDataRaw(b"Cj\x00myhost.sub.example.com\x00") + ) + fake_session.queue_command(c1) + # ... but Data command follows, and thus... + c2 = commands.Data(data_raw=commands.CommandDataRaw(b"")) + e2 = fake_session.queue_command(c2) + await e2.wait() + # ... should not attach macros. 
+ assert not c2.macros + _assert_nothing_logged(caplog.records) + + +async def test_session_command_queue_timeout( + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + c1 = commands.DefineMacro( + data_raw=commands.CommandDataRaw(b"Cj\x00myhost.sub.example.com\x00") + ) + fake_session.queue_command(c1) + # Wait for more than timeout + await asyncio.sleep(0.02) + c2 = commands.Connect( + data_raw=commands.CommandDataRaw(b"[172.17.0.1]\x004\xc36172.17.0.1\x00") + ) + e2 = fake_session.queue_command(c2) + await e2.wait() + assert c2.macros == {"j": "myhost.sub.example.com"} + _assert_nothing_logged(caplog.records) + + +async def test_session_send_manipulations_before_end_of_message( + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + assert not fake_session._manipulations_sent + + fake_session._pending_manipulations.append( + responses.AppendHeader(headername="Foo", headertext="Bar") + ) + eom = commands.EndOfMessage(data_raw=commands.CommandDataRaw(b"")) + done_event = fake_session.queue_command(eom) + await done_event.wait() + + assert fake_session._manipulations_sent + _assert_nothing_logged(caplog.records) + + +class TwoCommandsMilterAppFactoryParams(MilterAppFactoryFixtureParams): + """Fake app that would add a manipulation at on_connect and on_end_of_message.""" + + return_on_connect: Any = responses.Continue( + manipulations=[ + responses.AppendHeader(headername="X-On-Connect", headertext="Foo") + ] + ) + return_on_end_of_message: Any = responses.Continue( + manipulations=[responses.AppendHeader(headername="X-On-EOM", headertext="Bar")] + ) + + +@pytest.mark.parametrize( + "fake_app_factory", + [pytest.param(TwoCommandsMilterAppFactoryParams())], + indirect=True, +) +async def test_session_send_manipulations_before_end_of_message_merged( + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + """ + Assert that manipulations added at the on_connect callback (or 
any other basically) + and on_end_of_message are combined and sent at on_end_of_message time. + """ + assert not fake_session._manipulations_sent + + c1 = commands.Connect( + data_raw=commands.CommandDataRaw(b"[172.17.0.1]\x004\xc36172.17.0.1\x00") + ) + fake_session.queue_command(c1) + c2 = commands.EndOfMessage(data_raw=commands.CommandDataRaw(b"")) + e2 = fake_session.queue_command(c2) + await e2.wait() + + assert fake_session.responses_written == [ + TwoCommandsMilterAppFactoryParams.return_on_connect, + responses.AppendHeader(headername="X-On-Connect", headertext="Foo"), + responses.AppendHeader(headername="X-On-EOM", headertext="Bar"), + TwoCommandsMilterAppFactoryParams.return_on_end_of_message, + ] + + assert fake_session._manipulations_sent + _assert_nothing_logged(caplog.records) + + +class InvalidTwoCommandsMilterAppFactoryParams(MilterAppFactoryFixtureParams): + """ + Fake app that would erroneously add a manipulation at on_unknown, after an + on_end_of_message. + """ + + return_on_unknown: Any = responses.Continue( + manipulations=[ + responses.AppendHeader(headername="X-On-Unknown", headertext="Foo") + ] + ) + + +@pytest.mark.parametrize( + "fake_app_factory", + [pytest.param(InvalidTwoCommandsMilterAppFactoryParams())], + indirect=True, +) +async def test_session_send_manipulations_after_end_of_message_not_allowed( + fake_session: FakeMtaMilterSession, + caplog: pytest.LogCaptureFixture, +) -> None: + assert not fake_session._manipulations_sent + + eom = commands.EndOfMessage(data_raw=commands.CommandDataRaw(b"")) + fake_session.queue_command(eom) + later_command = commands.Unknown(data_raw=commands.CommandDataRaw(b"HELP\x00")) + e2 = fake_session.queue_command(later_command) + await e2.wait() + + assert fake_session._manipulations_sent + assert len(fake_session._pending_manipulations) == 0 + + warnings_logged = [rec for rec in caplog.records if rec.levelno >= logging.WARNING] + assert len(warnings_logged) == 1 + assert ( + "Adding manipulations 
after End of Message callback is not allowed; ignoring: " + ) in warnings_logged[0].msg + assert not [ + rec + for rec in caplog.records + if rec.levelno >= logging.INFO and rec.levelno != logging.WARNING + ]