From 0a38f2f93ebf7377779f4b6d9beb333012fda416 Mon Sep 17 00:00:00 2001 From: westersf Date: Wed, 9 Feb 2022 14:40:19 +0100 Subject: [PATCH 1/3] Add securitytxt bot --- .../bots/experts/securitytxt/REQUIREMENTS.txt | 1 + intelmq/bots/experts/securitytxt/__init__.py | 0 intelmq/bots/experts/securitytxt/expert.py | 104 ++++++++++++++++++ .../bots/experts/securitytxt/REQUIREMENTS.txt | 1 + .../bots/experts/securitytxt/__init__.py | 0 .../bots/experts/securitytxt/test_expert.py | 95 ++++++++++++++++ 6 files changed, 201 insertions(+) create mode 100644 intelmq/bots/experts/securitytxt/REQUIREMENTS.txt create mode 100644 intelmq/bots/experts/securitytxt/__init__.py create mode 100644 intelmq/bots/experts/securitytxt/expert.py create mode 100644 intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt create mode 100644 intelmq/tests/bots/experts/securitytxt/__init__.py create mode 100644 intelmq/tests/bots/experts/securitytxt/test_expert.py diff --git a/intelmq/bots/experts/securitytxt/REQUIREMENTS.txt b/intelmq/bots/experts/securitytxt/REQUIREMENTS.txt new file mode 100644 index 000000000..343b6d91a --- /dev/null +++ b/intelmq/bots/experts/securitytxt/REQUIREMENTS.txt @@ -0,0 +1 @@ +wellknown-securitytxt \ No newline at end of file diff --git a/intelmq/bots/experts/securitytxt/__init__.py b/intelmq/bots/experts/securitytxt/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/bots/experts/securitytxt/expert.py b/intelmq/bots/experts/securitytxt/expert.py new file mode 100644 index 000000000..dbbe6509e --- /dev/null +++ b/intelmq/bots/experts/securitytxt/expert.py @@ -0,0 +1,104 @@ +from typing import Optional + +import requests +from securitytxt import SecurityTXT + +from intelmq.lib.bot import ExpertBot + + +class SecurityTXTExpertBot(ExpertBot): + """ + A bot for retrieving contact details from a security.txt + """ + """ + url_field: The field where to find the url which should be searched + contact_field: Field in which to place the found 
contact details + + only_email_address: whether to select only email addresses as contact detail (no web urls) + overwrite: whether to override existing data + check_expired / check_canonical: whether to perform checks on expiry date / canonical urls. + """ + url_field: str = "source.reverse_dns" + contact_field: str = "source.abuse_contact" + + only_email_address: bool = True + overwrite: bool = True + check_expired: bool = False + check_canonical: bool = False + + def init(self): + if not self.url_field or not self.contact_field: + raise AttributeError("Not all required fields are set.") + + def process(self): + event = self.receive_message() + + try: + self.check_prerequisites(event) + primary_contact = self.get_primary_contact(event.get(self.url_field)) + event.add(self.contact_field, primary_contact, overwrite=self.overwrite) + except NotMeetsRequirementsError as e: + self.logger.debug(str(e) + " Skipping event.") + except ContactNotFoundError as e: + self.logger.debug(f"No contact found. {str(e)} Continue.") + + self.send_message(event) + self.acknowledge_message() + + def check_prerequisites(self, event) -> None: + """ + Check whether this event should be processed by this bot, or can be skipped. + :param event: The event to evaluate. + """ + if not event.get(self.url_field, False): + raise NotMeetsRequirementsError("The URL field is empty.") + if event.get(self.contact_field, False) and not self.overwrite: + raise NotMeetsRequirementsError("All replace values already set.") + + def get_primary_contact(self, url: str) -> Optional[str]: + """ + Given a url, get the file, check it's validity and look for contact details. The primary contact details are + returned. If only_email_address is set to True, it will only return email addresses (no urls). 
+ :param url: The URL on which to look for a security.txt file + :return: The contact information + :raises ContactNotFoundError: if contact cannot be found + """ + try: + securitytxt = SecurityTXT.from_url(url) + if not self.security_txt_is_valid(securitytxt): + raise ContactNotFoundError("SecurityTXT File not valid.") + for contact in securitytxt.contact: + if not self.only_email_address or SecurityTXTExpertBot.is_email_address(contact): + return contact + raise ContactNotFoundError("No contact details found in SecurityTXT.") + except (FileNotFoundError, AttributeError, requests.exceptions.RequestException): + raise ContactNotFoundError("SecurityTXT file could not be found or parsed.") + + def security_txt_is_valid(self, securitytxt: SecurityTXT): + """ + Determine whether a security.txt file is valid according to parameters of the bot. + :param securitytxt: The securityTXT object + :return: Whether the securitytxt is valid. + """ + return (not self.check_expired or not securitytxt.expired) and \ + (not self.check_canonical or securitytxt.canonical_url()) + + @staticmethod + def is_email_address(contact: str): + """ + Determine whether the argument is an email address + :param contact: the contact + :return: whether contact is email address + """ + return 'mailto:' in contact or '@' in contact + + +class NotMeetsRequirementsError(Exception): + pass + + +class ContactNotFoundError(Exception): + pass + + +BOT = SecurityTXTExpertBot \ No newline at end of file diff --git a/intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt b/intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt new file mode 100644 index 000000000..b08a26037 --- /dev/null +++ b/intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt @@ -0,0 +1 @@ +requests_mock \ No newline at end of file diff --git a/intelmq/tests/bots/experts/securitytxt/__init__.py b/intelmq/tests/bots/experts/securitytxt/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/intelmq/tests/bots/experts/securitytxt/test_expert.py b/intelmq/tests/bots/experts/securitytxt/test_expert.py new file mode 100644 index 000000000..6ece5bbef --- /dev/null +++ b/intelmq/tests/bots/experts/securitytxt/test_expert.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +Testing the SecurityTXT Expert Bot +""" + +import unittest + +import requests_mock + +import intelmq.lib.test as test +from intelmq.bots.experts.securitytxt.expert import SecurityTXTExpertBot + +EXAMPLE_INPUT_IP = {"__type": "Event", + "source.ip": "192.168.123.123"} + +EXPECTED_OUTPUT_IP = {"__type": "Event", + "source.ip": "192.168.123.123", + "source.account": 'test@test.local'} + +EXAMPLE_INPUT_FQDN = {"__type": "Event", + "source.fqdn": "test.local"} + +EXPECTED_OUTPUT_FQDN = {"__type": "Event", + "source.fqdn": "test.local", + "source.abuse_contact": 'test.local/whitehat'} + +EXPECTED_OUTPUT_FQDN_NO_CONTACT = {"__type": "Event", + "source.fqdn": "test.local"} + +@requests_mock.Mocker() +class TestSecurityTXTExpertBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for the SecurityTXT Expert Bot + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = SecurityTXTExpertBot + + def test_ip(self, m: requests_mock.Mocker): + self._run_generic_test(securitytxt_url=f"https://{EXAMPLE_INPUT_IP['source.ip']}/.well-known/security.txt", + securitytxt=f"Contact: {EXPECTED_OUTPUT_IP['source.account']}", + input_message=EXAMPLE_INPUT_IP, + output_message=EXPECTED_OUTPUT_IP, + config={'url_field': 'source.ip', 'contact_field': 'source.account', + 'only_email_address': False}, + m=m) + + def test_fqdn(self, m: requests_mock.Mocker): + self._run_generic_test(securitytxt_url=f"https://{EXAMPLE_INPUT_FQDN['source.fqdn']}/.well-known/security.txt", + securitytxt=f"Contact: {EXPECTED_OUTPUT_FQDN['source.abuse_contact']}", + input_message=EXAMPLE_INPUT_FQDN, + output_message=EXPECTED_OUTPUT_FQDN, + config={'url_field': 'source.fqdn', 
'contact_field': 'source.abuse_contact', + 'only_email_address': False}, + m=m) + + def test_only_email_address_true(self, m: requests_mock.Mocker): + self._run_generic_test(securitytxt_url=f"https://{EXAMPLE_INPUT_FQDN['source.fqdn']}/.well-known/security.txt", + securitytxt=f"Contact: {EXPECTED_OUTPUT_FQDN['source.abuse_contact']}", + input_message=EXAMPLE_INPUT_FQDN, + output_message=EXPECTED_OUTPUT_FQDN_NO_CONTACT, + config={'url_field': 'source.fqdn', 'contact_field': 'source.abuse_contact', + 'only_email_address': True}, + m=m) + + def test_expired(self, m: requests_mock.Mocker): + self._run_generic_test(securitytxt_url=f"https://{EXAMPLE_INPUT_FQDN['source.fqdn']}/.well-known/security.txt", + securitytxt=f"Contact: {EXPECTED_OUTPUT_FQDN['source.abuse_contact']}\nExpires: 1900-12-31T18:37:07.000Z", + input_message=EXAMPLE_INPUT_FQDN, + output_message=EXPECTED_OUTPUT_FQDN_NO_CONTACT, + config={'url_field': 'source.fqdn', 'contact_field': 'source.abuse_contact', + 'only_email_address': False, 'check_expired': True}, + m=m) + + def test_not_expired(self, m: requests_mock.Mocker): + self._run_generic_test(securitytxt_url=f"https://{EXAMPLE_INPUT_FQDN['source.fqdn']}/.well-known/security.txt", + securitytxt=f"Contact: {EXPECTED_OUTPUT_FQDN['source.abuse_contact']}\nExpires: 3000-12-31T18:37:07.000Z", + input_message=EXAMPLE_INPUT_FQDN, + output_message=EXPECTED_OUTPUT_FQDN, + config={'url_field': 'source.fqdn', 'contact_field': 'source.abuse_contact', + 'only_email_address': False, 'check_expired': True}, + m=m) + + def _run_generic_test(self, m: requests_mock.Mocker, config: dict, securitytxt_url: str, securitytxt: str, + input_message: dict, output_message: dict): + self.sysconfig = config + self.prepare_bot() + m.get(requests_mock.ANY, status_code=404) + m.get(securitytxt_url, text=securitytxt) + self.input_message = input_message + self.run_bot() + self.assertMessageEqual(0, output_message) From ccbee1a7c2e1f24839acd1b9471f8940c88bb03a Mon Sep 17 00:00:00 2001 
From: Westers Date: Wed, 9 Feb 2022 14:52:41 +0100 Subject: [PATCH 2/3] Add documentation for securitytxt bot --- docs/user/bots.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/user/bots.md b/docs/user/bots.md index 433fe98ef..3b1dd0fdf 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -3524,6 +3524,49 @@ to true. (optional, boolean) Query for IPs at `https://stat.ripe.net/data/maxmind-geo-lite/data.json?resource=%s`. Defaults to true. +--- + +### SecurityTXT
+ +SecurityTXT is an initiative to standardize how websites publish their abuse contact information. Currently it is a `draft RFC `_. See this draft RFC for more information on security.txt. This bot automatically looks for security.txt files on a URL or IP, retrieves the primary contact information out of it and adds this to the event. + +**Requirements** + +To use this bot, you need to install the required dependencies: + +```bash +pip3 install -r intelmq/bots/experts/securitytxt/REQUIREMENTS.txt +``` + +**Module:** `intelmq.bots.experts.securitytxt.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +**`url_field`** + +The field in the event that contains the URL/IP on which to look for the the security.txt file. + +**`contact_field`** + +The field in the event in which to put the found contact details + +**`only_email_address`** + +Contact details can be web URLs or email addresses. When this value is set to True, it only selects email addresses as contact information. + +**`overwrite`** + +Boolean indicating whether to override existing data in contact_field + +**`check_expired`** + +Boolean indicating whether to check if the security.txt has expired according to its own expiry date + +**`check_canonical`** + +Boolean indicating whether to check if the url is contained in the list of canonical urls. + + --- ### Sieve
From c036b46758416cb269bd5a9eb615fa2b5dd6bdbf Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Sat, 30 Nov 2024 10:53:41 +0100 Subject: [PATCH 3/3] security.txt expert: fixes, updates to current standards --- CHANGELOG.md | 2 ++ docs/user/bots.md | 27 ++++++++++++------- .../bots/experts/securitytxt/REQUIREMENTS.txt | 3 +++ intelmq/bots/experts/securitytxt/expert.py | 21 ++++++++++----- .../bots/experts/securitytxt/REQUIREMENTS.txt | 1 - .../bots/experts/securitytxt/test_expert.py | 3 +++ 6 files changed, 40 insertions(+), 17 deletions(-) delete mode 100644 intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b31d7035..b8d25667a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ - Fix to avoid schema download if not configured #2530. #### Experts +- `intelmq.bots.experts.securitytxt`: + - Added new bot (PR#2538 by Frank Westers and Sebastian Wagner) #### Outputs diff --git a/docs/user/bots.md b/docs/user/bots.md index 3b1dd0fdf..56edbd158 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -3528,7 +3528,10 @@ true. ### SecurityTXT
-SecurityTXT is an initiative to standardize how websites publish their abuse contact information. Currently it is a `draft RFC `_. See this draft RFC for more information on security.txt. This bot automatically looks for security.txt files on a URL or IP, retrieves the primary contact information out of it and adds this to the event. +SecurityTXT is an initiative to standardize how websites publish their abuse contact information. +It is standardized in [RFC 9116 "A File Format to Aid in Security Vulnerability Disclosure"](https://datatracker.ietf.org/doc/rfc9116/). +Refer to the linked RFC for more information on `security.txt`. +This bot looks for `security.txt` files on a URL or IP, retrieves the primary contact information out of it and adds this to the event. **Requirements** @@ -3540,31 +3543,35 @@ pip3 install -r intelmq/bots/experts/securitytxt/REQUIREMENTS.txt **Module:** `intelmq.bots.experts.securitytxt.expert` -**Parameters (also expects [cache parameters](#cache-parameters)):** +**Parameters** **`url_field`** -The field in the event that contains the URL/IP on which to look for the the security.txt file. +The field in the event that contains the URL/IP on which to look for the security.txt file. Default: `source.reverse_dns` **`contact_field`** -The field in the event in which to put the found contact details +The field in the event in which to put the found contact details. Default: `source.abuse_contact` -**`only_email_address`** +**`only_email_address`** (bool) Contact details can be web URLs or email addresses. When this value is set to True, it only selects email addresses as contact information. +Default: `true` -**`overwrite`** +**`overwrite`** (bool) -Boolean indicating whether to override existing data in contact_field +Boolean indicating whether to override existing data in contact_field. 
+Default: `true` -**`check_expired`** +**`check_expired`** (bool) -Boolean indicating whether to check if the security.txt has expired according to its own expiry date +Boolean indicating whether to check if the security.txt has expired according to its own expiry date. +Default: `false` -**`check_canonical`** +**`check_canonical`** (bool) Boolean indicating whether to check if the url is contained in the list of canonical urls. +Default: `false` --- diff --git a/intelmq/bots/experts/securitytxt/REQUIREMENTS.txt b/intelmq/bots/experts/securitytxt/REQUIREMENTS.txt index 343b6d91a..3b93c2981 100644 --- a/intelmq/bots/experts/securitytxt/REQUIREMENTS.txt +++ b/intelmq/bots/experts/securitytxt/REQUIREMENTS.txt @@ -1 +1,4 @@ +# SPDX-FileCopyrightText: 2022 Frank Westers, 2024 Institute for Common Good Technology +# SPDX-License-Identifier: AGPL-3.0-or-later + wellknown-securitytxt \ No newline at end of file diff --git a/intelmq/bots/experts/securitytxt/expert.py b/intelmq/bots/experts/securitytxt/expert.py index dbbe6509e..94f2815cd 100644 --- a/intelmq/bots/experts/securitytxt/expert.py +++ b/intelmq/bots/experts/securitytxt/expert.py @@ -1,9 +1,18 @@ +# SPDX-FileCopyrightText: 2022 Frank Westers, 2024 Institute for Common Good Technology +# +# SPDX-License-Identifier: AGPL-3.0-or-later + from typing import Optional import requests -from securitytxt import SecurityTXT from intelmq.lib.bot import ExpertBot +from intelmq.lib.exceptions import MissingDependencyError + +try: + from securitytxt import SecurityTXT +except (ImportError, ModuleNotFoundError): + SecurityTXT = None class SecurityTXTExpertBot(ExpertBot): @@ -27,8 +36,8 @@ class SecurityTXTExpertBot(ExpertBot): check_canonical: bool = False def init(self): - if not self.url_field or not self.contact_field: - raise AttributeError("Not all required fields are set.") + if SecurityTXT is None: + raise MissingDependencyError('wellknown-securitytxt') def process(self): event = self.receive_message() @@ -38,9 +47,9 @@ 
def process(self): primary_contact = self.get_primary_contact(event.get(self.url_field)) event.add(self.contact_field, primary_contact, overwrite=self.overwrite) except NotMeetsRequirementsError as e: - self.logger.debug(str(e) + " Skipping event.") + self.logger.debug("Skipping event (%s).", e) except ContactNotFoundError as e: - self.logger.debug(f"No contact found. {str(e)} Continue.") + self.logger.debug("No contact found: %s Continue.", e) self.send_message(event) self.acknowledge_message() @@ -101,4 +110,4 @@ class ContactNotFoundError(Exception): pass -BOT = SecurityTXTExpertBot \ No newline at end of file +BOT = SecurityTXTExpertBot diff --git a/intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt b/intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt deleted file mode 100644 index b08a26037..000000000 --- a/intelmq/tests/bots/experts/securitytxt/REQUIREMENTS.txt +++ /dev/null @@ -1 +0,0 @@ -requests_mock \ No newline at end of file diff --git a/intelmq/tests/bots/experts/securitytxt/test_expert.py b/intelmq/tests/bots/experts/securitytxt/test_expert.py index 6ece5bbef..cf01285ba 100644 --- a/intelmq/tests/bots/experts/securitytxt/test_expert.py +++ b/intelmq/tests/bots/experts/securitytxt/test_expert.py @@ -1,3 +1,5 @@ +# SPDX-FileCopyrightText: 2022 Frank Westers +# # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- @@ -30,6 +32,7 @@ "source.fqdn": "test.local"} @requests_mock.Mocker() +@test.skip_exotic() class TestSecurityTXTExpertBot(test.BotTestCase, unittest.TestCase): """ A TestCase for the SecurityTXT Expert Bot