Skip to content

Commit

Permalink
Merge pull request #1871 from blacklanternsecurity/excavate-intercept
Browse files Browse the repository at this point in the history
Break out docs updater into separate workflow
  • Loading branch information
TheTechromancer authored Oct 18, 2024
2 parents a3f0bbe + 29206f0 commit 7716db3
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 135 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/docs_updater.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Regenerates the docs once a day from the dev branch and proposes the result
# as a pull request back into dev.
name: Daily Docs Update

on:
  schedule:
    - cron: '0 0 * * *' # Runs daily at midnight UTC

jobs:
  update_docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.BBOT_DOCS_UPDATER_PAT }}
          ref: dev # Checkout the dev branch
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: |
          pip install poetry
          poetry install
      - name: Generate docs
        run: |
          poetry run bbot/scripts/docs.py
      - name: Commit changes
        uses: EndBug/add-and-commit@v9
        with:
          add: '["*.md", "docs/data/chord_graph/*.json"]'
          author_name: "BBOT Docs Autopublish"
          # NOTE(review): this address looks redacted/obfuscated in the copy
          # under review -- confirm the real value before relying on it.
          author_email: [email protected]
          message: "Refresh module docs"
          # Commit locally only. add-and-commit pushes by default, which would
          # send the refreshed docs straight to dev and leave the pull request
          # step below with nothing ahead of its base branch.
          push: false
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.BBOT_DOCS_UPDATER_PAT }}
          # The unpushed commit from the previous step is published on this
          # branch and offered as a PR against dev.
          branch: update-docs
          base: dev
          title: "Daily Docs Update"
          body: "This is an automated pull request to update the documentation."
File renamed without changes.
14 changes: 11 additions & 3 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def __init__(
Raises:
ValidationError: If either `scan` or `parent` are not specified and `_dummy` is False.
"""
self.uuid = uuid.uuid4()
self._uuid = uuid.uuid4()
self._id = None
self._hash = None
self._data = None
Expand Down Expand Up @@ -456,6 +456,13 @@ def id(self):
self._id = f"{self.type}:{self.data_hash.hex()}"
return self._id

@property
def uuid(self):
    """
    A universally unique identifier for the event

    Unlike `.id`, which is built from the event type and a hash of the event's
    data, this value is built from the event type and the random UUID stored
    in `self._uuid`.

    Returns:
        str: The event type and the UUID joined by a colon,
            e.g. `DNS_NAME:6c96d512-090a-47f0-82e4-6860e46aac13`.
    """
    return f"{self.type}:{self._uuid}"

@property
def data_hash(self):
"""
Expand Down Expand Up @@ -1718,7 +1725,7 @@ def event_from_json(j, siem_friendly=False):
event = make_event(**kwargs)
event_uuid = j.get("uuid", None)
if event_uuid is not None:
event.uuid = uuid.UUID(event_uuid)
event._uuid = uuid.UUID(event_uuid.split(":")[-1])

resolved_hosts = j.get("resolved_hosts", [])
event._resolved_hosts = set(resolved_hosts)
Expand All @@ -1730,7 +1737,8 @@ def event_from_json(j, siem_friendly=False):
event._parent_id = parent_id
parent_uuid = j.get("parent_uuid", None)
if parent_uuid is not None:
event._parent_uuid = uuid.UUID(parent_uuid)
parent_type, parent_uuid = parent_uuid.split(":", 1)
event._parent_uuid = parent_type + ":" + str(uuid.UUID(parent_uuid))
return event
except KeyError as e:
raise ValidationError(f"Event missing required field: {e}")
Expand Down
3 changes: 3 additions & 0 deletions bbot/core/helpers/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
# uuid regex: canonical 8-4-4-4-12 hex form (compiled case-insensitively)
_uuid_regex = r"[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}"
uuid_regex = re.compile(_uuid_regex, re.I)
# event uuid regex: "<EVENT_TYPE>:<uuid>", e.g. "DNS_NAME:6c96d512-090a-47f0-82e4-6860e46aac13"
# Composed from _uuid_regex so the two patterns cannot drift apart.
_event_uuid_regex = r"[0-9A-Z_]+:" + _uuid_regex
event_uuid_regex = re.compile(_event_uuid_regex, re.I)

_open_port_regexes = (
_dns_name_regex + r":[0-9]{1,5}",
Expand Down
16 changes: 11 additions & 5 deletions bbot/test/test_step_1/test_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from ..bbot_fixtures import *
from bbot.scanner import Scanner
from bbot.core.helpers.regexes import uuid_regex
from bbot.core.helpers.regexes import event_uuid_regex


@pytest.mark.asyncio
Expand Down Expand Up @@ -443,11 +443,17 @@ async def test_events(events, helpers):
parent_event2 = scan.make_event("evilcorp.com", parent=scan.root_event, context="test context")

event1 = scan.make_event("evilcorp.com:80", parent=parent_event1, context="test context")
assert hasattr(event1, "_uuid")
assert hasattr(event1, "uuid")
assert isinstance(event1.uuid, uuid.UUID)
assert isinstance(event1._uuid, uuid.UUID)
assert isinstance(event1.uuid, str)
assert event1.uuid == f"{event1.type}:{event1._uuid}"
event2 = scan.make_event("evilcorp.com:80", parent=parent_event2, context="test context")
assert hasattr(event2, "_uuid")
assert hasattr(event2, "uuid")
assert isinstance(event2.uuid, uuid.UUID)
assert isinstance(event2._uuid, uuid.UUID)
assert isinstance(event2.uuid, str)
assert event2.uuid == f"{event2.type}:{event2._uuid}"
# ids should match because the event type + data is the same
assert event1.id == event2.id
# but uuids should be unique!
Expand All @@ -470,7 +476,7 @@ async def test_events(events, helpers):
assert db_event.discovery_context == "test context"
assert db_event.discovery_path == ["test context"]
assert len(db_event.parent_chain) == 1
assert all([uuid_regex.match(u) for u in db_event.parent_chain])
assert all([event_uuid_regex.match(u) for u in db_event.parent_chain])
assert db_event.parent_chain[0] == str(db_event.uuid)
assert db_event.parent.uuid == scan.root_event.uuid
assert db_event.parent_uuid == scan.root_event.uuid
Expand All @@ -490,7 +496,7 @@ async def test_events(events, helpers):
assert json_event["parent_chain"] == db_event.parent_chain
assert json_event["parent_chain"][0] == str(db_event.uuid)
reconstituted_event = event_from_json(json_event)
assert isinstance(reconstituted_event.uuid, uuid.UUID)
assert isinstance(reconstituted_event._uuid, uuid.UUID)
assert str(reconstituted_event.uuid) == json_event["uuid"]
assert str(reconstituted_event.parent_uuid) == json_event["parent_uuid"]
assert reconstituted_event.uuid == db_event.uuid
Expand Down
95 changes: 68 additions & 27 deletions docs/scanning/events.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,91 @@ An Event is a piece of data discovered by BBOT. Examples include `IP_ADDRESS`, `
event type event data source module tags
```

In addition to the obvious data (e.g. `www.evilcorp.com`), an event also contains other useful information such as:

- a `.discovery_path` showing exactly how the event was discovered, starting from the first scan target
- a `.timestamp` of when the data was discovered
- the `.module` that discovered it
- the `.parent` event that led to its discovery
- its `.scope_distance` (how many hops it is from the main scope, 0 == in-scope)
- a list of `.tags` that describe the data (`mx-record`, `http-title`, etc.)
## Event Attributes

Each BBOT event has the following attributes. Not all of these attributes are visible in the terminal output. However, they are always saved in `output.json` in the scan output folder. If you want to see them on the terminal, you can use `--json`.

- `.type`: the event type (e.g. `DNS_NAME`, `IP_ADDRESS`, `OPEN_TCP_PORT`, etc.)
- `.id`: an identifier representing the event type + a SHA1 hash of its data (note: multiple events can have the same `.id`)
- `.uuid`: a universally unique identifier for the event (e.g. `DNS_NAME:6c96d512-090a-47f0-82e4-6860e46aac13`)
- `.scope_description`: describes the scope of the event (e.g. `in-scope`, `affiliate`, `distance-2`)
- `.data`: the actual discovered data (for some events like `DNS_NAME` or `IP_ADDRESS`, this is a string. For other more complex events like `HTTP_RESPONSE`, it's a dictionary)
- `.host`: the hostname or IP address (e.g. `evilcorp.com` or `1.2.3.4`)
- `.port`: the port number (e.g. `80`, `443`)
- `.netloc`: the network location, including both the hostname and port (e.g. `www.evilcorp.com:443`)
- `.resolved_hosts`: a list of all resolved hosts for the event (`A`, `AAAA`, and `CNAME` records)
- `.dns_children`: a dictionary of all DNS records for the event (typically only present on `DNS_NAME`)
- `.web_spider_distance`: a count of how many URL links have been followed in a row to get to this event
- `.scope_distance`: a count of how many hops it is from the main scope (0 == in-scope)
- `.scan`: the ID of the scan that produced the event
- `.timestamp`: the date/time when the event was discovered
- `.parent`: the ID of the parent event that led to the discovery of this event
- `.parent_uuid`: the universally unique identifier for the parent event
- `.tags`: a list of tags describing the event (e.g. `mx-record`, `http-title`, etc.)
- `.module`: the module that discovered the event
- `.module_sequence`: the recent sequence of modules that were executed to discover the event, joined by `->` (e.g. `speculate->speculate`); this includes modules whose events were omitted from the output
- `.discovery_context`: a description of the context in which the event was discovered
- `.discovery_path`: a list of every discovery context leading to this event
- `.parent_chain`: a list of every event UUID leading to the discovery of this event, ending with the event's own UUID (each entry corresponds, in order, to the entry at the same position in `.discovery_path`)

These attributes allow us to construct a visual graph of events (e.g. in [Neo4j](../output#neo4j)) and query/filter/grep them more easily. Here is what a typical event looks like in JSON format:

```json
{
"type": "DNS_NAME",
"id": "DNS_NAME:879e47564ff0ed7711b707d3dbecb706ad6af1a3",
"id": "DNS_NAME:33bc005c2bdfea4d73e07db733bd11861cf6520e",
"uuid": "DNS_NAME:6c96d512-090a-47f0-82e4-6860e46aac13",
"scope_description": "in-scope",
"data": "www.blacklanternsecurity.com",
"host": "www.blacklanternsecurity.com",
"data": "link.evilcorp.com",
"host": "link.evilcorp.com",
"resolved_hosts": [
"185.199.108.153",
"2606:50c0:8003::153",
"blacklanternsecurity.github.io"
"184.31.52.65",
"2600:1402:b800:d82::700",
"2600:1402:b800:d87::700",
"link.evilcorp.com.edgekey.net"
],
"dns_children": {},
"dns_children": {
"A": [
"184.31.52.65"
],
"AAAA": [
"2600:1402:b800:d82::700",
"2600:1402:b800:d87::700"
],
"CNAME": [
"link.evilcorp.com.edgekey.net"
]
},
"web_spider_distance": 0,
"scope_distance": 0,
"scan": "SCAN:477d1e6b94be928bf85c554b0845985189cfc81d",
"timestamp": "2024-08-17T03:49:47.906017+00:00",
"parent": "DNS_NAME:1e57014aa7b0715bca68e4f597204fc4e1e851fc",
"scan": "SCAN:b6ef48bc036bc8d001595ae5061846a7e6beadb6",
"timestamp": "2024-10-18T15:40:13.716880+00:00",
"parent": "DNS_NAME:94c92b7eaed431b37ae2a757fec4e678cc3bd213",
"parent_uuid": "DNS_NAME:c737dffa-d4f0-4b6e-a72d-cc8c05bd892e",
"tags": [
"cdn-github",
"subdomain",
"in-scope"
"a-record",
"cdn-akamai",
"in-scope",
"cname-record",
"aaaa-record"
],
"module": "otx",
"module_sequence": "otx",
"discovery_context": "otx searched otx API for \"blacklanternsecurity.com\" and found DNS_NAME: www.blacklanternsecurity.com",
"module": "speculate",
"module_sequence": "speculate->speculate",
"discovery_context": "speculated parent DNS_NAME: link.evilcorp.com",
"discovery_path": [
"Scan demonic_jimmy seeded with DNS_NAME: blacklanternsecurity.com",
"otx searched otx API for \"blacklanternsecurity.com\" and found DNS_NAME: www.blacklanternsecurity.com"
"Scan insidious_frederick seeded with DNS_NAME: evilcorp.com",
"TXT record for evilcorp.com contains IP_ADDRESS: 149.72.247.52",
"PTR record for 149.72.247.52 contains DNS_NAME: o1.ptr2410.link.evilcorp.com",
"speculated parent DNS_NAME: ptr2410.link.evilcorp.com",
"speculated parent DNS_NAME: link.evilcorp.com"
],
"parent_chain": [
"DNS_NAME:1e57014aa7b0715bca68e4f597204fc4e1e851fc",
"DNS_NAME:879e47564ff0ed7711b707d3dbecb706ad6af1a3"
"DNS_NAME:34c657a3-0bfa-457e-9e6e-0f22f04b8da5",
"IP_ADDRESS:efc0fb3b-1b42-44da-916e-83db2360e10e",
"DNS_NAME:c737dffa-d4f0-4b6e-a72d-cc8c05bd892e",
"DNS_NAME_UNRESOLVED:722a3473-30c6-40f1-90aa-908d47105d5a",
"DNS_NAME:6c96d512-090a-47f0-82e4-6860e46aac13"
]
}
```
Expand Down
Loading

0 comments on commit 7716db3

Please sign in to comment.