Skip to content

Commit

Permalink
Include pre commit checks
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanDeveloper committed May 8, 2024
1 parent 42b1856 commit a03a026
Show file tree
Hide file tree
Showing 23 changed files with 126 additions and 109 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_test_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,4 @@ jobs:
- name: Test
if: startsWith(matrix.os, 'ubuntu') && !startsWith(matrix.python-version, '3.10')
run: |
python -m pytest tests.py
python -m pytest tests.py
2 changes: 1 addition & 1 deletion .github/workflows/build_test_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@ jobs:
# On versions other than 3.9, we only run the tests (without coverage generation).
if: startsWith(matrix.os, 'macos') && !startsWith(matrix.python-version, '3.9') && !startsWith(github.ref, 'refs/tags/')
run: |
python -m pytest tests.py
python -m pytest tests.py
2 changes: 1 addition & 1 deletion .github/workflows/build_test_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ jobs:
# On versions other than 3.9, we only run the tests (without coverage generation).
if: startsWith(matrix.os, 'windows') && !startsWith(matrix.python-version, '3.9') && !startsWith(github.ref, 'refs/tags/')
run: |
python -m pytest tests.py
python -m pytest tests.py
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -318,4 +318,4 @@ dmypy.json
# Cython debug symbols
cython_debug/

# End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks
# End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
repos:
- repo: https://github.com/psf/black
rev: 24.4.2
hooks:
- id: black
language_version: python3.11
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ python:
- requirements: requirements.txt

sphinx:
configuration: docs/source/conf.py
configuration: docs/source/conf.py
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ COPY heidgaf/ heidgaf/

RUN pip --disable-pip-version-check install --no-cache-dir --no-compile .

CMD [ "heidgaf", "-h" ]
CMD [ "heidgaf", "-h" ]
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ Train your own model:

```sh
heidgaf train -m xg -d all
```
```

### Data

Expand Down Expand Up @@ -115,10 +115,10 @@ Based on the following work, we implement heiDGAF to find malicious behaviour su
Proposes a hybrid DNS tunneling detection system using Tabu-PIO for feature selection.

- Classifying Malicious Domains using DNS Traffic Analysis


- [DeepDGA](https://github.com/roreagan/DeepDGA): Adversarially-Tuned Domain Generation and Detection

DeepDGA detects (and generates) domains on a per-domain basis, which provides a simple and flexible means to detect known DGA families. It uses GANs to bypass detectors and shows the effectiveness of such solutions.

- Kitsune: An Ensemble of Autoencoders for Online Network Intrusion Detection
Expand Down
23 changes: 14 additions & 9 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
version: "3.9"

volumes:
redis:
heidgaf:

services:
# TODO Currently not supported.
# redis:
# image: redis:latest
# ports:
Expand All @@ -19,10 +18,16 @@ services:
command: ["heidgaf", "inspect", "-r", "/tmp/data", "-m", "xg"]
volumes:
- ./data/heicloud:/tmp/data
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: all
# capabilities: [gpu]
memswap_limit: 42G
deploy:
resources:
limits:
cpus: '6'
memory: 32g
reservations:
cpus: '4'
memory: 24g
# devices:
# - driver: nvidia
# count: all
# capabilities: [gpu]
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ sphinxcontrib.apidoc==0.5.0
sphinx_autodoc_typehints==2.0.0
nbsphinx==0.9.3
myst_parser==2.0.0
sphinx_design==0.5.0
sphinx_design==0.5.0
1 change: 0 additions & 1 deletion docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ To use heiDGAF, first install it using pip:
.. code-block:: console
(.venv) $ pip install .
73 changes: 40 additions & 33 deletions heidgaf/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,49 +18,47 @@
def cli():
click.echo("Starting heiDGAF CLI")


@cli.command(name="train", context_settings={"show_default": True})
@click.option(
"-m",
"--model",
"model",
required=True,
"-m",
"--model",
"model",
required=True,
type=click.Choice(Model),
help="Model for fitting."
help="Model for fitting.",
)
@click.option(
"-d",
"--dataset",
"dataset",
required=True,
"-d",
"--dataset",
"dataset",
required=True,
type=click.Choice(Dataset),
default=Dataset.ALL,
help="Dataset for fitting."
help="Dataset for fitting.",
)
@click.option(
"-o",
"--output_dir",
"output_dir",
required=True,
"-o",
"--output_dir",
"output_dir",
required=True,
type=click.STRING,
help="Output path of model."
help="Output path of model.",
)
def train(model, dataset, output_dir):
click.echo("Start training of model.")
trainer = DNSAnalyzerTraining(
model=model,
dataset=dataset
)
trainer = DNSAnalyzerTraining(model=model, dataset=dataset)
trainer.train(output_path=output_dir)


@cli.command(name="inspect", context_settings={"show_default": True})
@click.option(
"-r",
"--read",
"input_dir",
required=True,
type=click.Path(),
help="Input directory or file for analyzing."
"-r",
"--read",
"input_dir",
required=True,
type=click.Path(),
help="Input directory or file for analyzing.",
)
@click.option(
"-dt",
Expand All @@ -71,12 +69,12 @@ def train(model, dataset, output_dir):
help="Sets the anomaly detector.",
)
@click.option(
"-m",
"--model",
"model",
required=True,
"-m",
"--model",
"model",
required=True,
type=click.Choice(Model),
help="Model for prediction."
help="Model for prediction.",
)
@click.option(
"-s",
Expand Down Expand Up @@ -143,7 +141,18 @@ def train(model, dataset, output_dir):
help="Sets Redis max connection for caching results.",
)
def inspection(
input_dir, detector, model, separator, filetype, lag, influence, n_standard_deviations, redis_host, redis_port, redis_db, redis_max_connection
input_dir,
detector,
model,
separator,
filetype,
lag,
influence,
n_standard_deviations,
redis_host,
redis_port,
redis_db,
redis_max_connection,
):
click.echo("Starts processing log lines of DNS traffic.")
pipeline = DNSInspectorPipeline(
Expand All @@ -164,6 +173,4 @@ def inspection(


if __name__ == "__main__":
"""Default CLI entrypoint for Click interface
"""
cli()
3 changes: 1 addition & 2 deletions heidgaf/detectors/arima_anomaly_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
import numpy as np
from statsmodels.tsa.arima.model import ARIMA

from heidgaf.detectors.base_anomaly import (AnomalyDetector,
AnomalyDetectorConfig)
from heidgaf.detectors.base_anomaly import AnomalyDetector, AnomalyDetectorConfig


class ARIMAAnomalyDetector(AnomalyDetector):
Expand Down
3 changes: 1 addition & 2 deletions heidgaf/detectors/exponential_thresholding.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

import numpy as np

from heidgaf.detectors.base_anomaly import (AnomalyDetector,
AnomalyDetectorConfig)
from heidgaf.detectors.base_anomaly import AnomalyDetector, AnomalyDetectorConfig


class EMAAnomalyDetector(AnomalyDetector):
Expand Down
3 changes: 1 addition & 2 deletions heidgaf/detectors/real_time_anomaly.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

import numpy as np

from heidgaf.detectors.base_anomaly import (AnomalyDetector,
AnomalyDetectorConfig)
from heidgaf.detectors.base_anomaly import AnomalyDetector, AnomalyDetectorConfig


class RealTimeAnomalyDetector(AnomalyDetector):
Expand Down
3 changes: 1 addition & 2 deletions heidgaf/detectors/thresholding_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

import numpy as np

from heidgaf.detectors.base_anomaly import (AnomalyDetector,
AnomalyDetectorConfig)
from heidgaf.detectors.base_anomaly import AnomalyDetector, AnomalyDetectorConfig


class ThresholdingAnomalyDetector(AnomalyDetector):
Expand Down
7 changes: 3 additions & 4 deletions heidgaf/inspectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(self, config: InspectorConfig) -> None:
"thirdleveldomain",
"secondleveldomain",
"fqdn",
"tld"
"tld",
]
),
mean_imputer=Imputer(features_to_impute=[], strategy="mean"),
Expand Down Expand Up @@ -99,7 +99,7 @@ def warnings(self, data: pl.DataFrame, suspicious: List, id: str) -> pl.DataFram
.alias("distro")
)
fqdn_distro = fqdn_distro.filter(pl.col("distro") > 0.05)

# Initialize empty array
total_warnings = [data.clear()]

Expand All @@ -126,9 +126,8 @@ def warnings(self, data: pl.DataFrame, suspicious: List, id: str) -> pl.DataFram
with pl.Config(tbl_rows=100):
logging.debug(suspicious_data.select(["fqdn"]).unique())
total_warnings.append(suspicious_data)

return pl.concat(total_warnings)


def update_count(
self,
Expand Down
9 changes: 5 additions & 4 deletions heidgaf/inspectors/domain_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class DomainInspector(Inspector):
Args:
Tester (Tester): Configuration.
"""

KEY_SECOND_LEVEL_DOMAIN = "secondleveldomain_frequency"
KEY_THIRD_LEVEL_DOMAIN = "thirdleveldomain_frequency"
KEY_FQDN = "fqdn_frequency"
Expand All @@ -37,14 +38,14 @@ def run(self, data: pl.DataFrame) -> pl.DataFrame:
"""
min_date = data.select(["timestamp"]).min().item()
max_date = data.select(["timestamp"]).max().item()

# Filter data with no errors
df = data.filter(pl.col("query") != "|").filter(
pl.col("query").str.split(".").list.len() != 1
)

findings = []

# Check anomalies in FQDN
logging.info("Analyze FQDN request anomalies")
warnings = self.update_count(df, min_date, max_date, "fqdn", self.KEY_FQDN)
Expand All @@ -63,5 +64,5 @@ def run(self, data: pl.DataFrame) -> pl.DataFrame:
df, min_date, max_date, "thirdleveldomain", self.KEY_THIRD_LEVEL_DOMAIN
)
findings.append(self.warnings(data, warnings, "thirdleveldomain"))

return pl.concat(findings)
4 changes: 2 additions & 2 deletions heidgaf/inspectors/ip_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def run(self, data: pl.DataFrame) -> pl.DataFrame:
.filter(pl.col("return_code") != ReturnCode.NOERROR.value)
.filter(pl.col("query").str.split(".").list.len() != 1)
)

findings = []

# Update frequencies based on errors
Expand All @@ -54,5 +54,5 @@ def run(self, data: pl.DataFrame) -> pl.DataFrame:
df, min_date, max_date, "dns_server", self.KEY_DNS_SERVER
)
findings.append(self.warnings(data, warnings, "dns_server"))

return pl.concat(findings)
Loading

0 comments on commit a03a026

Please sign in to comment.