Skip to content

Commit

Permalink
Add click extension
Browse files: browse the repository at this point in the history
  • Loading branch information
stefanDeveloper committed Apr 8, 2024
1 parent daa82d8 commit 4a18ca7
Show file tree
Hide file tree
Showing 18 changed files with 508 additions and 534 deletions.
95 changes: 84 additions & 11 deletions heidgaf/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch

from heidgaf import CONTEXT_SETTINGS
from heidgaf.main import DNSAnalyzerPipeline, Detector
from heidgaf.main import DNSAnalyzerPipeline, Detector, Separator
from heidgaf.models.lr import LogisticRegression
from heidgaf.train import DNSAnalyzerTraining
from heidgaf.version import __version__
Expand Down Expand Up @@ -57,30 +57,103 @@ def training_start():


@cli.group(name="process", context_settings={"show_default": True})
def training_model():
def analyse():
logging.info("Starts processing log lines of DNS traffic.")


@training_model.command(name="start")
@click.option("-r", "--read", "input_dir", required=True, type=click.Path())
@analyse.command(name="start")
@click.option(
"-r",
"--read",
"input_dir",
required=True,
type=click.Path(),
help="Input directory or file for analyzing."
)
@click.option(
"-dt",
"--detector",
"detector",
type=click.Choice(Detector),
help="Sets the anomaly detector",
default=Detector.THRESHOLDING,
help="Sets the anomaly detector.",
)
@click.option(
"-s",
"--separator",
"separator",
type=click.STRING,
default=Separator.COMMA.value,
help="Separator type of input.",
)
@click.option(
"--lag",
"lag",
type=click.FLOAT,
default=15,
help="Sets the anomaly detector lag.",
)
@click.option(
"--influence",
"influence",
type=click.FLOAT,
default=0.7,
help="Sets the anomaly detector influence.",
)
@click.option(
"--std",
"n_standard_deviations",
type=click.FLOAT,
default=3,
help="Sets the anomaly detector n standard deviation.",
)
@click.option(
"-d",
"--delimiter",
"delimiter",
"--redis-host",
"redis_host",
type=click.STRING,
help="Sets the anomaly detector",
default="localhost",
help="Sets Redis host for caching results.",
)
@click.option(
"--redis-port",
"redis_port",
type=click.INT,
default=6379,
help="Sets Redis port for caching results.",
)
def training_start(input_dir, detector, delimiter):
pipeline = DNSAnalyzerPipeline(input_dir)
@click.option(
"--redis-db",
"redis_db",
type=click.INT,
default=0,
help="Sets Redis database for caching results.",
)
@click.option(
"--redis-max-connection",
"redis_max_connection",
type=click.INT,
default=20,
help="Sets Redis max connection for caching results.",
)
def training_start(
input_dir, detector, separator, lag, influence, n_standard_deviations, redis_host, redis_port, redis_db, redis_max_connection
):
pipeline = DNSAnalyzerPipeline(
path=input_dir,
detector=detector,
lag=lag,
anomaly_influence=influence,
n_standard_deviations=n_standard_deviations,
separator=separator,
redis_host=redis_host,
redis_port=redis_port,
redis_db=redis_db,
redis_max_connections=redis_max_connection
)
pipeline.run()


if __name__ == "__main__":
"""Default CLI entrypoint for Click interface
"""
cli()
90 changes: 63 additions & 27 deletions heidgaf/detectors/arima_anomaly_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class ARIMAAnomalyDetector(AnomalyDetector):
"""
Anomaly detector based on the ARIMA model.
"""

def __init__(self, config: AnomalyDetectorConfig, order: Tuple[int, int, int]):
super().__init__(config)
self.order = order
Expand All @@ -26,7 +27,6 @@ def fit_model(self, y: List[float]):

def get_predictions(self):
return self.model_fit.predict(start=1, end=len(self.y))


def get_residuals(self, predictions):
return self.y - predictions
Expand All @@ -47,10 +47,16 @@ def run(self, y: List[float]) -> Dict[str, np.ndarray]:

@staticmethod
def rmse_metric(residuals):
return np.sqrt(np.mean((residuals)**2))
return np.sqrt(np.mean((residuals) ** 2))


def plot(self, y: List[float], results: Dict[str, np.ndarray], xlabel: str = 'Time', ylabel: str = 'Value', figsize: Tuple[int, int] = (12, 8)) -> plt.figure:
def plot(
self,
y: List[float],
results: Dict[str, np.ndarray],
xlabel: str = "Time",
ylabel: str = "Value",
figsize: Tuple[int, int] = (12, 8),
) -> plt.figure:
fig = plt.figure(figsize=figsize, dpi=150)
gs = gridspec.GridSpec(3, 1, height_ratios=[2, 0.5, 0.5])
gs.update(wspace=1.5, hspace=0.025)
Expand All @@ -63,50 +69,80 @@ def plot(self, y: List[float], results: Dict[str, np.ndarray], xlabel: str = 'Ti
residuals = results["residuals"]
predictions = results["predictions"]
anomalies = results["anomalies"]

std_residual = np.std(residuals)
upper_bound = predictions + self.threshold * std_residual
lower_bound = predictions - self.threshold * std_residual


min_pos, max_pos = min(y.min(), predictions.min(), lower_bound.min()), max(y.max(), predictions.max(), upper_bound.max())


min_pos, max_pos = min(y.min(), predictions.min(), lower_bound.min()), max(
y.max(), predictions.max(), upper_bound.max()
)

# predictions vs actual
ax1.fill_between(time_series, lower_bound, upper_bound, color='lightsteelblue', alpha=0.3, label='Bounds')
ax1.plot(time_series, y, 'k.', label='Original Data', alpha=0.7)
ax1.plot(time_series, predictions, ls='-', lw=2, c='steelblue', label='ARIMA Predictions')
ax1.scatter(time_series[anomalies == 1], y[anomalies == 1], color='coral', s=20, zorder=5)
ax1.vlines(time_series[anomalies == 1], min_pos, max_pos, color="coral", alpha=0.2)


ax1.fill_between(
time_series,
lower_bound,
upper_bound,
color="lightsteelblue",
alpha=0.3,
label="Bounds",
)
ax1.plot(time_series, y, "k.", label="Original Data", alpha=0.7)
ax1.plot(
time_series,
predictions,
ls="-",
lw=2,
c="steelblue",
label="ARIMA Predictions",
)
ax1.scatter(
time_series[anomalies == 1],
y[anomalies == 1],
color="coral",
s=20,
zorder=5,
)
ax1.vlines(
time_series[anomalies == 1], min_pos, max_pos, color="coral", alpha=0.2
)

# residuals
ax2.plot(time_series, residuals, 'k.', label='Residuals', alpha=0.7)
ax2.scatter(time_series[anomalies], residuals[anomalies], color='coral', s=20, zorder=5)
ax2.vlines(time_series[anomalies], residuals.min(), residuals.max(), color='coral', zorder=5, alpha=0.2)

ax2.plot(time_series, residuals, "k.", label="Residuals", alpha=0.7)
ax2.scatter(
time_series[anomalies], residuals[anomalies], color="coral", s=20, zorder=5
)
ax2.vlines(
time_series[anomalies],
residuals.min(),
residuals.max(),
color="coral",
zorder=5,
alpha=0.2,
)

# signals
ax3.plot(time_series, anomalies, ls='-', c='coral', label='Anomalies')
ax3.plot(time_series, anomalies, ls="-", c="coral", label="Anomalies")

# labels
ax1.set_ylabel(ylabel, fontfamily="monospace")
ax1.set_xlim(time_series[0], time_series[-1])
ax1.set_xticklabels([])
ax1.set_ylim(min_pos, max_pos)
ax1.legend()

ax2.set_ylabel("Residuals", fontfamily="monospace")
ax2.set_xlim(time_series[0], time_series[-1])

ax3.set_xticklabels([])
ax3.set_xlabel(xlabel, fontfamily="monospace")
ax3.set_ylabel("Signal", fontfamily="monospace")
plt.setp(ax3.get_yticklabels(), visible=False)
# format
for ax in axes:
ax.grid(True, which='major', c='gray', ls='-', lw=0.5, alpha=0.1)
ax.tick_params(axis=u'both', which=u'both', length=0)
ax.grid(True, which="major", c="gray", ls="-", lw=0.5, alpha=0.1)
ax.tick_params(axis="both", which="both", length=0)
for s in ["bottom", "top", "left", "right"]:
ax.spines[s].set_visible(False)


return fig
Loading

0 comments on commit 4a18ca7

Please sign in to comment.