Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add pydantic model for parser #11

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions .github/workflows/hub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,20 @@ jobs:
with:
cmd: find . -path ./.tests -prune -o -name "*"yaml -exec sh -c 'yq -o=json {} > $(dirname {})/$(basename {} .yaml).json' \;
- name: transform to json schema
uses: mikefarah/yq@master
with:
cmd: for i in *.yaml ; do yq -o=json $i > $(basename $i .yaml).json ; done
run: |
sudo apt-get install -y python3-pip
pip3 install pydantic
python -c 'from models.parser import Parser; import json; out_file=open("parser.json","w"); json.dump(Parser.model_json_schema(),out_file)'
- name: validate parsers against schema
run: |
go install github.com/santhosh-tekuri/jsonschema/cmd/jv@latest
for ITEM in ./hub/parsers/*/*/*.json; do echo $ITEM && ~/go/bin/jv parser_schema.json $ITEM ; done
for ITEM in ./hub/parsers/*/*/*.json; do echo $ITEM && ~/go/bin/jv parser.json $ITEM ; done
- name: validate scenarios against schema
run: |
for ITEM in ./hub/scenarios/*/*.json; do echo $ITEM && ~/go/bin/jv scenario_schema.json $ITEM ; done
- name: validate postoverflows against schema
run: |
for ITEM in ./hub/postoverflows/*/*/*.json; do echo $ITEM && ~/go/bin/jv parser_schema.json $ITEM ; done
for ITEM in ./hub/postoverflows/*/*/*.json; do echo $ITEM && ~/go/bin/jv parser.json $ITEM ; done
- name: validate parsers against schema
run: |
for ITEM in ./hub/collections/*/*.json; do echo $ITEM && ~/go/bin/jv collection_schema.json $ITEM ; done
4 changes: 2 additions & 2 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Go 1.19
- name: Set up Go 1.23
uses: actions/setup-go@v3
with:
go-version: 1.19
go-version: 1.23
- name: transform to json
uses: mikefarah/yq@master
with:
Expand Down
32 changes: 32 additions & 0 deletions models/children_nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python3
from __future__ import annotations
from models.pattern_syntax import PatternSyntax
from models.statics import Statics
from models.grok import Grok
from models.stash import Stash
from pydantic import BaseModel, Field
from typing import List, Optional


class ChildrenNodes(BaseModel):
    # Schema of a nested parser node. The ``nodes`` field refers back to
    # this same class, so trees of any depth can be described.

    # Only the literal value "next_stage" is accepted (see pattern).
    onsuccess: Optional[str] = Field(
        default=None,
        description=(
            "If node is successful and onsuccess equals next_stage, "
            "event is moved to the next stage"
        ),
        pattern=r"^next_stage$",
    )

    debug: Optional[bool] = Field(
        default=None,
        description="If true, enables the debug. Default is false.",
    )
    filter: Optional[str] = Field(
        default=None,
        description=(
            "filter must be a valid expr expression that will be evaluated\n"
            "against the event. If filter evaluation returns true or is\n"
            "absent, node will be processed. If filter returns false or a\n"
            "non-boolean, node won't be processed.\n"
        ),
    )

    # Optional sub-sections, each described by its own model.
    pattern_syntax: Optional[PatternSyntax] = Field(default=None)
    grok: Optional[Grok] = Field(default=None)
    stash: Optional[Stash] = Field(default=None)
    statics: Optional[Statics] = Field(default=None)

    # Recursive reference: child nodes share this node's schema.
    nodes: Optional[List[ChildrenNodes]] = Field(
        default=None,
        description=(
            "nodes is a list of parser nodes, allowing you to build\n"
            "trees. Each subnode must be valid, and if any of the subnodes\n"
            "succeed, the whole node is considered successful.\n"
        ),
    )
44 changes: 44 additions & 0 deletions models/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python3

from typing import List, Optional
from pydantic import BaseModel, Field


class DataItem(BaseModel):
    # One external data file a parser depends on: where to fetch it, where
    # to store it, and (optionally) how to load and cache its content.
    #
    # Every field is optional. ``type`` and ``ttl`` now carry an explicit
    # ``None`` default like their siblings; without one, pydantic v2 treats
    # an ``Optional[...]`` field as required-but-nullable.

    source_url: Optional[str] = Field(None, description="url to download file from")
    dest_file: Optional[str] = Field(
        None, description="destination to store the downloaded file to"
    )
    # The accepted literals are exactly those of the pattern below.
    type: Optional[str] = Field(
        None,
        description="The type is mandatory if you want to evaluate the data "
        "in the file, and should be regexp for valid (re2) regular expression per "
        "line or string for string per line. The regexps will be compiled, the "
        "strings will be loaded into a list and both will be kept in memory. "
        "Without specifying a type, the file will be downloaded and stored as file "
        "and not in memory.",
        pattern=r"^(string|regexp)$",
    )

    strategy: Optional[str] = Field(
        None,
        description="Strategy for cache behavior. See https://pkg.go.dev/github.com/bluele/gcache",
        pattern=r"^(LRU|LFU|ARC)$",
    )

    size: Optional[int] = Field(None, description="Maximum size of the cache")
    # Duration string built from optional d/h/m/s/ms/us/ns components, in
    # that order (e.g. "1h30m"); the empty string also matches the pattern.
    ttl: Optional[str] = Field(
        None,
        pattern=r"^([0-9]+(\.[0-9]+)*d)?([0-9]+(\.[0-9]+)*h)?([0-9]+(\.[0-9]+)*m)?([0-9]+(\.[0-9]+)*s)?([0-9]+(\.[0-9]+)*ms)?([0-9]+(\.[0-9]+)*(us|µs))?([0-9]+(\.[0-9]+)*ns)?$",
        description="Duration after which cache elements expire",
    )



class Data(BaseModel):
    # Wrapper holding the mandatory list of external data files.
    data: List[DataItem] = Field(
        ...,
        # The fragments are joined by implicit concatenation, so every
        # fragment that continues a sentence must end with a space.
        description="data allows user to specify an external source of data. This "
        "section is only relevant when cscli is used to install parser "
        "from hub, as it will download the source_url and store it to "
        "dest_file. When the parser is not installed from the hub, "
        "CrowdSec won't download the URL, but the file must exist for the "
        "parser to be loaded correctly.",
    )
21 changes: 21 additions & 0 deletions models/grok.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env python3

from typing import Optional
from pydantic import BaseModel, Field, Extra
from models.statics import Statics


class GrokItem(BaseModel):
    # Body of a grok section. Every key is optional, and keys not listed
    # here are rejected (extra="forbid").
    model_config = {"extra": "forbid"}

    name: Optional[str] = Field(default=None)
    pattern: Optional[str] = Field(default=None)
    apply_on: Optional[str] = Field(default=None)
    expression: Optional[str] = Field(default=None)
    statics: Optional[Statics] = Field(default=None)


class Grok(BaseModel):
    # Wrapper around a single grok section. The key has no default, so it
    # must be present, although its value may be null.
    grok: Optional[GrokItem] = Field(...)
119 changes: 119 additions & 0 deletions models/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3

from typing import Dict, Any, List, Optional, Union
from pydantic import BaseModel, Field, confloat
from models.children_nodes import ChildrenNodes
from models.pattern_syntax import PatternSyntax
from models.statics import Statics
from models.grok import Grok
from models.stash import Stash
from models.data import Data


class ParserType1(BaseModel):
    # Parser variant whose ``format`` is numeric and whose ``whitelist`` is
    # optional. Unknown keys are rejected (extra="forbid").
    #
    # The long descriptions are assembled by implicit string concatenation,
    # so each fragment that continues a sentence must end with a space.

    onsuccess: Optional[str] = Field(
        None,
        pattern=r"^next_stage$",
        description="If node is successful and onsuccess equals next_stage, event is moved to the next stage",
    )
    debug: Optional[bool] = Field(
        None, description="If true, enables the debug. Default is false."
    )
    filter: Optional[str] = Field(
        None,
        description="filter must be a valid expr expression that will be evaluated "
        "against the event. If filter evaluation returns true or is absent, node "
        "will be processed. If filter returns false or a "
        "non-boolean, node won't be processed.",
    )
    description: Optional[str] = Field(
        None, description="description of the parser usage"
    )
    pattern_syntax: Optional[PatternSyntax] = None
    # The only field of this variant without a default, i.e. required.
    name: str = Field(
        ...,
        description="The mandatory name of the node. If not present, node will be "
        "skipped at runtime. It is used for example in debug log to help "
        "you track things.",
    )
    grok: Optional[Grok] = None
    stash: Optional[Stash] = None
    statics: Optional[Statics] = None
    data: Optional[Data] = None
    format: Optional[float] = Field(
        None,
        ge=1.0,
        description="Non mandatory format version for the parser. configuration "
        "file. New features, may not be understood by old crowdsec version, "
        "filling this correctly ensures that crowdsec supports "
        "all the required parser features.",
    )
    nodes: Optional[List[ChildrenNodes]] = Field(
        None,
        description="nodes is a list of parser nodes, allowing you to build trees. "
        "Each subnode must be valid, and if any of the subnodes succeed, the whole "
        "node is considered successful.",
    )
    whitelist: Optional[Dict[str, Any]] = None

    model_config = {
        "extra": "forbid",
    }


class ParserType2(BaseModel):
    # Parser variant whose ``format`` is a string and whose ``whitelist`` is
    # mandatory. Unknown keys are rejected (extra="forbid").
    #
    # The long descriptions are assembled by implicit string concatenation,
    # so each fragment that continues a sentence must end with a space.

    onsuccess: Optional[str] = Field(
        None,
        pattern=r"^next_stage$",
        description="If node is successful and onsuccess equals next_stage, event is moved to the next stage",
    )
    debug: Optional[bool] = Field(
        None, description="If true, enables the debug. Default is false."
    )
    filter: Optional[str] = Field(
        None,
        description="filter must be a valid expr expression that will be evaluated "
        "against the event. If filter evaluation returns true or is absent, node "
        "will be processed. If filter returns false or a non-boolean, node won't "
        "be processed.",
    )
    description: Optional[str] = Field(
        None, description="description of the parser usage"
    )
    pattern_syntax: Optional[PatternSyntax] = None
    name: str = Field(
        ...,
        description="The mandatory name of the node. If not present, node will be "
        "skipped at runtime. It is used for example in debug log to help you track "
        "things.",
    )
    grok: Optional[Grok] = None
    stash: Optional[Stash] = None
    statics: Optional[Statics] = None
    data: Optional[Data] = None
    # No ``ge`` bound here: a numeric constraint cannot be applied to a
    # string-typed field. The float-typed variant keeps its own bound.
    # NOTE(review): if string formats must be restricted (e.g. to numeric
    # text), add a ``pattern=`` once the accepted values are confirmed.
    format: Optional[str] = Field(
        None,
        description="Non mandatory format version for the parser. configuration file. "
        "New features, may not be understood by old crowdsec version, filling this "
        "correctly ensures that crowdsec supports\nall the required parser features.",
    )
    nodes: Optional[List[ChildrenNodes]] = Field(
        None,
        description="nodes is a list of parser nodes, allowing you to build trees. Each "
        "subnode must be valid, and if any of the subnodes succeed, the whole node is "
        "considered successful.",
    )
    # Required in this variant (no default).
    whitelist: Dict[str, Any]

    model_config = {
        "extra": "forbid",
    }


class Parser(BaseModel):
    # Top-level document model: a single mandatory ``parser`` key whose
    # value must satisfy one of the two parser variants. No other top-level
    # key is accepted.
    model_config = {"extra": "forbid"}

    parser: Union[ParserType1, ParserType2] = Field(
        default=...,
        title="CrowdSec Parser",
    )
31 changes: 31 additions & 0 deletions models/pattern_syntax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python3

from pydantic import BaseModel, Field, validator
from typing import Dict
import re


class PatternSyntax(BaseModel):
    # Named sub-patterns for later use in grok expressions. Keys must match
    # ^[A-Z][A-Z0-9_v]*$; this is enforced at validation time by the
    # validator below and advertised in the JSON schema via
    # ``json_schema_extra``.
    #
    # NOTE(review): confirm that ``json_schema_extra`` at model level merges
    # into the generated ``properties.pattern_syntax`` entry rather than
    # replacing it; if it replaces it, move the ``patternProperties`` hint
    # onto the field itself.
    model_config = {
        "json_schema_extra": {
            "properties": {
                "pattern_syntax": {
                    "patternProperties": {
                        "^[A-Z][A-Z0-9_v]*$": {"type": "string"},
                    },
                }
            }
        },
    }

    pattern_syntax: Dict[str, str] = Field(
        ...,
        description="pattern_syntax allows user to define named capture group "
        "expressions for future use in grok patterns. Regexp must be a valid RE2 expression.",
    )

    @validator("pattern_syntax", pre=True)
    def check_key_constraint(cls, values):
        """Reject mappings whose keys are not valid pattern names.

        ``values`` is the raw value supplied for the ``pattern_syntax``
        field itself (the validator runs before type validation), not the
        whole model input, so its keys are inspected directly.

        Returns the value unchanged; a validator that returns nothing
        would replace the field with ``None``.

        Raises:
            ValueError: if any key does not match the naming pattern.
        """
        pattern = r"^[A-Z][A-Z0-9_v]*$"
        # Non-dict input is left for the regular type validation to reject.
        if isinstance(values, dict):
            for key in values:
                # Non-string keys can never match; report them the same way
                # instead of letting ``re`` raise a TypeError.
                if not isinstance(key, str) or not re.fullmatch(pattern, key):
                    raise ValueError(f"Key '{key}' does not match pattern '{pattern}'")
        return values
47 changes: 47 additions & 0 deletions models/stash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python3

from typing import List, Optional, ClassVar
from pydantic import BaseModel, Field


class StashItem(BaseModel):
    # One capture directive of a stash section: which cache to write to,
    # the key and value expressions, and optional cache tuning.

    # Plain string constant. It must not be wrapped in a tuple (no trailing
    # comma inside the parentheses) to honour the ``ClassVar[str]`` type.
    description: ClassVar[str] = (
        "The stash section allows a parser to capture data, that can "
        "be later accessed/populated via GetFromStash and SetInStash expr "
        "helpers. Each list item defines a capture directive that is stored in a "
        "separate cache (string:string), with its own maximum size, eviction "
        "rules etc."
    )
    name: str = Field(
        description="The name of the stash. Distinct parsers can manipulate the same cache",
    )
    key: str = Field(
        description="The expression that defines the string that will be used as a key",
    )
    value: str = Field(
        description="The expression that defines the string that will be used as a value",
    )
    # The default must be the ``Field(...)`` object itself. Wrapping it in a
    # tuple would make the tuple the default value and silently drop the
    # pattern and description.
    ttl: Optional[str] = Field(
        None,
        pattern=r"^([0-9]+(\.[0-9]+)*d)?([0-9]+(\.[0-9]+)*h)?([0-9]+(\.[0-9]+)*m)?([0-9]+(\.[0-9]+)*s)?([0-9]+(\.[0-9]+)*ms)?([0-9]+(\.[0-9]+)*(us|µs))?([0-9]+(\.[0-9]+)*ns)?$",
        description="The time to live of items. Default strategy is LRU.",
    )
    size: Optional[int] = Field(None, description="The maximum size of the cache")
    strategy: Optional[str] = Field(
        None,
        pattern=r"^(LFU|LRU|ARC)$",
        description="The caching strategy to be used : LFU, LRU or ARC (see gcache doc for details). Defaults to LRU.\n",
    )


class Stash(BaseModel):
    # Wrapper holding the mandatory list of stash capture directives.
    stash: List[StashItem] = Field(
        ...,
        # The fragments are joined by implicit concatenation, so every
        # fragment that continues a sentence must end with a space.
        description="The stash section "
        "allows a parser to capture data, that can be later accessed/populated "
        "via GetFromStash and SetInStash expr helpers. Each list item defines a "
        "capture directive that is stored in a separate cache (string:string), "
        "with its own maximum size, eviction rules etc.",
    )
25 changes: 25 additions & 0 deletions models/statics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python3

from typing import List, Optional
from pydantic import Field, BaseModel


class Static(BaseModel):
    # A single statics directive. Every key is an optional string, and keys
    # not listed here are rejected (extra="forbid").
    model_config = {"extra": "forbid"}

    meta: Optional[str] = Field(default=None)
    parsed: Optional[str] = Field(default=None)
    enriched: Optional[str] = Field(default=None)
    target: Optional[str] = Field(default=None)
    value: Optional[str] = Field(default=None)
    expression: Optional[str] = Field(default=None)
    method: Optional[str] = Field(default=None)


class Statics(BaseModel):
    # Wrapper holding the mandatory list of statics directives.
    statics: List[Static] = Field(
        default=...,
        description=(
            "Statics is a list of directives that will be evaluated when the\n"
            "node is considered successful. Each entry of the list is\n"
            "composed of a target (where to write) and a source (what data to\n"
            "write).\n"
        ),
    )
Loading