Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate incidents creation #586

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions kcidb/monitor/subscriptions/create_incidents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Automate incident creation"""
import os
import kcidb
from kcidb.tools import kcidb_match

# Cached KCIDB client shared by the match_* handlers; stays None until
# get_client() manages to create one.
CLIENT = None


# pylint: disable=global-statement
def get_client():
    """Return the shared KCIDB client, creating it on first use.

    The client is built from the GCP_PROJECT and KCIDB_LOAD_QUEUE_TOPIC
    environment variables and stored in the module-level CLIENT. When
    either variable is missing or empty nothing is created and the
    current (falsy) value of CLIENT is returned.
    """
    global CLIENT
    # Fast path: a client was already created by an earlier call.
    if CLIENT:
        return CLIENT

    project = os.environ.get('GCP_PROJECT')
    topic = os.environ.get('KCIDB_LOAD_QUEUE_TOPIC')
    if not (project and topic):
        return CLIENT

    CLIENT = kcidb.Client(project_id=project, topic_name=topic)
    return CLIENT


def match_test(test):
    """Generate incidents for a test and submit them through the client.

    Does nothing when get_client() yields no client.
    """
    client = get_client()
    if not client:
        return

    # NOTE(review): a fresh IncidentGenerator is built on every call. A
    # reviewer suggested creating it once and reusing it, the same way
    # get_client() caches the KCIDB client.
    generator = kcidb_match.IncidentGenerator()
    client.submit(generator.generate_incidents_from_test(test))


def match_build(build):
    """Generate incidents for a build and submit them through the client.

    Does nothing when get_client() yields no client.
    """
    client = get_client()
    if not client:
        return

    generator = kcidb_match.IncidentGenerator()
    client.submit(generator.generate_incidents_from_build(build))


def match_issue(issue):
    """Record (or drop) the pattern carried by an issue in the pattern DB"""
    # The generator is only used as a way to reach its PatternDatabase.
    pattern_db = kcidb_match.IncidentGenerator().db
    pattern_db.update_patterns(issue)
Empty file added kcidb/tools/__init__.py
Empty file.
276 changes: 276 additions & 0 deletions kcidb/tools/kcidb_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
#!/usr/bin/env python3

"""KCIDB auto-matching tool"""


import argparse
import contextlib
import hashlib
import json
import logging
import sqlite3
import sys
from types import SimpleNamespace

from .pattern_validator import match_fields, validate_pattern_object


# Constants
# Default SQLite file used by PatternDatabase/IncidentGenerator
DB_NAME = 'patterns.db'
# Origin put into generated incidents and used as their ID prefix
ORIGIN = 'maestro'
# Value of the "version" field in the generated incidents output
KCIDB_IO_VERSION = {
    "major": 4,
    "minor": 3
}

# Configure logging
# NOTE(review): basicConfig() runs at import time, so importing this
# module as a library also configures the root logger - confirm intended.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class PatternDatabase:
    """Class to handle DB table 'patterns'

    Each row holds one issue's ID, version and pattern object (as JSON).
    Every operation opens its own SQLite connection to `db_name` and
    closes it once the operation is finished.
    """

    def __init__(self, db_name=DB_NAME):
        """Remember the database name and make sure the table exists"""
        self.db_name = db_name
        self.setup_database()

    def _connect(self):
        """Open a connection that is closed when its `with` block exits"""
        # sqlite3's own connection context manager only commits or rolls
        # back the transaction; it never closes the connection. Wrapping
        # it in closing() keeps connections from piling up.
        return contextlib.closing(sqlite3.connect(self.db_name))

    def setup_database(self):
        """Connect to DB and create 'patterns' table if it doesn't exist"""
        # Outer manager closes the connection, inner one commits/rolls back.
        with self._connect() as conn, conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS patterns (
                    issue_id TEXT UNIQUE,
                    issue_version INTEGER,
                    pattern_object JSON
                )
            ''')

    def add_pattern(self, issue_id, issue_version, pattern_object):
        """Add pattern object to DB, replacing any row for the same issue"""
        with self._connect() as conn, conn:
            conn.execute('''
                INSERT INTO patterns (issue_id, issue_version, pattern_object)
                VALUES (?, ?, json(?))
                ON CONFLICT(issue_id) DO UPDATE SET
                    issue_version=excluded.issue_version,
                    pattern_object=excluded.pattern_object
            ''', (issue_id, issue_version, json.dumps(pattern_object)))

    def remove_pattern(self, issue_id):
        """Remove pattern object from DB"""
        with self._connect() as conn, conn:
            conn.execute('DELETE FROM patterns WHERE issue_id = ?',
                         (issue_id,))

    def get_all_patterns(self):
        """Retrieve all pattern objects from DB

        Yields (issue_id, issue_version, pattern_object) rows lazily. The
        connection stays open while the generator is being consumed and
        is closed when it is exhausted or closed.
        """
        with self._connect() as conn:
            yield from conn.execute(
                'SELECT issue_id, issue_version, pattern_object '
                'FROM patterns')

    def update_patterns(self, issue):
        """Update the stored pattern for an issue

        `issue` must expose `id`, `version` and `misc` attributes.
        Nothing happens when `misc` is empty. When `misc` has no
        "pattern_object" the stored pattern is removed. Otherwise the
        pattern is validated and stored; a pattern failing validation is
        logged and left out.
        """
        if not issue.misc:
            return

        pattern_object = issue.misc.get("pattern_object")
        if not pattern_object:
            self.remove_pattern(issue.id)
            return

        if not validate_pattern_object(pattern_object):
            logger.error("Pattern object validation failed for issue id: %s",
                         issue.id)
            return
        self.add_pattern(issue.id, issue.version, pattern_object)


class IncidentGenerator:
    """Class to generate incidents

    Matches KCIDB I/O objects against the issue patterns stored in a
    PatternDatabase and builds an incident for every match.
    """

    def __init__(self, db_name=DB_NAME):
        """Attach the pattern database stored under `db_name`"""
        self.db = PatternDatabase(db_name)

    def create_incident(self, kcidb_io_object, issue_id, issue_version):
        """Create and return an incident object

        The incident points at the first test of `kcidb_io_object` or,
        when there are no tests, at its first build. Raises ValueError
        when the object has neither.
        """
        # Tests take precedence over builds; only the first entry is used.
        for collection, id_field in (('tests', "test_id"),
                                     ('builds', "build_id")):
            entries = kcidb_io_object.get(collection)
            if entries:
                object_id = entries[0]['id']
                break
        else:
            raise ValueError("The KCIDB IO object must contain at least "
                             "one non-empty test or build")

        # The incident ID is derived from issue ID, version and object ID.
        digest = hashlib.sha256(
            f"{issue_id}{issue_version}{object_id}".encode()
        ).hexdigest()

        return {
            'id': f"{ORIGIN}:{digest}",
            'origin': ORIGIN,
            'issue_id': issue_id,
            'issue_version': issue_version,
            'present': True,
            id_field: object_id,
        }

    def generate_incident_on_match(self, kcidb_io_object, issue_id,
                                   issue_version, issue_pattern_object):
        """Generate incident if issue pattern is found in a build/test
        object, return an empty dictionary otherwise"""
        if not match_fields(issue_pattern_object, kcidb_io_object):
            return {}
        return self.create_incident(kcidb_io_object, issue_id,
                                    issue_version)

    def generate_incidents_from_db(self, kcidb_io_object):
        """
        Generate incidents by trying to match the kcidb_io_object
        against the patterns saved in the database

        Returns a dictionary with the "version" and "incidents" keys.
        """
        candidates = (
            self.generate_incident_on_match(
                kcidb_io_object, issue_id, issue_version,
                json.loads(pattern_json))
            for issue_id, issue_version, pattern_json
            in self.db.get_all_patterns()
        )
        return {
            "version": KCIDB_IO_VERSION,
            # Non-matching patterns produce empty dicts; drop them.
            "incidents": [incident for incident in candidates if incident]
        }

    def generate_incidents_from_test(self, test):
        """Generate incidents from a test object and its build/checkout"""
        # NOTE(review): reviewers flagged that rebuilding an I/O object
        # from the private `_data` of OO objects cannot be relied on (an
        # OO object is a processed I/O object and data may be lost). They
        # suggested working with OO objects directly and mentioned the
        # raw OO data schema in kcidb.orm.data.
        build = test.build
        return self.generate_incidents_from_db({
            "tests": [test._data],
            "builds": [build._data],
            "checkouts": [build.checkout._data],
        })

    def generate_incidents_from_build(self, build):
        """Generate incidents from a build object and its checkout"""
        return self.generate_incidents_from_db({
            "builds": [build._data],
            "checkouts": [build.checkout._data],
        })


def parse_arguments():
    """Parse command-line arguments

    Reads the arguments from sys.argv and returns the resulting
    argparse.Namespace. Exits with a usage error when both
    --check_test_id and --check_build_id are given, or when either is
    given without --db_conn. Giving either of them sets ignore_db.
    """
    class CustomHelpFormatter(argparse.RawTextHelpFormatter):
        """Help string formatter for command-line tools"""

    parser = argparse.ArgumentParser(
        description='KCIDB Match Tool',
        formatter_class=CustomHelpFormatter,
        epilog='''\
Usage examples:

export -x DB_OPTS="postgresql:host=127.0.0.1 port=5432 sslmode=disable
dbname=playground_kcidb [email protected]"

# Update patterns
kcidb-query -i "kernelci_api:70d17807303641a9d6d2a8aeb1aee829221cefcf"
-d "$DB_OPTS" | ./kcidb-match.py --update-patterns

# Generate incidents
kcidb-query -t "maestro:6690dbfc7488a1b744200e82" -d "$DB_OPTS"
--parents | ./kcidb-match.py --generate-incidents

# Check test ID
cat issue.json | ./kcidb-match.py --check_test_id
"maestro:6690dbfc7488a1b744200e82" -d "$DB_OPTS"

# Check build ID
cat issue.json | ./kcidb-match.py --check_build_id
"maestro:6690dbfc7488a1b744200e82" -d "$DB_OPTS"
'''
    )

    parser.add_argument('--update-patterns', action='store_true',
                        help='Update patterns from issues. Other '
                             'arguments are ignored when used. Expects '
                             'KCIDB-IO object with issues via stdin.')

    parser.add_argument('--generate-incidents', action='store_true',
                        help='Generate incidents for matched issues. '
                             'Expects KCIDB-IO object with build and/or '
                             'test via stdin.')

    parser.add_argument('--ignore-db', action='store_true',
                        help='Ignore the database and generate incidents '
                             'based on the issues field in the KCIDB-IO '
                             'object via stdin.')

    parser.add_argument('--check_test_id', type=str,
                        help='Test ID to check. Requires --db_conn. '
                             'Implies --ignore-db. Expects KCIDB-IO '
                             'object with issues via stdin.')

    parser.add_argument('--check_build_id', type=str,
                        help='Build ID to check. Requires --db_conn. '
                             'Implies --ignore-db. '
                             'Expects KCIDB-IO object with issues via stdin.')

    # The first literal ends with a space so the two sentences do not run
    # together in the rendered help.
    parser.add_argument('-d', '--db_conn', type=str,
                        help='Database connection string for kcidb-query. '
                             'Required with --check_test_id or '
                             '--check_build_id.')

    args = parser.parse_args()

    if args.check_test_id and args.check_build_id:
        parser.error("Cannot use both --check_test_id and --check_build_id")

    checking_object = args.check_test_id or args.check_build_id
    if checking_object and not args.db_conn:
        parser.error("--db_conn is required when using --check_test_id or "
                     "--check_build_id")

    # Checking a specific object implies --ignore-db.
    if checking_object:
        args.ignore_db = True

    return args


def main():
    """Main function

    Reads a KCIDB-IO object (JSON) from stdin. With --update-patterns
    the patterns of the issues it contains are stored in the pattern
    database. Otherwise the object is matched against the stored
    patterns and the result is printed: as JSON with
    --generate-incidents, or as one line per matched issue.
    """
    # NOTE(review): --ignore-db, --check_test_id, --check_build_id and
    # --db_conn are parsed but not consulted anywhere in this function.
    args = parse_arguments()
    kcidb_io_object = json.load(sys.stdin)
    incident_generator = IncidentGenerator()

    if args.update_patterns:
        # update_patterns() reads the `id`, `version` and `misc`
        # attributes of a single issue, while stdin gives us plain
        # dictionaries inside an I/O object, so wrap each issue before
        # handing it over.
        for issue in kcidb_io_object.get("issues", []):
            incident_generator.db.update_patterns(SimpleNamespace(
                id=issue["id"],
                version=issue["version"],
                misc=issue.get("misc"),
            ))
        return

    results = incident_generator.generate_incidents_from_db(
        kcidb_io_object)

    if args.generate_incidents:
        print(json.dumps(results, indent=2))
        return

    for incident in results['incidents']:
        print("Matched issue ID:", incident['issue_id'], "Version:",
              incident['issue_version'])


# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Loading