diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 85c1d1b..3cff692 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -13,3 +13,10 @@ updates:
     directory: "/.github/workflows"
     schedule:
       interval: "daily"
+
+  # Maintain dependencies for Python code
+  - package-ecosystem: "pip"
+    directory: "/"
+    versioning-strategy: "increase-if-necessary"
+    schedule:
+      interval: "daily"
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 9f38401..fdd9391 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -48,7 +48,7 @@ jobs:
               tox -e jsonschema
           - name: "ruff"
             commands: |
-              tox -e ruff -- check
+              tox -e ruffcheck
           - name: "pylint"
             commands: |
               echo "::add-matcher::.github/workflows/matchers/pylint.json"
diff --git a/README.md b/README.md
index d5d5953..5740d49 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 # Taxonomy Schema
 
-This Python package defines the JSON schema for the InstructLab [Taxonomy](https://github.com/instructlab/taxonomy) YAML.
+This Python package defines the JSON schema and a parser for the InstructLab [Taxonomy](https://github.com/instructlab/taxonomy) YAML.
 
-Consumers of this schema can `pip install instructlab-schema`, and access the schema files using `importlib.resources` on the `instructlab.schema` package.
+Consumers of this schema can `pip install instructlab-schema`, and use the `instructlab.schema.taxonomy.TaxonomyParser` class to parse and validate `qna.yaml` taxonomy files.
+Schema files can be directly accessed using the `instructlab.schema.schema_base()` method to get access the base of the schema resources.
diff --git a/pyproject.toml b/pyproject.toml
index 4a3a231..a00d5e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 [build-system]
-requires = ["setuptools>=64", "setuptools_scm>=8"]
+requires = ["setuptools>=70.1.0", "setuptools_scm>=8"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -21,7 +21,15 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
 ]
-dynamic = ["dependencies", "optional-dependencies", "version"]
+dependencies = [
+    "typing_extensions",
+    "jsonschema>=4.22.0",
+    "PyYAML>=6.0.0",
+    # The below library should NOT be imported into any python files
+    # We only use the command via subprocess
+    "yamllint>=1.35.1",
+]
+dynamic = ["version"]
 
 [project.urls]
 homepage = "https://instructlab.ai"
@@ -40,6 +48,7 @@ exclude = ["^src/instructlab/schema/_version\\.py$"]
 target-version = "py310"
 src = ["src", "tests"]
 extend-exclude = ["src/instructlab/schema/_version.py"]
+line-length = 180
 
 [tool.ruff.lint]
 select = [
@@ -53,11 +62,23 @@ select = [
     "TID", # flake8-tidy-imports
 ]
 
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"yamllint".msg = "yamllint is for use as a command via subprocess."
+
 [tool.pylint.main]
 py-version = "3.10"
 source-roots = ["src", "tests"]
 ignore = ["_version.py"]
 
+[tool.pylint.design]
+max-branches = 20
+max-line-length = 180
+max-locals = 20
+min-public-methods = 1
+
+[tool.pylint.format]
+max-args = 8
+
 [tool.pylint."messages control"]
 disable = [
     "missing-class-docstring",
diff --git a/scripts/ruff.sh b/scripts/ruff.sh
deleted file mode 100755
index 6bf131f..0000000
--- a/scripts/ruff.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env bash
-# SPDX-License-Identifier: Apache-2.0
-set -e
-
-# wrapper to combine ruff check and ruff format
-#
-# "ruff.sh fix" runs fixes and reformats the code
-# "ruff.sh check" checks style, format, and imports
-# "ruff.sh <args>" passes abitrary args to ruff
-
-if [ -z "$1" ]; then
-    echo "USAGE: $0 [check|fix|<args>]" >&2
-    exit 2
-fi
-
-run() {
-    declare -i err
-
-    echo "RUN: '$*'"
-    "$@"
-    err=$?
-    echo
-    return $err
-}
-
-case $1 in
-    "check")
-        declare -i exitcode=0
-
-        set +e
-        run ruff check --diff
-        exitcode=$(( exitcode + $? ))
-
-        run ruff format --check
-        exitcode=$(( exitcode + $? ))
-        set -e
-
-        if [ $exitcode -ne 0 ]; then
-            echo "ERROR: one or more checks have failed." >&2
-            echo "Run 'tox -e ruff' to auto-correct all fixable errors." >&2
-            exit 3
-        fi
-        ;;
-    "fix")
-        run ruff check --fix
-        run ruff format
-        ;;
-    *)
-        ruff "$@"
-esac
diff --git a/src/instructlab/schema/__init__.py b/src/instructlab/schema/__init__.py
index a1bc1f2..c8691da 100644
--- a/src/instructlab/schema/__init__.py
+++ b/src/instructlab/schema/__init__.py
@@ -3,14 +3,20 @@
 """InstructLab Taxonomy Schema"""
 
 # Standard
-from importlib import resources
+import importlib.resources
+from importlib.abc import Traversable
 
-try:
-    from importlib.resources.abc import Traversable  # type: ignore[import-not-found]
-except ImportError:  # python<3.11
-    from importlib.abc import Traversable
+__all__ = ["schema_base", "schema_versions"]
 
-__all__ = ["schema_versions"]
+
+def schema_base() -> Traversable:
+    """Return the schema base.
+
+    Returns:
+        Traversable: The base for the schema versions.
+    """
+    base = importlib.resources.files(__name__)
+    return base
 
 
 def schema_versions() -> list[Traversable]:
@@ -19,9 +25,8 @@ def schema_versions() -> list[Traversable]:
     Returns:
         list[Traversable]: A sorted list of schema versions.
     """
-    schema_base = resources.files(__package__)
     versions = sorted(
-        (v for v in schema_base.iterdir() if v.name[0] == "v" and v.name[1:].isdigit()),
+        (v for v in schema_base().iterdir() if v.name[0] == "v" and v.name[1:].isdigit()),
         key=lambda k: int(k.name[1:]),
     )
     return versions
diff --git a/src/instructlab/schema/taxonomy.py b/src/instructlab/schema/taxonomy.py
new file mode 100644
index 0000000..9bcd22f
--- /dev/null
+++ b/src/instructlab/schema/taxonomy.py
@@ -0,0 +1,431 @@
+# SPDX-License-Identifier: Apache-2.0
+
+"""Taxonomy qna.yaml parsing"""
+
+# Standard
+import enum
+import functools
+import json
+import logging
+import os
+import pathlib
+import re
+import subprocess
+import typing
+from collections.abc import Mapping
+
+# Third Party
+import yaml
+from jsonschema.protocols import Validator
+from jsonschema.validators import validator_for
+from referencing import Registry, Resource
+from referencing.exceptions import NoSuchResource
+from referencing.jsonschema import DRAFT202012, Schema, SchemaRegistry, SchemaResource
+from typing_extensions import Self
+
+from . import schema_base, schema_versions
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_TAXONOMY_FOLDERS: list[str] = ["compositional_skills", "knowledge"]
+"""Taxonomy folders which are also the schema names"""
+
+DEFAULT_YAMLLINT_CONFIG: str = "{extends: relaxed, rules: {line-length: {max: 120}}}"
+"""Default yamllint configuration"""
+
+
+class TaxonomyReadingException(Exception):
+    """An exception raised during reading of the taxonomy."""
+
+
+class TaxonomyMessageFormat(enum.Enum):
+    """An enum for the format choices for taxonomy parsing messages"""
+
+    AUTO = enum.auto()
+    """
+    If specified, then GITHUB will be used if both the GITHUB_ACTIONS and GITHUB_WORKFLOW
+    environment variables are set. Otherwise STANDARD will be used.
+    """
+    STANDARD = enum.auto()
+    """
+    A plain message starting with ERROR or WARN.
+    """
+    GITHUB = enum.auto()
+    """
+    Uses GitHub Actions workflow commands to set error or warning commands.
+    """
+    LOGGING = enum.auto()
+    """
+    Uses this module's logger to log warnings or errors.
+    """
+
+
+class Taxonomy:
+    """A container for a parsed taxonomy qna.yaml file."""
+
+    def __init__(self, *, taxonomy_path: pathlib.Path, abs_path: pathlib.Path, message_format: TaxonomyMessageFormat) -> None:
+        """Create a container for a parsed taxonomy qna.yaml file.
+
+        Args:
+            taxonomy_path (pathlib.Path): The taxonomy path for the qna.yaml file.
+            This should be a relative path starting at the taxonomy folder holding the qna.yaml file.
+            abs_path (pathlib.Path): The absolute path for the qna.yaml file.
+            message_format (TaxonomyMessageFormat): The format of any messages.
+            If TaxonomyMessageFormat.AUTO is specified, then TaxonomyMessageFormat.GITHUB will be used if
+            both the GITHUB_ACTIONS and GITHUB_WORKFLOW environment variables are set. Otherwise
+            TaxonomyMessageFormat.STANDARD will be used.
+        """
+        self.path: pathlib.Path = taxonomy_path
+        abs_path = abs_path.resolve()
+        cwd = pathlib.Path.cwd()
+        self.rel_path: pathlib.Path = abs_path.relative_to(cwd) if abs_path.is_relative_to(cwd) else abs_path
+        if message_format is TaxonomyMessageFormat.AUTO:
+            message_format = TaxonomyMessageFormat.GITHUB if "GITHUB_ACTIONS" in os.environ and "GITHUB_WORKFLOW" in os.environ else TaxonomyMessageFormat.STANDARD
+        self.message_format: TaxonomyMessageFormat = message_format
+        self.errors: int = 0
+        self.warnings: int = 0
+        self.contents: Mapping[str, typing.Any] = {}
+        self.version: int = 0
+
+    def error(
+        self,
+        message: str,
+        *message_args: object,
+        line: str | int = 1,
+        col: str | int = 1,
+        yaml_path: str = "",
+    ) -> Self:
+        """Report an error on the taxonomy qna.yaml file.
+
+        Args:
+            message (str): The error message. The message supports string formatting using the specified message_args, if any.
+            message_args (object, optional): Values used by the message string formatting.
+            line (str | int, optional): The line in the qna.yaml file where the error occurred. Defaults to 1.
+            col (str | int, optional): The column in the qna.yaml file where the error occurred. Defaults to 1.
+            yaml_path (str, optional): The yaml path to the item in the qna.yaml file where the error occurred.
+
+        Returns:
+            Self: This Taxonomy object.
+        """
+        self.errors += 1
+        match self.message_format:
+            case TaxonomyMessageFormat.GITHUB:
+                if message_args:
+                    message = message % message_args
+                print(
+                    f"::error file={self.rel_path},line={line},col={col}::{line}:{col} [{yaml_path}] {message}"
+                    if yaml_path
+                    else f"::error file={self.rel_path},line={line},col={col}::{line}:{col} {message}"
+                )
+            case TaxonomyMessageFormat.LOGGING:
+                if yaml_path:
+                    logger.error(
+                        "%s:%s:%s [%s] " + message,
+                        self.rel_path,
+                        line,
+                        col,
+                        yaml_path,
+                        *message_args,
+                    )
+                else:
+                    logger.error("%s:%s:%s " + message, self.rel_path, line, col, *message_args)
+            case TaxonomyMessageFormat.STANDARD | _:
+                if message_args:
+                    message = message % message_args
+                print(f"ERROR: {self.rel_path}:{line}:{col} [{yaml_path}] {message}" if yaml_path else f"ERROR: {self.rel_path}:{line}:{col} {message}")
+        return self
+
+    def warning(
+        self,
+        message: str,
+        *message_args: object,
+        line: str | int = 1,
+        col: str | int = 1,
+        yaml_path: str = "",
+    ) -> Self:
+        """Report a warning on the taxonomy qna.yaml file.
+
+        Args:
+            message (str): The warning message. The message supports string formatting using the specified message_args, if any.
+            message_args (object, optional): Values used by the message string formatting.
+            line (str | int, optional): The line in the qna.yaml file where the warning occurred. Defaults to 1.
+            col (str | int, optional): The column in the qna.yaml file where the warning occurred. Defaults to 1.
+            yaml_path (str, optional): The yaml path to the item in the qna.yaml file where the warning occurred.
+
+        Returns:
+            Self: This Taxonomy object.
+        """
+        self.warnings += 1
+        match self.message_format:
+            case TaxonomyMessageFormat.GITHUB:
+                if message_args:
+                    message = message % message_args
+                print(
+                    f"::warning file={self.rel_path},line={line},col={col}::{line}:{col} [{yaml_path}] {message}"
+                    if yaml_path
+                    else f"::warning file={self.rel_path},line={line},col={col}::{line}:{col} {message}"
+                )
+            case TaxonomyMessageFormat.LOGGING:
+                if yaml_path:
+                    logger.warning(
+                        "%s:%s:%s [%s] " + message,
+                        self.rel_path,
+                        line,
+                        col,
+                        yaml_path,
+                        *message_args,
+                    )
+                else:
+                    logger.warning("%s:%s:%s " + message, self.rel_path, line, col, *message_args)
+            case TaxonomyMessageFormat.STANDARD | _:
+                if message_args:
+                    message = message % message_args
+                print(f"WARN: {self.rel_path}:{line}:{col} [{yaml_path}] {message}" if yaml_path else f"WARN: {self.rel_path}:{line}:{col} {message}")
+        return self
+
+
+@functools.lru_cache
+def _load_schema(path: str) -> SchemaResource:
+    schema_path = schema_base().joinpath(path)
+    try:
+        text = schema_path.read_text(encoding="utf-8")
+        contents: Schema = json.loads(text)
+        return Resource.from_contents(contents, default_specification=DRAFT202012)
+    except Exception as e:
+        raise NoSuchResource(str(schema_path)) from e
+
+
+def _retrieve(version: str, uri: str) -> SchemaResource:
+    path = pathlib.Path(version, uri).as_posix()
+    return _load_schema(path)
+
+
+class TaxonomyParser:
+    """A parser for taxonomy qna.yaml files. The parser will return a Taxonomy object."""
+
+    def __init__(
+        self,
+        *,
+        taxonomy_folders: list[str] | None = None,
+        schema_version: int | None = None,
+        yamllint_config: str | None = None,
+        yamllint_strict: bool = False,
+        message_format: TaxonomyMessageFormat | str | None = None,
+    ) -> None:
+        """Create a parser for a taxonomy qna.yaml file.
+
+        Args:
+            taxonomy_folders (list[str] | None, optional): The folder/schema names.
+            DEFAULT_TAXONOMY_FOLDERS is used if None is specified.
+            schema_version (int | None, optional): The version of the Taxonomy schema.
+            Specifying a version less than 1 will use the schema version specified by each YAML document's "version" key.
+            The highest schema version is used if None is specified.
+            yamllint_config (str | None, optional): The yamllint configuration data.
+            DEFAULT_YAMLLINT_CONFIG is used if None is specified.
+            message_format (TaxonomyMessageFormat | str | None, optional): The format of any messages.
+            TaxonomyMessageFormat.AUTO is used if None is specified.
+        """
+        if taxonomy_folders is None:
+            taxonomy_folders = DEFAULT_TAXONOMY_FOLDERS
+        self.taxonomy_folders: list[str] = taxonomy_folders
+        if schema_version is None:
+            versions = schema_versions()
+            if not versions:
+                raise TaxonomyReadingException(f'Schema base "{schema_base()}" does not contain any schema versions')
+            schema_version = int(versions[-1].name[1:])
+        self.schema_version: int = schema_version
+        if yamllint_config is None:
+            yamllint_config = DEFAULT_YAMLLINT_CONFIG
+        self.yamllint_config: str = yamllint_config
+        self.yamllint_strict: bool = yamllint_strict
+        if message_format is None:
+            message_format = TaxonomyMessageFormat.AUTO
+        elif isinstance(message_format, str):
+            message_format = TaxonomyMessageFormat[message_format.upper()]
+        self.message_format: TaxonomyMessageFormat = message_format
+        self.yq_available: bool = True
+
+    def _yamllint(self, text: str, taxonomy: Taxonomy) -> None:
+        yamllint_cmd = [
+            "yamllint",
+            "-f",
+            "parsable",
+            "-d",
+            self.yamllint_config,
+            "-",  # read from stdin
+        ]
+
+        try:
+            result = subprocess.run(
+                yamllint_cmd,
+                check=False,
+                input=text,
+                text=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+            )
+        except FileNotFoundError:
+            logger.warning(
+                "could not run yamllint command",
+                exc_info=True,
+            )
+            return
+
+        lines = result.stdout.splitlines()
+        if lines:
+            pattern = re.compile(r"[^:]+:(?P<line>[^:]+):(?P<col>[^:]+):\s*\[(?P<severity>[^]]+)\]\s*(?P<message>.*)")
+            for line in lines:
+                match = pattern.match(line)
+                if match:
+                    line = match.group("line")
+                    col = match.group("col")
+                    severity = match.group("severity")
+                    message = match.group("message")
+                    if self.yamllint_strict or severity == "error":
+                        taxonomy.error(
+                            message,
+                            line=line,
+                            col=col,
+                        )
+                    else:
+                        taxonomy.warning(
+                            message,
+                            line=line,
+                            col=col,
+                        )
+
+    def _schema_validate(self, text: str, taxonomy: Taxonomy) -> None:
+        retrieve = functools.partial(_retrieve, f"v{taxonomy.version}")
+        schema_name = taxonomy.path.parts[0]
+        if schema_name not in self.taxonomy_folders:
+            schema_name = "knowledge" if "document" in taxonomy.contents else "compositional_skills"
+
+        try:
+            schema_resource = retrieve(f"{schema_name}.json")
+            schema = schema_resource.contents
+            if isinstance(schema, Mapping) and schema.get("deprecated"):
+                description = schema.get("description")
+                if description:  # description holds deprecation message
+                    taxonomy.error(description)
+                else:
+                    taxonomy.error(
+                        "%s schema version %s is deprecated",
+                        schema_name,
+                        taxonomy.version,
+                    )
+                return
+
+            validator_cls = validator_for(schema)
+            # mypy doesn't understand attrs classes fields, see: https://github.com/python/mypy/issues/5406
+            registry: SchemaRegistry = Registry(retrieve=retrieve)  # type: ignore[call-arg]
+            validator: Validator = validator_cls(schema, registry=registry)
+
+            for validation_error in validator.iter_errors(taxonomy.contents):
+                yaml_path = validation_error.json_path[1:]
+                if not yaml_path:
+                    yaml_path = "."
+                line: str | int = 1
+                if self.yq_available:
+                    try:
+                        yq_expression = f"{yaml_path} | line"
+                        line = subprocess.check_output(["yq", yq_expression], input=text, text=True)
+                        line = line.strip() if line else 1
+                    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+                        if isinstance(e, FileNotFoundError):
+                            self.yq_available = False
+                        logger.warning(
+                            "could not run yq command",
+                            exc_info=True,
+                        )
+                if validation_error.validator == "minItems":
+                    # Special handling for minItems which can have a long message for seed_examples
+                    taxonomy.error(
+                        "Value must have at least %s items",
+                        validation_error.validator_value,
+                        line=line,
+                        yaml_path=yaml_path,
+                    )
+                else:
+                    taxonomy.error(
+                        validation_error.message[-200:],
+                        line=line,
+                        yaml_path=yaml_path,
+                    )
+        except NoSuchResource as e:
+            taxonomy.error(
+                "Cannot load schema file %s. %s",
+                e.ref,
+                e,
+            )
+
+    def parse(self, path: str | pathlib.Path) -> Taxonomy:
+        """Parse and validate a qna.yaml file into a Taxonomy object.
+
+        Args:
+            path (str | pathlib.Path): The path to the qna.yaml file to parse and validate.
+
+        Raises:
+            TaxonomyReadingException: If an exception is raised while parsing or validating.
+
+        Returns:
+            Taxonomy: A Taxonomy object holding the parsed and validated qna.yaml file
+            and the count of any errors or warnings identified during parsing the qna.yaml file.
+            These can include lint issues and also schema validation issues.
+        """
+        abs_path = pathlib.Path(path).resolve()
+        for i in range(len(abs_path.parts) - 1, -1, -1):
+            if abs_path.parts[i] in self.taxonomy_folders:
+                taxonomy_path = pathlib.Path(*abs_path.parts[i:])
+                break
+        else:
+            taxonomy_path = abs_path
+
+        taxonomy = Taxonomy(taxonomy_path=taxonomy_path, abs_path=abs_path, message_format=self.message_format)
+
+        if not abs_path.is_file():
+            return taxonomy.error(
+                'The file "%s" does not exist or is not a file',
+                abs_path,
+            )
+
+        if abs_path.name != "qna.yaml":
+            return taxonomy.error(
+                'Taxonomy file must be named "qna.yaml"; "%s" is not a valid name',
+                abs_path.name,
+            )
+
+        try:
+            text = abs_path.read_text(encoding="utf-8")
+
+            contents: Mapping[str, typing.Any] | typing.Any = yaml.safe_load(text)
+            if not contents:
+                return taxonomy.warning("The file is empty")
+
+            if not isinstance(contents, Mapping):
+                return taxonomy.error(
+                    "The file is not valid. The top-level element is not an object with key-value pairs.",
+                )
+
+            version: int = self.schema_version
+            if version < 1:  # Use version from YAML document
+                contents_version = contents.get("version", 1)
+                if isinstance(contents_version, int):
+                    version = contents_version
+                else:
+                    # schema validation will complain about the type
+                    try:
+                        version = int(contents_version)
+                    except (ValueError, TypeError):
+                        version = 1  # fallback to version 1
+
+            taxonomy.version = version
+            taxonomy.contents = contents
+
+            if version > 1:  # no linting for version 1 yaml
+                self._yamllint(text=text, taxonomy=taxonomy)
+
+            self._schema_validate(text=text, taxonomy=taxonomy)
+        except Exception as e:
+            raise TaxonomyReadingException from e
+
+        return taxonomy
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..3138b55
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# Standard
+import logging
+import pathlib
+
+# Third Party
+import pytest
+
+
+@pytest.fixture
+def caplog(caplog: pytest.LogCaptureFixture) -> pytest.LogCaptureFixture:  # pylint: disable=redefined-outer-name
+    caplog.set_level(logging.INFO, logger="instructlab.schema")
+    return caplog
+
+
+@pytest.fixture(scope="session")
+def testdata() -> pathlib.Path:
+    abs_path = pathlib.Path(__file__).parent.resolve().joinpath("testdata")
+    cwd = pathlib.Path.cwd()
+    return abs_path.relative_to(cwd) if abs_path.is_relative_to(cwd) else abs_path
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..d40d66f
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,377 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# Standard
+import logging
+import os
+import pathlib
+import re
+from collections.abc import Callable
+
+# Third Party
+import pytest
+from assertpy import assert_that
+
+from instructlab.schema.taxonomy import TaxonomyMessageFormat, TaxonomyParser
+
+
+class TestParsingLogging:
+    def message_filter(self, regex: str) -> Callable[[logging.LogRecord], bool]:
+        pattern = re.compile(regex)
+        return lambda r: bool(re.search(pattern, r.message))
+
+    def test_invalid(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"{re.escape(test_yaml)}:"),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"line too long"),
+        ).contains_only(logging.WARNING)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"Unevaluated properties.*createdby"),
+        ).contains_only(logging.ERROR)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"created_by.*required property"),
+        ).contains_only(logging.ERROR)
+
+    def test_invalid_yamlint_strict(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, yamllint_strict=True, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(3)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"{re.escape(test_yaml)}:"),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"line too long"),
+        ).contains_only(logging.ERROR)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"Unevaluated properties.*createdby"),
+        ).contains_only(logging.ERROR)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"created_by.*required property"),
+        ).contains_only(logging.ERROR)
+
+    def test_invalid_custom_yaml_config(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        yamllint_config = "{extends: relaxed, rules: {line-length: {max: 180}}}"
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(
+            schema_version=0,
+            message_format="LOGGING",
+            yamllint_config=yamllint_config,
+        )
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"{re.escape(test_yaml)}:"),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"line too long"),
+        ).is_empty()
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"Unevaluated properties.*createdby"),
+        ).contains_only(logging.ERROR)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"created_by.*required property"),
+        ).contains_only(logging.ERROR)
+
+    def test_incomplete_skill(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/skill_incomplete/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"{re.escape(test_yaml)}:"),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"[\.seed_examples].*Value must have at least"),
+        ).contains_only(logging.ERROR)
+
+    def test_valid_skill(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/skill_valid/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format="logging")
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_zero()
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).is_empty()
+
+        assert_that(taxonomy.contents).contains_only("version", "created_by", "seed_examples", "task_description")
+        assert_that(taxonomy.contents.get("seed_examples")).is_not_none().is_length(5)
+
+    def test_valid_knowledge(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "knowledge/knowledge_valid/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_zero()
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).is_empty()
+
+        assert_that(taxonomy.contents).contains_only(
+            "version",
+            "created_by",
+            "seed_examples",
+            "document_outline",
+            "document",
+            "domain",
+        )
+        assert_that(taxonomy.contents.get("seed_examples")).is_not_none().is_length(5)
+        assert_that(taxonomy.contents.get("document")).is_not_none().contains_only("repo", "commit", "patterns")
+
+    def test_unsupported_knowledge(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "knowledge/unsupported/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"Version .* is not supported for knowledge\. Minimum supported version is .*"),
+        ).contains_only(logging.ERROR)
+
+    def test_file_does_not_exist(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "knowledge/invalid_name/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"does not exist or is not a file"),
+        ).contains_only(logging.ERROR)
+
+    def test_file_has_wrong_extension(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "knowledge/invalid_name/qna.yml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"""Taxonomy file must be named "qna\.yaml".*qna.yml"""),
+        ).contains_only(logging.ERROR)
+
+    def test_file_has_wrong_name(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "knowledge/invalid_name/file.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"""Taxonomy file must be named "qna\.yaml".*file\.yaml"""),
+        ).contains_only(logging.ERROR)
+
+    def test_empty_yaml(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/empty_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_equal_to(1)
+        assert_that(taxonomy.errors).is_zero()
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"The file is empty"),
+        ).contains_only(logging.WARNING)
+
+    def test_array_yaml(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/array_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"The file is not valid"),
+        ).contains_only(logging.ERROR)
+
+    def test_version_1(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/version_1/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_zero()
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(taxonomy.version).is_equal_to(1)
+        assert_that(caplog.records).is_empty()
+
+    def test_version_1_as_version_2(self, caplog: pytest.LogCaptureFixture, testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/version_1/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=2, message_format=TaxonomyMessageFormat.LOGGING)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        assert_that(taxonomy.version).is_equal_to(2)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"{re.escape(test_yaml)}:"),
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"line too long"),
+        ).contains_only(logging.WARNING)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"version.*required property"),
+        ).contains_only(logging.ERROR)
+
+
+class TestParsingStdout:
+    def test_format_github(self, capsys: pytest.CaptureFixture[str], testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.GITHUB)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        captured = capsys.readouterr()
+        assert_that(captured.err).is_empty()
+        assert_that(captured.out).is_not_empty()
+        lines: list[str] = captured.out.splitlines()
+        assert_that(lines).is_not_empty()
+        pattern = f"^::(error|warning) file={re.escape(str(rel_path))},"
+        for line in lines:
+            assert_that(line).matches(pattern)
+
+    def test_format_standard(self, capsys: pytest.CaptureFixture[str], testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.STANDARD)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        captured = capsys.readouterr()
+        assert_that(captured.err).is_empty()
+        assert_that(captured.out).is_not_empty()
+        lines: list[str] = captured.out.splitlines()
+        assert_that(lines).is_not_empty()
+        pattern = f"^(ERROR|WARN): {re.escape(str(rel_path))}:"
+        for line in lines:
+            assert_that(line).matches(pattern)
+
+    def test_format_auto(self, capsys: pytest.CaptureFixture[str], testdata: pathlib.Path) -> None:
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        rel_path = testdata.joinpath(test_yaml)
+        parser = TaxonomyParser(schema_version=0)
+        taxonomy = parser.parse(rel_path)
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        assert_that(taxonomy.path.as_posix()).is_equal_to(test_yaml)
+        assert_that(taxonomy.rel_path).is_equal_to(rel_path)
+        captured = capsys.readouterr()
+        assert_that(captured.err).is_empty()
+        assert_that(captured.out).is_not_empty()
+        lines: list[str] = captured.out.splitlines()
+        assert_that(lines).is_not_empty()
+        pattern = (
+            f"^::(error|warning) file={re.escape(str(rel_path))},"
+            if "GITHUB_ACTIONS" in os.environ and "GITHUB_WORKFLOW" in os.environ
+            else f"^(ERROR|WARN): {re.escape(str(rel_path))}:"
+        )
+        for line in lines:
+            assert_that(line).matches(pattern)
diff --git a/tests/test_versions.py b/tests/test_versions.py
index ed08865..6fd0524 100644
--- a/tests/test_versions.py
+++ b/tests/test_versions.py
@@ -1,48 +1,49 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Standard
+import importlib.resources
 import json
-from importlib import resources
+from importlib.abc import Traversable
 
 # Third Party
+from assertpy import assert_that
 from referencing import Resource
 from referencing.jsonschema import DRAFT202012
 
-from instructlab.schema import schema_versions
+from instructlab.schema import schema_base, schema_versions
 
 
 class TestVersions:
-    def test_versions(self):
+    def _load_schemas(self, version: Traversable) -> None:
+        assert_that(version).is_not_none()
+        for schema_name in ("compositional_skills", "knowledge", "version"):
+            path = version.joinpath(f"{schema_name}.json")
+            text = path.read_text(encoding="utf-8")
+            assert_that(text).is_not_none().is_not_empty()
+            contents = json.loads(text)
+            assert_that(contents).is_not_none().is_not_empty()
+            resource = Resource.from_contents(contents=contents, default_specification=DRAFT202012)
+            assert_that(resource).is_not_none().has_contents(contents)
+
+    def test_schema_base(self) -> None:
+        base = schema_base()
+        assert_that(base).is_not_none()
         versions = schema_versions()
-        assert versions is not None
-        assert len(versions) > 1
-        for i, v in enumerate(versions):
-            assert v.name == f"v{i+1}"
-
-    def _load_schema(self, path):
-        text = path.read_text(encoding="utf-8")
-        assert text
-        assert len(text) > 1
-        contents = json.loads(text)
-        assert contents
-        assert len(contents) > 1
-        resource = Resource.from_contents(
-            contents=contents, default_specification=DRAFT202012
-        )
-        assert resource
-        assert resource.contents == contents
-
-    def test_import_schema_base(self):
-        schema_base = resources.files("instructlab.schema")
-        for i in range(len(schema_versions())):
-            schema_version = schema_base.joinpath(f"v{i+1}")
-            for schema_name in ("compositional_skills", "knowledge", "version"):
-                path = schema_version.joinpath(f"{schema_name}.json")
-                self._load_schema(path)
-
-    def test_import_schema_versions(self):
-        for i in range(len(schema_versions())):
-            schema_version = resources.files(f"instructlab.schema.v{i+1}")
-            for schema_name in ("compositional_skills", "knowledge", "version"):
-                path = schema_version.joinpath(f"{schema_name}.json")
-                self._load_schema(path)
+        assert_that(versions).is_not_none().is_not_empty()
+        for i in range(len(versions)):
+            version = base.joinpath(f"v{i+1}")
+            self._load_schemas(version)
+
+    def test_schema_versions(self) -> None:
+        versions = schema_versions()
+        assert_that(versions).is_not_none().is_not_empty()
+        for i, version in enumerate(versions):
+            assert_that(version).has_name(f"v{i+1}")
+            self._load_schemas(version)
+
+    def test_importlib_schema(self) -> None:
+        versions = schema_versions()
+        assert_that(versions).is_not_none().is_not_empty()
+        for i in range(len(versions)):
+            version = importlib.resources.files(f"instructlab.schema.v{i+1}")
+            self._load_schemas(version)
diff --git a/tests/testdata/compositional_skills/array_yaml/qna.yaml b/tests/testdata/compositional_skills/array_yaml/qna.yaml
new file mode 100644
index 0000000..dc790be
--- /dev/null
+++ b/tests/testdata/compositional_skills/array_yaml/qna.yaml
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+["a", "b"]
diff --git a/tests/testdata/compositional_skills/empty_yaml/qna.yaml b/tests/testdata/compositional_skills/empty_yaml/qna.yaml
new file mode 100644
index 0000000..0f8869a
--- /dev/null
+++ b/tests/testdata/compositional_skills/empty_yaml/qna.yaml
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# empty
diff --git a/tests/testdata/compositional_skills/invalid_yaml/qna.yaml b/tests/testdata/compositional_skills/invalid_yaml/qna.yaml
new file mode 100644
index 0000000..f0083c8
--- /dev/null
+++ b/tests/testdata/compositional_skills/invalid_yaml/qna.yaml
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: Apache-2.0
+
+createdby: invalid_key
+version: 2
+seed_examples:
+- question: What is this skill about?
+  answer: It's a skill that makes the tests more skillful
+- answer: "answer2"
+  question: "question2"
+- answer: "answer6"
+  question: "This is for a unit test and has a line with 124 characters! It is too long for the default rules but not too long for the customer rules!"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For invalid yaml tests
diff --git a/tests/testdata/compositional_skills/skill_incomplete/qna.yaml b/tests/testdata/compositional_skills/skill_incomplete/qna.yaml
new file mode 100644
index 0000000..4f56a0a
--- /dev/null
+++ b/tests/testdata/compositional_skills/skill_incomplete/qna.yaml
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test
+version: 2
+seed_examples:
+- question: "Does this yaml pass schema validation?"
+  answer: "No, it does not! It should have 5 examples."
+task_description: 'This yaml does not conform to the schema'
diff --git a/tests/testdata/compositional_skills/skill_valid/qna.yaml b/tests/testdata/compositional_skills/skill_valid/qna.yaml
new file mode 100644
index 0000000..d015a92
--- /dev/null
+++ b/tests/testdata/compositional_skills/skill_valid/qna.yaml
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 3
+seed_examples:
+- answer: Yes, it is.
+  question: Is this for a test?
+- answer: Yes I am very sure.
+  question: Are you sure it's for a test?
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: for testing
diff --git a/tests/testdata/compositional_skills/version_1/qna.yaml b/tests/testdata/compositional_skills/version_1/qna.yaml
new file mode 100644
index 0000000..da1b888
--- /dev/null
+++ b/tests/testdata/compositional_skills/version_1/qna.yaml
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: someone
+seed_examples:
+- question: What is this skill about?
+  answer: It's a skill that makes the tests more skillful
+- answer: "answer2"
+  question: "question2"
+- answer: "answer6"
+  question: "This is for a unit test and has a line with 124 characters! It is too long for the default rules but not too long for the customer rules!"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For yaml tests
diff --git a/tests/testdata/knowledge/invalid_name/file.yaml b/tests/testdata/knowledge/invalid_name/file.yaml
new file mode 100644
index 0000000..83940ef
--- /dev/null
+++ b/tests/testdata/knowledge/invalid_name/file.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 3
+domain: test-domain
+seed_examples:
+  - context: |
+      some context 1
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question1_2"
+        answer: "answer1_2"
+      - question: "question1_3"
+        answer: "answer1_3"
+  - context: |
+      some context 2
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question2_2"
+        answer: "answer2_2"
+      - question: "question2_3"
+        answer: "answer2_3"
+  - context: |
+      some context 3
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question3_2"
+        answer: "answer3_2"
+      - question: "question3_3"
+        answer: "answer3_3"
+  - context: |
+      some context 4
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question4_2"
+        answer: "answer4_2"
+      - question: "question4_3"
+        answer: "answer4_3"
+  - context: |
+      some context 4
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question5_2"
+        answer: "answer5_2"
+      - question: "question5_3"
+        answer: "answer5_3"
+document_outline: For knowledge tests
+document:
+  repo: https://github.com/example-org/example-repo
+  commit: a0c3c8e
+  patterns:
+    - "*.md"
+    - docs/*.md
diff --git a/tests/testdata/knowledge/invalid_name/qna.yml b/tests/testdata/knowledge/invalid_name/qna.yml
new file mode 100644
index 0000000..83940ef
--- /dev/null
+++ b/tests/testdata/knowledge/invalid_name/qna.yml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 3
+domain: test-domain
+seed_examples:
+  - context: |
+      some context 1
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question1_2"
+        answer: "answer1_2"
+      - question: "question1_3"
+        answer: "answer1_3"
+  - context: |
+      some context 2
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question2_2"
+        answer: "answer2_2"
+      - question: "question2_3"
+        answer: "answer2_3"
+  - context: |
+      some context 3
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question3_2"
+        answer: "answer3_2"
+      - question: "question3_3"
+        answer: "answer3_3"
+  - context: |
+      some context 4
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question4_2"
+        answer: "answer4_2"
+      - question: "question4_3"
+        answer: "answer4_3"
+  - context: |
+      some context 4
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question5_2"
+        answer: "answer5_2"
+      - question: "question5_3"
+        answer: "answer5_3"
+document_outline: For knowledge tests
+document:
+  repo: https://github.com/example-org/example-repo
+  commit: a0c3c8e
+  patterns:
+    - "*.md"
+    - docs/*.md
diff --git a/tests/testdata/knowledge/knowledge_valid/qna.yaml b/tests/testdata/knowledge/knowledge_valid/qna.yaml
new file mode 100644
index 0000000..83940ef
--- /dev/null
+++ b/tests/testdata/knowledge/knowledge_valid/qna.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 3
+domain: test-domain
+seed_examples:
+  - context: |
+      some context 1
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question1_2"
+        answer: "answer1_2"
+      - question: "question1_3"
+        answer: "answer1_3"
+  - context: |
+      some context 2
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question2_2"
+        answer: "answer2_2"
+      - question: "question2_3"
+        answer: "answer2_3"
+  - context: |
+      some context 3
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question3_2"
+        answer: "answer3_2"
+      - question: "question3_3"
+        answer: "answer3_3"
+  - context: |
+      some context 4
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question4_2"
+        answer: "answer4_2"
+      - question: "question4_3"
+        answer: "answer4_3"
+  - context: |
+      some context 4
+    questions_and_answers:
+      - question: What is this knowledge about?
+        answer: It's a knowledge that makes the tests more knowledgeable
+      - question: "question5_2"
+        answer: "answer5_2"
+      - question: "question5_3"
+        answer: "answer5_3"
+document_outline: For knowledge tests
+document:
+  repo: https://github.com/example-org/example-repo
+  commit: a0c3c8e
+  patterns:
+    - "*.md"
+    - docs/*.md
diff --git a/tests/testdata/knowledge/unsupported/qna.yaml b/tests/testdata/knowledge/unsupported/qna.yaml
new file mode 100644
index 0000000..d09b633
--- /dev/null
+++ b/tests/testdata/knowledge/unsupported/qna.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 2
+domain: test-domain
+seed_examples:
+- question: What is this knowledge about?
+  answer: It's a knowledge that makes the tests more knowledgeable
+- answer: "answer2"
+  question: "question2"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For knowledge tests
+document:
+  repo: https://github.com/example-org/example-repo
+  commit: a0c3c8e
+  patterns:
+  - "*.md"
+  - docs/*.md
diff --git a/tox.ini b/tox.ini
index 1c4e721..b5443eb 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,12 +7,15 @@ envlist = ruff, pylint, mypy, jsonschema, py3-unit
 minversion = 4.4
 
 [testenv]
-description = Run tests (unit)
+description = Run tests with pytest
 package = wheel
 wheel_build_env = pkg
+passenv =
+    GITHUB_ACTIONS
+    GITHUB_WORKFLOW
 deps =
     pytest
-    jsonschema
+    assertpy
 commands =
     unit: {envpython} -m pytest {posargs:tests}
 
@@ -20,7 +23,7 @@ commands =
 description = Lint with pylint
 deps =
     pylint
-    jsonschema
+    {[testenv]deps}
 commands =
     {envpython} -m pylint {posargs:src tests}
 
@@ -30,10 +33,20 @@ skip_install = True
 skipsdist = true
 deps =
     ruff
-    jsonschema
+    {[testenv]deps}
 commands =
-    ./scripts/ruff.sh {posargs:fix}
-allowlist_externals = ./scripts/ruff.sh
+    ruff check --fix
+    ruff format
+
+[testenv:ruffcheck]
+description = Check code with Ruff
+skip_install = True
+skipsdist = true
+deps =
+    {[testenv:ruff]deps}
+commands =
+    ruff check --diff
+    ruff format --check
 
 [testenv:mypy]
 description = Python type checking with mypy
@@ -41,7 +54,10 @@ namespace_packages = True
 explicit_package_bases = True
 deps =
     mypy
-    jsonschema
+    {[testenv]deps}
+    types-assertpy
+    types-PyYAML
+    types-jsonschema
 commands =
     {envpython} -m mypy {posargs:src tests}