# tests: improve llm evals #79

# Workflow: on every push, install the project with Poetry and run the
# pytest tests selected by the "llm" marker.
name: Evaluate LLM test cases

on: [push]

jobs:
  build:
    strategy:
      matrix:
        # Quoted so the version stays a string (an unquoted 3.10 would be
        # read as the float 3.1).
        python-version: ["3.12"]

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies and build
        # if you change something here, you must also change it in .github/workflows/build-binaries-and-brew.yaml
        run: |
          python -m pip install --upgrade pip setuptools pyinstaller

          curl -sSL https://install.python-poetry.org | python3 - --version 1.4.0
          poetry config virtualenvs.create false
          poetry install --no-root
          poetry run python -m playwright install --with-deps firefox

      - name: Run tests
        shell: bash
        env:
          # Read from the repository/organization secret of the same name.
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          poetry run pytest -m "llm"

# Provide feedback