Skip to content

Commit

Permalink
Add codspeed benchmarks (#674)
Browse files Browse the repository at this point in the history
* Add `codspeed` benchmarks

* Make the test lighter

* Make test ultra light

* Use `python==3.12` for `codspeed`

* Add concurrency config for `codspeed` workflow
  • Loading branch information
gabrielmbmb authored May 31, 2024
1 parent 42efe6d commit 0dc464e
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 5 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/codspeed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Runs the pytest suite through the CodSpeed action.
name: Benchmarks

# Triggered by pushes to `main` and by pull requests.
on:
  push:
    branches: [main]
  pull_request:

# Runs are grouped per workflow and pull request number (falling back to the
# ref when there is no pull request); a new run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  benchmarks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'
          # Looks like it's not working very well for other people:
          # https://github.com/actions/setup-python/issues/436
          # cache: "pip"
          # cache-dependency-path: pyproject.toml

      # Caches the directory referenced by `env.pythonLocation`. The key changes
      # whenever `pyproject.toml` changes; bump the `-v00` suffix to invalidate
      # the cache manually.
      - id: cache
        uses: actions/cache@v3
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-benchmarks-v00

      # Skipped when the cache step above reported a hit.
      - name: Install dependencies
        if: steps.cache.outputs.cache-hit != 'true'
        run: ./scripts/install_dependencies.sh

      - name: Run benchmarks
        uses: CodSpeedHQ/action@v2
        with:
          token: ${{ secrets.CODSPEED_TOKEN }}
          run: pytest tests/ --codspeed
4 changes: 0 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ jobs:
if: steps.cache.outputs.cache-hit != 'true'
run: ./scripts/install_dependencies.sh

- name: Setup tmate session
if: ${{ github.event_name == 'workflow_dispatch' && matrix.python-version == '3.12' && github.event.inputs.tmate_session == 'true' }}
uses: mxschmitt/action-tmate@v3

- name: Lint
run: make lint

Expand Down
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,13 @@ docs = [
"CairoSVG >= 2.7.1",
"mknotebooks >= 0.8.0",
]
tests = ["pytest >= 7.4.0", "pytest-asyncio", "nest-asyncio", "pytest-timeout"]
tests = [
"pytest >= 7.4.0",
"pytest-asyncio",
"nest-asyncio",
"pytest-timeout",
"pytest-codspeed",
]

# Optional LLMs, integrations, etc
anthropic = ["anthropic >= 0.20.0"]
Expand Down
54 changes: 54 additions & 0 deletions tests/integration/test_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2023-present, Argilla, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, List

import numpy as np
from distilabel.pipeline import Pipeline
from distilabel.steps import GeneratorStep, StepInput, step

if TYPE_CHECKING:
from distilabel.steps import GeneratorStepOutput, StepOutput
from pytest_codspeed import BenchmarkFixture


class NumpyBigArrayGenerator(GeneratorStep):
    """Generator step that emits batches of rows holding random NumPy arrays."""

    # Number of batches `process` yields.
    num_batches: int

    @property
    def outputs(self) -> List[str]:
        """Return the single output column name, `array`."""
        return ["array"]

    def process(self, offset: int = 0) -> "GeneratorStepOutput":
        """Yield `num_batches` tuples of `(rows, is_last_batch)`.

        Each batch contains `batch_size` rows, and each row maps `array` to the
        result of `np.random.randn(256)`. The `offset` argument is accepted but
        not used.
        """
        last_index = self.num_batches - 1
        for batch_index in range(self.num_batches):
            rows = []
            for _ in range(self.batch_size):  # type: ignore
                rows.append({"array": np.random.randn(256)})
            # The second element flags the final batch.
            yield rows, batch_index == last_index  # type: ignore


@step(step_type="global")
def ReceiveArrays(inputs: StepInput) -> "StepOutput":
    """Global step that yields the rows it receives without modifying them."""
    yield inputs


def test_cache_time(benchmark: "BenchmarkFixture") -> None:
    """Benchmark `Pipeline.run` with `use_cache=False` on a two-step pipeline."""
    with Pipeline(name="dummy") as pipe:
        # Two batches of 100 rows each feed the receiving step.
        source = NumpyBigArrayGenerator(num_batches=2, batch_size=100)
        sink = ReceiveArrays()
        source >> sink

    # NOTE(review): the original indentation was lost; this call is assumed to
    # sit after the `with` block rather than inside it -- confirm.
    benchmark(pipe.run, use_cache=False)

0 comments on commit 0dc464e

Please sign in to comment.