Skip to content

Commit

Permalink
Initial release.
Browse files Browse the repository at this point in the history
  • Loading branch information
umarbutler committed Jun 12, 2024
1 parent cebd99f commit 701d02a
Show file tree
Hide file tree
Showing 11 changed files with 800 additions and 0 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Continuous integration for terge: installs the package and runs its test
# suite with coverage on every Python version in the matrix, then uploads the
# coverage report to Codecov.
name: ci

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      # Let every interpreter run to completion so that a failure on one
      # version does not hide failures on the others.
      fail-fast: false
      matrix:
        # Versions are quoted so that YAML does not read 3.10 as the float 3.1.
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

    steps:
      - name: Check-out repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install test dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install pytest
          python -m pip install pytest-cov
          python -m pip install torch

      # Install the package itself (not an editable install) so that the tests
      # exercise what users would actually get from `pip install`.
      - name: Install terge
        run: |
          python -m pip install .

      - name: Test with pytest
        run: |
          pytest --cov=terge --cov-report=xml

      # NOTE(review): codecov-action v4+ expects a CODECOV_TOKEN secret for
      # uploads; stay on v3 until that secret is configured, then upgrade.
      - name: Use Codecov to track coverage
        uses: codecov/codecov-action@v3
        with:
          files: ./coverage.xml
101 changes: 101 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# This file works as an allowlist: first, exclude everything.
*

# Then, include just the folders and files we want. Each folder is re-included
# twice: once as the folder itself (`!dir/`) and once for its contents
# (`!dir/**/*`), since the `*` pattern above excludes both.
!README.md
!CHANGELOG.md
!pyproject.toml
!LICENSE
!.isort.cfg
!src/
!src/**/*
!tests/
!tests/**/*
!.github/
!.github/**/*
!.gitignore
!assets/
!assets/banner.svg

# Next, exclude anything inside the above inclusions that we don't want
# (bytecode, build artefacts, caches, virtual environments and tool state).
# These patterns must come after the `!` inclusions so that they take precedence.
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
*.manifest
*.spec
pip-log.txt
pip-delete-this-directory.txt
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
*.mo
*.pot
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
instance/
.webassets-cache
.scrapy
docs/_build/
.pybuilder/
target/
.ipynb_checkpoints
profile_default/
ipython_config.py
.pdm.toml
__pypackages__/
celerybeat-schedule
celerybeat.pid
*.sage.py
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.spyderproject
.spyproject
.ropeproject
/site
.mypy_cache/
.dmypy.json
dmypy.json
.pyre/
.pytype/
cython_debug/
.archive/
catboost_info/
.persist_cache/

# And, finally, exclude any project-specific stuff we don't want (nothing yet).
7 changes: 7 additions & 0 deletions .isort.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[settings]
; Which files to process.
skip_gitignore = True

; Line wrapping.
line_length = 120

; Section layout.
sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
lines_between_types = 1

; Ordering of imports within a section.
length_sort = True
order_by_type = False
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## Changelog 🔄
All notable changes to terge will be documented here. This project adheres to [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 2024-06-12
### Added
- Added the `merge()` function, which merges PyTorch models.

[0.1.0]: https://github.com/umarbutler/terge/releases/tag/v0.1.0
19 changes: 19 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright (c) 2024 Umar Butler

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
142 changes: 142 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
![terge logo](https://github.com/umarbutler/terge/raw/main/assets/banner.svg)
-----------------------------------------------------------------------------
<a href="https://pypi.org/project/terge/"><img src="https://img.shields.io/pypi/v/terge" alt="PyPI Version"></a> <a href="https://github.com/umarbutler/terge/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/umarbutler/terge/ci.yml?branch=main" alt="Build Status"></a> <a href="https://app.codecov.io/gh/umarbutler/terge"><img src="https://img.shields.io/codecov/c/github/umarbutler/terge" alt="Code Coverage"></a> <!-- <a href="https://pypistats.org/packages/terge"><img src="https://img.shields.io/pypi/dm/terge" alt="Downloads"></a> -->

terge is an *easy-to-use* Python library for merging PyTorch models. It works with models of any size and architecture, including Hugging Face 🤗 Transformers.

## Features 🎯
- **👌 Easy-to-use**: a single line of code is all you need to get started.
- **⚡ Lightning-fast**: billions of parameters can be merged in mere seconds.
- **📐 Architecture-agnostic**: models of any size and architecture can be merged, provided they share at least one parameter with the same name and shape.
- **🛠️ Hyper-customizable**: parameters can be filtered in or out with regex, and custom weights can be assigned to models or even to their individual parameters.
- **🌳 Lineage tracking**: maps of merged parameter names to models' weightings can be produced to document precisely how models were merged.
- **🤗 Hugging Face-friendly**: Hugging Face 🤗 Transformers are supported out of the box.

## Installation 🧑‍🔧
`terge` can be installed with `pip`:
```bash
pip install terge
```

## Usage 👩‍💻
The following code snippet demonstrates how you can get started with `terge`:
```python
import re
import torch
import terge

from transformers import AutoModel # NOTE `transformers` isn't required, this is just for demo purposes.

# A single line is all it takes to merge any number of models.
model = terge.merge([torch.nn.Linear(10, 1) for _ in range(3)])

# This also works for models of different architectures...
model = terge.merge([torch.nn.LSTM(10, 1, num_layers = 1), torch.nn.LSTM(10, 1, num_layers = 2)])

# And models of different sizes...
model = terge.merge([torch.nn.LSTM(10, 1, num_layers = 1), torch.nn.LSTM(100, 1, num_layers = 2)])

# And even Hugging Face 🤗 Transformers...
model = terge.merge([AutoModel.from_pretrained('umarbutler/emubert'),
AutoModel.from_pretrained('roberta-base')],
progress = True)

# Just make sure there's at least one shared named parameter in there.
model = terge.merge([torch.nn.Linear(10, 1), torch.nn.Linear(1, 10)]) # -> terge.NoParametersToMergeWarning
```

If you want even greater control over the merging process, `terge` has got you covered:
```python
# Changing how parameters are merged and what model serves as the base is trivial.
model = terge.merge(
[torch.nn.Linear(10, 1) for _ in range(3)],
base = torch.nn.Linear(10, 1), # The base model doesn't even need to be getting merged! You can also
# use the index of a model in the input models. The default is 0.
weights = [1, 2, 3], # Weights are relative and correspond to the order of the input models such that,
# here, the second model is weighted double the weight of the first model and the third model is weighted
# triple the weight of the first model. The default is [1, 1, ...].
)

# Assigning custom weights to individual parameters is also easy.
model = terge.merge(
[torch.nn.Linear(10, 1) for _ in range(3)],
weights = {re.compile(r'weight'): [1, 2, 3], 'bias': [3, 2, 1]}, # Anything that doesn't match this map
# will get a weight of 1. You can change that by adding `re.compile(r'.*'): [...]` to the *end* of your
# weights map.
)

# If you want to filter specific parameters in or out, that can be done too.
model = terge.merge(
[torch.nn.Linear(10, 1) for _ in range(3)],
included = re.compile(r'weight'), # Only parameters with 'weight' in their name will be merged.
# You could also pass a string for an exact match.
excluded = ['bias', re.compile(r'bias')], # Lists of strings and regex patterns work as well.
# NOTE Exclusions execute after inclusions, so this isn't actually necessary.
)

# You can also enable lineage tracking to understand exactly how models got merged.
model, lineage = terge.merge(
[torch.nn.Linear(10, 1) for _ in range(3)],
lineage = True,
) # -> {'weight': ('arithmetic', [(0, 0.3333333333333333), (1, 0.3333333333333333), (2, 0.3333333333333333)]),
# 'bias': ('arithmetic', [(0, 0.3333333333333333), (1, 0.3333333333333333), (2, 0.3333333333333333)])}

# Finally, for an extra speed boost, you can merge in-place (just keep in mind, this will modify your base model).
model = terge.merge(
[torch.nn.Linear(10, 1) for _ in range(3)],
inplace = True,
)
```

## API 🧩
### `merge()`
```python
def merge(
models: list[torch.nn.Module],
base: torch.nn.Module | int = 0,
method: Literal['arithmetic'] | dict[str | re.Pattern, Literal['arithmetic']] = 'arithmetic',
weights: list[float] | dict[str | re.Pattern, list[float]] = None,
included: re.Pattern | str | list[str | re.Pattern] = None,
excluded: re.Pattern | str | list[str | re.Pattern] = None,
inplace: bool = False,
dtype: torch.dtype = torch.float64,
lineage: bool = False,
progress: bool = False,
) -> torch.nn.Module | tuple[torch.nn.Module, dict[str, tuple[str, list[tuple[int, float]]]]]
```

`merge()` merges PyTorch models.

`models` represents the models to be merged.

`base` represents the model whose parameters will be used as defaults and that, if `inplace` is set to `True`, will be merged into; or the index of such a model in `models`. It defaults to `0`, that is, the index of the first model in `models`.

`method` represents the method to be used for merging the models' parameters, or a map of parameter names or regex patterns matching parameter names to the methods to be used to merge them. Currently, only the `'arithmetic'` method is supported (that is, the merging of parameters by taking their ordinary or weighted arithmetic mean). `method` defaults to `'arithmetic'`.

`weights` represents a list of all of the relative weights to be assigned to the models' parameters, or a map of parameter names or regex patterns matching parameter names to lists of weights. If set to `None`, all models will be weighted equally. If a dictionary is provided and there are any parameters to be merged that do not match any of the keys of that dictionary, they will also be weighted equally. `weights` defaults to `None`.

`included` represents a regex pattern, string or list of regex patterns and strings matching parameter names to be merged. If set to `None`, all parameters will be merged. `included` defaults to `None`.

`excluded` represents a regex pattern, string or list of regex patterns and strings matching parameter names to be excluded from merging. If set to `None`, no parameters will be excluded. If `included` is provided, this argument will apply to the subset of parameters that match `included`. `excluded` defaults to `None`.

`inplace` represents whether, for the sake of expediency or memory conservation, the `base` should be merged into in place instead of being deep copied. It defaults to `False`.

`dtype` represents the data type to be used for storing the weightings. It defaults to `torch.float64`.

`lineage` represents whether to output a tuple containing the merged model along with a dictionary mapping the names of merged parameters to a tuple containing the names of merge methods and a list of tuples containing the indices of merged models that contributed to those parameters and the weights they were assigned. It defaults to `False`.

`progress` represents whether to display a progress bar. It defaults to `False`.

`merge()` will return either a merged model, or, if `lineage` is `True`, a tuple containing the merged model along with a dictionary mapping the names of merged parameters to a tuple containing the names of merge methods and a list of tuples containing the indices of merged models that contributed to those parameters and the weights they were assigned, which looks like this:
```python
{
'parameter_name': ('method', [(model_index, weight), ...]),
...
}
```

## Changelog 🔄
terge adheres to [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html). All notable changes to terge are documented in its [Changelog 🔄](https://github.com/umarbutler/terge/blob/main/CHANGELOG.md).

## License 📜
terge is licensed under the [MIT License](https://github.com/umarbutler/terge/blob/main/LICENSE).
62 changes: 62 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Packaging metadata for terge, built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "terge"
version = "0.1.0"
authors = [
    # NOTE(review): this address looks like an obfuscation placeholder rather
    # than a real email; confirm and restore the author's address before publishing.
    { name = "Umar Butler", email = "[email protected]" },
]
description = "An easy-to-use Python library for merging PyTorch models."
readme = "README.md"
requires-python = ">=3.9"
license = { text = "MIT" }
keywords = [
    "pytorch",
    "torch",
    "merge",
    "merger",
    "merging",
    "model",
    "models",
    "artificial intelligence",
    "ai",
    "machine learning",
    "ml",
    "neural net",
    "neural nets",
    "neural network",
    "neural networks",
    "nn",
    "nns",
]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Information Technology",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    # Keep this list in step with the Python versions tested in
    # .github/workflows/ci.yml; only advertise versions that CI exercises.
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: Implementation :: CPython",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Mathematics",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Utilities",
    "Typing :: Typed",
]
dependencies = [
    "torch",
]

[project.urls]
Homepage = "https://github.com/umarbutler/terge"
Documentation = "https://github.com/umarbutler/terge/blob/main/README.md"
Issues = "https://github.com/umarbutler/terge/issues"
Source = "https://github.com/umarbutler/terge"
4 changes: 4 additions & 0 deletions src/terge/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""An easy-to-use Python library for merging PyTorch models."""

from .terge import (
    merge,
    NoParametersToMergeWarning,
    NoMergeMethodFoundForParameterError,
    ParameterModelWeightsSumToZeroError,
)

# Declare the public API explicitly. This package ships a `py.typed` marker, and
# without `__all__` type checkers and linters may treat the names imported above
# as private or unused rather than as intentional re-exports.
__all__ = [
    'merge',
    'NoParametersToMergeWarning',
    'NoMergeMethodFoundForParameterError',
    'ParameterModelWeightsSumToZeroError',
]
Empty file added src/terge/py.typed
Empty file.
Loading

0 comments on commit 701d02a

Please sign in to comment.