Skip to content

Commit

Permalink
ci: test imports up to python 3.12 and remove scikit-learn dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Oct 10, 2024
1 parent a0ae653 commit c03fa3a
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 29 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: ["3.7", "3.8", "3.9"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2

Expand All @@ -172,6 +172,7 @@ jobs:

- name: Install library
run: |
pip install .
pip install . pytest
pytest tests/pipelines/test_pipelines.py
# uv venv
# uv pip install .
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
### Changed

- Rename `eds.measurements` to `eds.quantities`
- scikit-learn (used in `eds.endlines`) is no longer installed by default when installing `edsnlp[ml]`

## v0.13.0

Expand Down
12 changes: 8 additions & 4 deletions edsnlp/pipes/core/endlines/endlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ class EndLinesMatcher(GenericMatcher):
Behind the scenes, it uses an `endlinesmodel` instance, which is an unsupervised
algorithm based on the work of [@zweigenbaum2016].
!!! warning "Installation"
To use this component, you need to install the `scikit-learn` library.
Training
--------
```python
Expand Down Expand Up @@ -93,12 +97,12 @@ class EndLinesMatcher(GenericMatcher):
Extensions
----------
The `eds.endlines` pipeline declares one extension, on both `Span` and `Token`
objects. The `end_line` attribute is a boolean, set to `True` if the pipeline
The `eds.endlines` pipe declares one extension, on both `Span` and `Token`
objects. The `end_line` attribute is a boolean, set to `True` if the pipe
predicts that the new line is an end line character. Otherwise, it is set to
`False` if the new line is classified as a space.
The pipeline also sets the `excluded` custom attribute on newlines that are
The pipe also sets the `excluded` custom attribute on newlines that are
classified as spaces. It lets downstream matchers skip excluded tokens
(see [normalisation](/pipes/core/normalisation/) for more detail).
Expand All @@ -113,7 +117,7 @@ class EndLinesMatcher(GenericMatcher):
Authors and citation
--------------------
The `eds.endlines` pipeline was developed by AP-HP's Data Science team based on
The `eds.endlines` pipe was developed by AP-HP's Data Science team based on
the work of [@zweigenbaum2016].
'''

Expand Down
40 changes: 19 additions & 21 deletions edsnlp/pipes/misc/quantities/quantities.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ def __init__(
as_ents: bool = False,
span_setter: Optional[SpanSetterArg] = None,
use_tables: bool = True,
measurements: Union[str, List[Union[str, MsrConfig]], Dict[str, MsrConfig]] = None # deprecated # noqa: E501
measurements: Optional[Union[str, List[Union[str, MsrConfig]], Dict[str, MsrConfig]]] = None, # deprecated # noqa: E501
):

if measurements:
Expand All @@ -632,7 +632,7 @@ def __init__(
"Skipping that step."
)

self.all_quantities = (quantities == "all")
self.all_quantities = quantities == "all"
if self.all_quantities:
quantities = []

Expand All @@ -659,9 +659,7 @@ def __init__(
self.extract_ranges = extract_ranges
self.range_patterns = range_patterns
self.span_getter = (
validate_span_getter(span_getter)
if span_getter is not None
else None
validate_span_getter(span_getter) if span_getter is not None else None
)
self.merge_mode = merge_mode
self.before_snippet_limit = before_snippet_limit
Expand All @@ -676,10 +674,7 @@ def __init__(
"ents": as_ents,
"measurements": True,
"quantities": True,
**{
name: [name]
for name in self.measure_names.values()
}
**{name: [name] for name in self.measure_names.values()},
}

super().__init__(nlp=nlp, name=name, span_setter=span_setter)
Expand Down Expand Up @@ -1033,10 +1028,17 @@ def get_matches_before(i):
table_pd = table._.to_pd_table(as_spans=True)
# Find out the number's row
for _, row in table_pd.iterrows():
start_line = next((item.start for item in row
if item is not None), None)
end_line = next((item.end for item in reversed(row)
if item is not None), None)
start_line = next(
(item.start for item in row if item is not None), None
)
end_line = next(
(
item.end
for item in reversed(row)
if item is not None
),
None,
)
if start_line is None:
continue

Expand Down Expand Up @@ -1136,10 +1138,7 @@ def is_within_row(x):

else:
ent.label_ = self.measure_names[dims]
ent._.set(
ent.label_,
SimpleQuantity(value, unit_norm, self.unit_registry)
)
ent._.set(ent.label_, SimpleQuantity(value, unit_norm, self.unit_registry))

quantities.append(ent)

Expand Down Expand Up @@ -1224,9 +1223,7 @@ def merge_quantities_in_ranges(self, quantities: List[Span]) -> List[Span]:
]
if len(matching_patterns):
try:
new_value = RangeQuantity.from_quantities(
last._.value, ent._.value
)
new_value = RangeQuantity.from_quantities(last._.value, ent._.value)
merged[-1] = last = last.doc[
last.start
if matching_patterns[0][0] is None
Expand Down Expand Up @@ -1296,7 +1293,8 @@ def __call__(self, doc):
existing = (
list(get_spans(doc, self.span_getter))
if self.span_getter is not None
else ())
else ()
)
snippets = (
dict.fromkeys(ent.sent for ent in existing)
if self.span_getter is not None
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ ml = [
"safetensors>=0.3.0",
"transformers>=4.0.0,<5.0.0",
"accelerate>=0.20.3,<1.0.0",
"scikit-learn>=1.0.0",
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion tests/pipelines/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ def test_import_all():
import edsnlp.pipes

for name in dir(edsnlp.pipes):
if not name.startswith("_"):
if not name.startswith("_") and "endlines" not in name:
getattr(edsnlp.pipes, name)

0 comments on commit c03fa3a

Please sign in to comment.