Skip to content

Commit

Permalink
Remove Bucket driver from nucliadb_datasets (#1757)
Browse files Browse the repository at this point in the history
* Remove Bucket driver from nucliadb_datasets

* WIP on deleting datasets

* WIP

* Fix

* Fix

* Fix sort

* Fix lint

* Update nucliadb_dataset/nucliadb_dataset/settings.py

Co-authored-by: Joan Antoni RE <[email protected]>

---------

Co-authored-by: Joan Antoni RE <[email protected]>
  • Loading branch information
bloodbare and jotare authored Jan 24, 2024
1 parent 8a67587 commit 4dc7501
Show file tree
Hide file tree
Showing 24 changed files with 794 additions and 813 deletions.
2 changes: 1 addition & 1 deletion nucliadb/nucliadb/train/api/v1/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
from fastapi import Request
from fastapi_versioning import version # type: ignore

from nucliadb.train.api.models import TrainSetPartitions
from nucliadb.train.api.utils import get_kb_partitions
from nucliadb.train.api.v1.router import KB_PREFIX, api
from nucliadb_models.resource import NucliaDBRoles
from nucliadb_models.trainset import TrainSetPartitions
from nucliadb_utils.authentication import requires_one


Expand Down
2 changes: 1 addition & 1 deletion nucliadb/nucliadb/train/api/v1/trainset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
from fastapi import Request
from fastapi_versioning import version # type: ignore

from nucliadb.train.api.models import TrainSetPartitions
from nucliadb.train.api.utils import get_kb_partitions
from nucliadb.train.api.v1.router import KB_PREFIX, api
from nucliadb_models.resource import NucliaDBRoles
from nucliadb_models.trainset import TrainSetPartitions
from nucliadb_utils.authentication import requires_one


Expand Down
19 changes: 1 addition & 18 deletions nucliadb_dataset/nucliadb_dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,41 +20,24 @@
from enum import Enum
from typing import Dict

from nucliadb_dataset.dataset import (
NucliaCloudDataset,
NucliaDBDataset,
Task,
download_all_partitions,
)
from nucliadb_dataset.dataset import NucliaDBDataset, Task, download_all_partitions
from nucliadb_dataset.nuclia import NucliaDriver

# Module-level registry mapping a string key to a NucliaDriver instance.
# It starts empty at import time; nothing in this module populates it.
NUCLIA_GLOBAL: Dict[str, NucliaDriver] = {}

# NOTE(review): presumably the key under which the default driver is stored
# in NUCLIA_GLOBAL -- confirm against the callers.
CLIENT_ID = "CLIENT"


class DatasetType(str, Enum):
    """String-valued enumeration of the dataset types known to this package.

    Because the class also derives from ``str``, every member compares equal
    to its plain-string value (e.g. ``DatasetType.FIELD_CLASSIFICATION ==
    "FIELD_CLASSIFICATION"``) and can be looked up from that string with
    ``DatasetType("FIELD_CLASSIFICATION")``. Each member's value is identical
    to its name.
    """

    FIELD_CLASSIFICATION = "FIELD_CLASSIFICATION"
    IMAGE_CLASSIFICATION = "IMAGE_CLASSIFICATION"
    PARAGRAPH_CLASSIFICATION = "PARAGRAPH_CLASSIFICATION"
    PARAGRAPH_STREAMING = "PARAGRAPH_STREAMING"
    QUESTION_ANSWER_STREAMING = "QUESTION_ANSWER_STREAMING"
    SENTENCE_CLASSIFICATION = "SENTENCE_CLASSIFICATION"
    TOKEN_CLASSIFICATION = "TOKEN_CLASSIFICATION"


class ExportType(str, Enum):
    """String-valued enumeration of the supported export kinds.

    Members also derive from ``str``, so they compare equal to their
    plain-string values and can be built from them, e.g.
    ``ExportType("FILESYSTEM") is ExportType.FILESYSTEM``.
    """

    # NOTE(review): presumably DATASETS targets the Hugging Face ``datasets``
    # format and FILESYSTEM writes files locally -- confirm against the
    # export code that consumes this enum.
    DATASETS = "DATASETS"
    FILESYSTEM = "FILESYSTEM"


# Public API of the package: the names exported by a star-import of this
# module. Every entry must be bound at module level above.
__all__ = (
"NucliaDBDataset",
"NucliaCloudDataset",
"Task",
"download_all_partitions",
"NUCLIA_GLOBAL",
"CLIENT_ID",
"DatasetType",
"ExportType",
)
Loading

3 comments on commit 4dc7501

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 4dc7501 Previous: c67870a Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13022.249376507873 iter/sec (stddev: 8.632377142703554e-7) 12887.24555746259 iter/sec (stddev: 2.385970996903907e-7) 0.99

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 4dc7501 Previous: c67870a Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13331.868293645879 iter/sec (stddev: 3.31814258499317e-7) 12887.24555746259 iter/sec (stddev: 2.385970996903907e-7) 0.97

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 4dc7501 Previous: c67870a Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 12853.391809990324 iter/sec (stddev: 1.1808676079927311e-7) 12887.24555746259 iter/sec (stddev: 2.385970996903907e-7) 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.