Skip to content

Commit

Permalink
Merge branch 'main' into remove-datasets-from-datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
bloodbare authored Jan 23, 2024
2 parents dee9428 + 4d7edf7 commit 656eae6
Show file tree
Hide file tree
Showing 20 changed files with 340 additions and 190 deletions.
54 changes: 52 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: PyPI Releases
name: PyPI and Helm Releases

on:
push:
Expand Down Expand Up @@ -155,4 +155,54 @@ jobs:
nuclia/nucliadb:${{ steps.version_step.outputs.version_number }}
nuclia/nucliadb:${{ steps.version_step.outputs.hash }}
cache-from: type=gha
cache-to: type=gha,mode=min
cache-to: type=gha,mode=min

# Job: package the Helm chart and upload it to the chart repository.
# Runs only after the `push-docker` job has finished (see `needs`).
deploy:
name: Deploy Helm chart and trigger internal CI
runs-on: ubuntu-latest
needs: push-docker

steps:
- name: Checkout repository
uses: actions/checkout@v3

# Exposes the abbreviated commit hash of HEAD as the step output `short_sha`.
- name: Calculate short sha
id: env-vars
run: |-
HASH=`git rev-parse --short HEAD`
echo "short_sha=$HASH" >> $GITHUB_OUTPUT
# Replaces the image, registry and version placeholders in the chart files and
# exposes "<contents of VERSION>+<short_sha>" as the step output `version_number`.
# NOTE(review): the sed commands edit ./charts/nucliadb_writer/*, while the
# "Push helm package" step lints and packages charts/nucliadb -- confirm which
# chart directory is intended.
# NOTE(review): $IMAGE_NAME and $CONTAINER_REGISTRY are not set anywhere in this
# job; confirm they are defined in a workflow-level `env` block.
- name: Set helm package image
id: version_step
run: |-
sed -i.bak "s#IMAGE_TO_REPLACE#$IMAGE_NAME:${{ steps.env-vars.outputs.short_sha }}#" ./charts/nucliadb_writer/values.yaml
sed -i.bak "s#CONTAINER_REGISTRY_TO_REPLACE#$CONTAINER_REGISTRY#" ./charts/nucliadb_writer/values.yaml
VERSION=`cat VERSION`
VERSION_SHA=$VERSION+${{ steps.env-vars.outputs.short_sha }}
sed -i.bak "s#99999.99999.99999#$VERSION_SHA#" ./charts/nucliadb_writer/Chart.yaml
echo "version_number=$VERSION_SHA" >> $GITHUB_OUTPUT
- name: Configure Git
run: |
git config user.name "$GITHUB_ACTOR"
git config user.email "[email protected]"
- name: Install Helm
uses: azure/setup-helm@v3
with:
version: v3.4.0

# Lints and packages charts/nucliadb, then uploads
# nucliadb-<version_number>.tgz to the chart repository's /api/charts endpoint.
# NOTE(review): the upload presumably relies on the packaged chart's version
# being equal to `version_number`; verify, since the version placeholder above
# is replaced in a different chart directory.
- name: Push helm package
run: |-
helm lint charts/nucliadb
helm package charts/nucliadb
curl --data-binary "@nucliadb-${{ steps.version_step.outputs.version_number }}.tgz" ${{ secrets.HELM_CHART_URL }}/api/charts
# Not working yet, disabled for now
# - name: Repository Dispatch
# uses: peter-evans/repository-dispatch@v2
# with:
# token: ${{ secrets.GH_CICD_PUBLIC }}
# repository: nuclia/nucliadb_deploy
# event-type: promote
# client-payload: '{"component": "nucliadb_standalone", "chart-version": "${{ steps.version_step.outputs.version_number }}" }'
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.40.1
2.41.0
4 changes: 4 additions & 0 deletions charts/nucliadb/templates/sts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ spec:
heritage: "{{ .Release.Service }}"
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{ toYaml . | indent 8 }}
{{- end }}
name: nucliadb
labels:
app: nucliadb
Expand Down
1 change: 1 addition & 0 deletions charts/nucliadb/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ image: nuclia/nucliadb
imageVersion:

replicas: 2
podAnnotations: {}

# app settings
env:
Expand Down
4 changes: 2 additions & 2 deletions e2e/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def _test_predict_proxy_chat(kbid: str):
json={
"question": "Who is the best football player?",
"query_context": [
"Many football players have existed. Cristiano Ronaldo and Messi among them, but Messi is by far the greatest."
"Many football players have existed. Messi is by far the greatest."
],
"user_id": "[email protected]",
},
Expand Down Expand Up @@ -252,4 +252,4 @@ def raise_for_status(resp):
print("Error response")
print("Status code:", resp.status_code)
print(resp.text)
raise
raise
5 changes: 5 additions & 0 deletions nucliadb/nucliadb/common/http_clients/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ class StatusResultV2(pydantic.BaseModel):
title="Schedule ETA",
description="Estimated time until the resource is scheduled.",
)
schedule_order: int = pydantic.Field(
0,
title="Schedule Order",
description="Order of the resource in the schedule queue.",
)


class StatusResultsV2(pydantic.BaseModel):
Expand Down
3 changes: 3 additions & 0 deletions nucliadb/nucliadb/ingest/orm/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,9 @@ def _set_resource_labels(self, basic: Basic, origin: Optional[Origin]):
if origin.source_id != "":
self.labels["u"].append(f"s/{origin.source_id}")

if origin.path:
self.labels["p"].append(origin.path.lstrip("/"))

# origin contributors
for contrib in origin.colaborators:
self.labels["u"].append(f"o/{contrib}")
Expand Down
4 changes: 3 additions & 1 deletion nucliadb/nucliadb/search/requesters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ async def node_query(
target_replicas: Optional[list[str]] = None,
read_only: bool = True,
) -> tuple[list[T], bool, list[tuple[str, str, str]], list[str]]:
read_only = read_only and has_feature(const.Features.READ_REPLICA_SEARCHES)
read_only = read_only and has_feature(
const.Features.READ_REPLICA_SEARCHES, context={"kbid": kbid}
)

shard_manager = get_shard_manager()

Expand Down
1 change: 1 addition & 0 deletions nucliadb/nucliadb/search/search/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
predict_request = SummarizeModel()
predict_request.user_prompt = request.user_prompt
predict_request.summary_kind = request.summary_kind

for rid, field_id, extracted_text in await get_extracted_texts(
kbid, request.resources
Expand Down
57 changes: 56 additions & 1 deletion nucliadb/nucliadb/tests/integration/search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ async def test_catalog_can_filter_by_processing_status(
@pytest.mark.skip(reason="Needs sc-5626")
@pytest.mark.asyncio
@pytest.mark.parametrize("knowledgebox", ("EXPERIMENTAL", "STABLE"), indirect=True)
async def test_catalog_prefix_search(
async def test_(
nucliadb_reader: AsyncClient,
nucliadb_writer: AsyncClient,
knowledgebox,
Expand Down Expand Up @@ -1499,3 +1499,58 @@ def check_fuzzy_paragraphs(search_response, *, fuzzy_result: bool, n_expected: i
assert paragraph["fuzzy_result"] is fuzzy_result
found += 1
assert found == n_expected


@pytest.mark.asyncio
@pytest.mark.parametrize("knowledgebox", ("EXPERIMENTAL", "STABLE"), indirect=True)
async def test_search_by_path_filter(
    nucliadb_reader: AsyncClient,
    nucliadb_writer: AsyncClient,
    nucliadb_grpc: WriterStub,
    knowledgebox,
):
    """Check that `/origin.path/...` search filters select resources by path prefix.

    One resource is created per origin path. The first path carries a leading
    slash and is still expected to be returned by the `foo` filter.
    """
    paths = ["/foo", "foo/bar", "foo/bar/1", "foo/bar/2", "foo/bar/3", "foo/bar/4"]

    # Create one resource for every origin path.
    for path in paths:
        payload = {
            "title": f"My resource: {path}",
            "summary": "Some summary",
            "origin": {"path": path},
        }
        created = await nucliadb_writer.post(
            f"/kb/{knowledgebox}/resources",
            headers={"X-Synchronous": "true"},
            json=payload,
        )
        assert created.status_code == 201

    # Sanity check: the catalog lists every resource that was just created.
    catalog = await nucliadb_reader.get(
        f"/kb/{knowledgebox}/catalog",
        params={"query": ""},
    )
    assert catalog.status_code == 200
    assert len(catalog.json()["resources"]) == len(paths)

    # (path filter, expected number of resources), checked in this order:
    # everything, everything under foo/bar, and the single foo/bar/4 resource.
    expectations = (
        ("foo", len(paths)),
        ("foo/bar", len(paths) - 1),
        ("foo/bar/4", 1),
    )
    for path_filter, expected_count in expectations:
        found = await nucliadb_reader.get(
            f"/kb/{knowledgebox}/search?filters=/origin.path/{path_filter}"
        )
        assert found.status_code == 200
        assert len(found.json()["resources"]) == expected_count
2 changes: 2 additions & 0 deletions nucliadb/nucliadb/writer/resource/origin.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ def parse_origin(origin: Origin, origin_payload: InputOrigin):
origin.related.extend(origin_payload.related)
if origin_payload.metadata:
origin.metadata.update(origin_payload.metadata)
if origin_payload.path:
origin.path = origin_payload.path
origin.source = Origin.Source.API


Expand Down
2 changes: 2 additions & 0 deletions nucliadb_models/nucliadb_models/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"f": [], # field keyword field (field/keyword)
"fg": [], # field keyword (keywords) flat
"m": [], # origin metadata in the form of (key/value). Max key/value size is 255
"p": [], # origin path of the resource (e.g. foo/bar/1)
}


Expand All @@ -44,6 +45,7 @@
"metadata.languages": "s/s",
"origin.tags": "t",
"origin.metadata": "m",
"origin.path": "p",
"classification.labels": "l",
"entities": "e",
"field": "f",
Expand Down
1 change: 1 addition & 0 deletions nucliadb_models/nucliadb_models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ class InputOrigin(BaseModel):
# old field was "colaborators"
filename: Optional[str] = None
related: List[str] = []
path: Optional[str] = None


class Origin(InputOrigin):
Expand Down
12 changes: 12 additions & 0 deletions nucliadb_models/nucliadb_models/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,13 +782,19 @@ class SummarizeResourceModel(BaseModel):
fields: Dict[str, str] = {}


class SummaryKind(str, Enum):
    """Kind of summary that can be requested from the summarize API."""

    # Members are plain strings as well (str mixin), so they compare equal
    # to their literal values.
    SIMPLE = "simple"
    EXTENDED = "extended"


class SummarizeModel(BaseModel):
    """
    Request payload sent to the predict API to summarize resources.
    """

    # Resources to summarize (presumably keyed by resource id -- confirm
    # against the code that fills this mapping).
    resources: Dict[str, SummarizeResourceModel] = {}
    # Optional custom prompt supplied by the user.
    user_prompt: Optional[str] = None
    # Requested kind of summary; defaults to the simple one.
    summary_kind: SummaryKind = SummaryKind.SIMPLE


class SummarizeRequest(BaseModel):
Expand All @@ -809,6 +815,12 @@ class SummarizeRequest(BaseModel):
description="Uids of the resources to summarize",
)

summary_kind: SummaryKind = Field(
default=SummaryKind.SIMPLE,
title="Summary kind",
description="Option to customize how the summary will be",
)


class SummarizedResource(BaseModel):
summary: str
Expand Down
4 changes: 4 additions & 0 deletions nucliadb_node/tests/test_search_relations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,10 @@ async fn test_search_relations_prefixed(
shard_id: shard_id.clone(),
prefix: Some(RelationPrefixSearchRequest {
prefix: "cat".to_string(),
node_filters: vec![RelationNodeFilter {
node_subtype: None,
node_type: NodeType::Entity as i32,
}],
..Default::default()
}),
..Default::default()
Expand Down
364 changes: 182 additions & 182 deletions nucliadb_protos/python/nucliadb_protos/resources_pb2.py

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion nucliadb_protos/python/nucliadb_protos/resources_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ class Origin(google.protobuf.message.Message):
COLABORATORS_FIELD_NUMBER: builtins.int
FILENAME_FIELD_NUMBER: builtins.int
RELATED_FIELD_NUMBER: builtins.int
PATH_FIELD_NUMBER: builtins.int
source: global___Origin.Source.ValueType
source_id: builtins.str
url: builtins.str
Expand All @@ -300,6 +301,7 @@ class Origin(google.protobuf.message.Message):
filename: builtins.str
@property
def related(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: ...
path: builtins.str
def __init__(
self,
*,
Expand All @@ -313,9 +315,10 @@ class Origin(google.protobuf.message.Message):
colaborators: collections.abc.Iterable[builtins.str] | None = ...,
filename: builtins.str = ...,
related: collections.abc.Iterable[builtins.str] | None = ...,
path: builtins.str = ...,
) -> None: ...
def HasField(self, field_name: typing_extensions.Literal["created", b"created", "modified", b"modified"]) -> builtins.bool: ...
def ClearField(self, field_name: typing_extensions.Literal["colaborators", b"colaborators", "created", b"created", "filename", b"filename", "metadata", b"metadata", "modified", b"modified", "related", b"related", "source", b"source", "source_id", b"source_id", "tags", b"tags", "url", b"url"]) -> None: ...
def ClearField(self, field_name: typing_extensions.Literal["colaborators", b"colaborators", "created", b"created", "filename", b"filename", "metadata", b"metadata", "modified", b"modified", "path", b"path", "related", b"related", "source", b"source", "source_id", b"source_id", "tags", b"tags", "url", b"url"]) -> None: ...

global___Origin = Origin

Expand Down
1 change: 1 addition & 0 deletions nucliadb_protos/resources.proto
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ message Origin {
repeated string colaborators = 8;
string filename = 9;
repeated string related = 10;
string path = 11;
}

message Extra {
Expand Down
2 changes: 2 additions & 0 deletions nucliadb_protos/rust/src/resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ pub struct Origin {
pub filename: ::prost::alloc::string::String,
#[prost(string, repeated, tag="10")]
pub related: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
#[prost(string, tag="11")]
pub path: ::prost::alloc::string::String,
}
/// Nested message and enum types in `Origin`.
pub mod origin {
Expand Down
2 changes: 2 additions & 0 deletions nucliadb_telemetry/nucliadb_telemetry/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,5 +143,7 @@ def setup_sentry_logging_integration(for_loggers: List[str]) -> None:
if settings.sentry_url:
sentry_sdk.init(
dsn=settings.sentry_url,
environment=settings.environment,
integrations=[SentryLoggingIntegration(for_loggers)],
)
sentry_sdk.set_tag("zone", settings.zone)

0 comments on commit 656eae6

Please sign in to comment.