Skip to content

Commit

Permalink
feat: support more vector item metadata value types (#972)
Browse files Browse the repository at this point in the history
In the previous version of the service, we only supported
string-valued metadata. In the latest version, we support the
following value types:

string
int
double
bool
list of strings
We update the VectorItem and SearchHit interfaces to support
metadata of these types (where int and double are both represented as
number).

We update sendUpsertItemBatch to serialize the metadata
appropriately, distinguishing integer-valued numbers from
floating-point numbers.

Similarly we update sendSearch to deserialize the metadata
appropriately.

We add an extra integration test to ensure that each of the metadata
value types is stored and retrieved properly.
  • Loading branch information
malandis authored Oct 20, 2023
1 parent 8455f31 commit 2b7b326
Show file tree
Hide file tree
Showing 9 changed files with 6,598 additions and 62,270 deletions.
34,281 changes: 3,153 additions & 31,128 deletions packages/client-sdk-nodejs/package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion packages/client-sdk-nodejs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
"uuid": "8.3.2"
},
"dependencies": {
"@gomomento/generated-types": "0.85.0",
"@gomomento/generated-types": "0.87.0",
"@gomomento/sdk-core": "file:../core",
"@grpc/grpc-js": "1.9.0",
"@types/google-protobuf": "3.15.6",
Expand Down
116 changes: 100 additions & 16 deletions packages/client-sdk-nodejs/src/internal/vector-index-data-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ import {
validateIndexName,
validateTopK,
} from '@gomomento/sdk-core/dist/src/internal/utils';
import {normalizeSdkError} from '@gomomento/sdk-core/dist/src/errors';
import {
UnknownError,
normalizeSdkError,
} from '@gomomento/sdk-core/dist/src/errors';
import {ALL_VECTOR_METADATA} from '@gomomento/sdk-core/dist/src/clients/IVectorIndexClient';

export class VectorIndexDataClient implements IVectorIndexDataClient {
Expand Down Expand Up @@ -77,37 +80,92 @@ export class VectorIndexDataClient implements IVectorIndexDataClient {
indexName: string,
items: Array<VectorIndexItem>
): Promise<VectorUpsertItemBatch.Response> {
let request: vectorindex._UpsertItemBatchRequest;
try {
validateIndexName(indexName);

// Create the request here to catch any metadata validation errors.
request = VectorIndexDataClient.buildUpsertItemBatchRequest(
indexName,
items
);
} catch (err) {
return new VectorUpsertItemBatch.Error(normalizeSdkError(err as Error));
}
return await this.sendUpsertItemBatch(indexName, items);
return await this.sendUpsertItemBatch(indexName, request);
}

private async sendUpsertItemBatch(
private static buildUpsertItemBatchRequest(
indexName: string,
items: Array<VectorIndexItem>
): Promise<VectorUpsertItemBatch.Response> {
const request = new vectorindex._UpsertItemBatchRequest({
): vectorindex._UpsertItemBatchRequest {
return new vectorindex._UpsertItemBatchRequest({
index_name: indexName,
items: items.map(item => {
return new vectorindex._Item({
id: item.id,
vector: new vectorindex._Vector({elements: item.vector}),
metadata:
item.metadata === undefined
? []
: Object.entries(item.metadata).map(
([key, value]) =>
new vectorindex._Metadata({
field: key,
string_value: value,
})
),
VectorIndexDataClient.convertItemMetadataToProtobufMetadata(item),
});
}),
});
}

/**
 * Converts the metadata of a single item into its protobuf representation.
 *
 * Each entry of `item.metadata` becomes one `vectorindex._Metadata` whose
 * value field is selected from the runtime type of the entry's value:
 * string, number (integer or double), boolean, or array of strings.
 *
 * @param item - The item whose metadata is converted.
 * @returns One protobuf metadata entry per metadata field, or an empty array
 * when the item has no metadata.
 * @throws InvalidArgumentError if a value is not a string, number, boolean,
 * or array of strings.
 */
private static convertItemMetadataToProtobufMetadata(
  item: VectorIndexItem
): vectorindex._Metadata[] {
  if (item.metadata === undefined) {
    return [];
  }

  return Object.entries(item.metadata).map(([key, value]) => {
    if (typeof value === 'string') {
      return new vectorindex._Metadata({
        field: key,
        string_value: value,
      });
    }

    if (typeof value === 'number') {
      // Number.isInteger is also true for whole-valued floats such as 1.0,
      // so those are sent as integers; everything else is sent as a double.
      return Number.isInteger(value)
        ? new vectorindex._Metadata({
            field: key,
            integer_value: value,
          })
        : new vectorindex._Metadata({
            field: key,
            double_value: value,
          });
    }

    if (typeof value === 'boolean') {
      return new vectorindex._Metadata({
        field: key,
        boolean_value: value,
      });
    }

    // Only arrays made up entirely of strings are accepted; a mixed array
    // falls through to the error below.
    if (
      Array.isArray(value) &&
      value.every(element => typeof element === 'string')
    ) {
      return new vectorindex._Metadata({
        field: key,
        // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
        list_of_strings_value:
          // eslint-disable-next-line @typescript-eslint/no-unsafe-call
          new vectorindex._Metadata._ListOfStrings({
            values: value,
          }),
      });
    }

    throw new InvalidArgumentError(
      `Metadata value for field '${key}' is not a valid type. Value is of type '${typeof value}' and is not a string, number, boolean, or array of strings.`
    );
  });
}

private async sendUpsertItemBatch(
indexName: string,
request: vectorindex._UpsertItemBatchRequest
): Promise<VectorUpsertItemBatch.Response> {
return await new Promise(resolve => {
this.client.UpsertItemBatch(
request,
Expand Down Expand Up @@ -212,9 +270,35 @@ export class VectorIndexDataClient implements IVectorIndexDataClient {
id: hit.id,
distance: hit.distance,
metadata: hit.metadata.reduce((acc, metadata) => {
acc[metadata.field] = metadata.string_value;
const field = metadata.field;
switch (metadata.value) {
case 'string_value':
acc[field] = metadata.string_value;
break;
case 'integer_value':
acc[field] = metadata.integer_value;
break;
case 'double_value':
acc[field] = metadata.double_value;
break;
case 'boolean_value':
acc[field] = metadata.boolean_value;
break;
case 'list_of_strings_value':
acc[field] = metadata.list_of_strings_value.values;
break;
default:
resolve(
new VectorSearch.Error(
new UnknownError(
'Search responded with an unknown result'
)
)
);
break;
}
return acc;
}, {} as Record<string, string>),
}, {} as Record<string, string | number | boolean | Array<string>>),
}))
)
);
Expand Down
Loading

0 comments on commit 2b7b326

Please sign in to comment.