From 9d8962ac88c9f984288237449f849d4294dcfbe8 Mon Sep 17 00:00:00 2001 From: fzowl Date: Fri, 6 Dec 2024 18:55:23 +0100 Subject: [PATCH 1/3] Introducing VoyageAI's new multimodal embedding model --- src/collections/config/types/vectorizer.ts | 11 +++++++++ src/collections/configure/types/vectorizer.ts | 5 ++++ src/collections/configure/vectorizer.ts | 23 +++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index c462c5ac..430651b6 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -24,6 +24,7 @@ export type Vectorizer = | 'multi2vec-bind' | Multi2VecPalmVectorizer | 'multi2vec-google' + | 'multi2vec-voyageai' | 'ref2vec-centroid' | 'text2vec-aws' | 'text2vec-azure-openai' @@ -184,6 +185,13 @@ export type Multi2VecGoogleConfig = { }; }; +/** The configuration for multi-media vectorization using the VoyageAI module. + * + * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage. + */ +export type Multi2VecVoyageAIConfig = { +}; + /** The configuration for reference-based vectorization using the centroid method. * * See the [documentation](https://weaviate.io/developers/weaviate/modules/ref2vec-centroid) for detailed usage. @@ -431,6 +439,7 @@ export type VectorizerConfig = | Multi2VecBindConfig | Multi2VecGoogleConfig | Multi2VecPalmConfig + | Multi2VecVoyageAIConfig | Ref2VecCentroidConfig | Text2VecAWSConfig | Text2VecAzureOpenAIConfig @@ -460,6 +469,8 @@ export type VectorizerConfigType = V extends 'img2vec-neural' ? Multi2VecGoogleConfig : V extends Multi2VecPalmVectorizer ? Multi2VecPalmConfig + : V extends 'multi2vec-voyageai' + ? Multi2VecVoyageAIConfig | undefined : V extends 'ref2vec-centroid' ? 
Ref2VecCentroidConfig : V extends 'text2vec-aws' diff --git a/src/collections/configure/types/vectorizer.ts b/src/collections/configure/types/vectorizer.ts index d7bc66b3..5730402f 100644 --- a/src/collections/configure/types/vectorizer.ts +++ b/src/collections/configure/types/vectorizer.ts @@ -150,6 +150,9 @@ export type Multi2VecGoogleConfigCreate = { vectorizeCollectionName?: boolean; }; +export type Multi2VecVoyageAIConfigCreate = { +}; + export type Ref2VecCentroidConfigCreate = Ref2VecCentroidConfig; export type Text2VecAWSConfigCreate = Text2VecAWSConfig; @@ -197,6 +200,8 @@ export type VectorizerConfigCreateType = V extends 'img2vec-neural' ? Multi2VecPalmConfigCreate : V extends 'multi2vec-google' ? Multi2VecGoogleConfigCreate + : V extends 'multi2vec-voyageai' + ? Multi2VecVoyageAIConfigCreate | undefined : V extends 'ref2vec-centroid' ? Ref2VecCentroidConfigCreate : V extends 'text2vec-aws' diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts index f6ff8832..6198b282 100644 --- a/src/collections/configure/vectorizer.ts +++ b/src/collections/configure/vectorizer.ts @@ -3,6 +3,7 @@ import { Multi2VecClipConfig, Multi2VecField, Multi2VecPalmConfig, + Multi2VecVoyageAIConfig, VectorIndexType, Vectorizer, VectorizerConfigType, @@ -263,6 +264,28 @@ export const vectorizer = { }, }); }, + /** + * Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-voyageai'`. + * + * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage. + * + * @param {ConfigureNonTextVectorizerOptions} [opts] The configuration options for the `multi2vec-voyageai` vectorizer. + * @returns {VectorConfigCreate[], N, I, 'multi2vec-voyageai'>} The configuration object. 
+ */ + multi2VecVoyageAI: ( + opts?: ConfigureNonTextVectorizerOptions + ): VectorConfigCreate => { + const { name, vectorIndexConfig, ...config } = opts || {}; + return makeVectorizer(name, { + vectorIndexConfig, + vectorizerConfig: { + name: 'multi2vec-voyageai', + config: { + ...config, + }, + }, + }); + }, /** * Create a `VectorConfigCreate` object with the vectorizer set to `'ref2vec-centroid'`. * From 2ba3f8faf5609f774d558f9f456360ad0323a6ba Mon Sep 17 00:00:00 2001 From: fzowl Date: Fri, 6 Dec 2024 19:01:22 +0100 Subject: [PATCH 2/3] Introducing VoyageAI's new multimodal embedding model --- src/collections/configure/vectorizer.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts index 6198b282..de2aed93 100644 --- a/src/collections/configure/vectorizer.ts +++ b/src/collections/configure/vectorizer.ts @@ -3,7 +3,6 @@ import { Multi2VecClipConfig, Multi2VecField, Multi2VecPalmConfig, - Multi2VecVoyageAIConfig, VectorIndexType, Vectorizer, VectorizerConfigType, @@ -281,8 +280,8 @@ export const vectorizer = { vectorizerConfig: { name: 'multi2vec-voyageai', config: { - ...config, - }, + ...config, + }, }, }); }, From 3f7de11cc599e52b9dd505939ab1bb490dffb659 Mon Sep 17 00:00:00 2001 From: fzowl Date: Sat, 7 Dec 2024 22:49:34 +0100 Subject: [PATCH 3/3] Introducing VoyageAI's new multimodal embedding model --- src/collections/config/types/vectorizer.ts | 11 +++++++++++ src/collections/configure/types/vectorizer.ts | 4 ++++ src/collections/configure/vectorizer.ts | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/src/collections/config/types/vectorizer.ts b/src/collections/config/types/vectorizer.ts index 430651b6..9d9bde93 100644 --- a/src/collections/config/types/vectorizer.ts +++ b/src/collections/config/types/vectorizer.ts @@ -190,6 +190,17 @@ export type Multi2VecGoogleConfig = { * See the 
[documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage. */ export type Multi2VecVoyageAIConfig = { + /** The image fields used when vectorizing. */ + imageFields?: string[]; + /** The text fields used when vectorizing. */ + textFields?: string[]; + /** The weights of the fields used for vectorization. */ + weights?: { + /** The weights of the image fields. */ + imageFields?: number[]; + /** The weights of the text fields. */ + textFields?: number[]; + }; }; /** The configuration for reference-based vectorization using the centroid method. diff --git a/src/collections/configure/types/vectorizer.ts b/src/collections/configure/types/vectorizer.ts index 5730402f..b505d3e9 100644 --- a/src/collections/configure/types/vectorizer.ts +++ b/src/collections/configure/types/vectorizer.ts @@ -151,6 +151,10 @@ export type Multi2VecGoogleConfigCreate = { }; export type Multi2VecVoyageAIConfigCreate = { + /** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */ + imageFields?: string[] | Multi2VecField[]; + /** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. 
*/ + textFields?: string[] | Multi2VecField[]; }; export type Ref2VecCentroidConfigCreate = Ref2VecCentroidConfig; diff --git a/src/collections/configure/vectorizer.ts b/src/collections/configure/vectorizer.ts index de2aed93..b4721edd 100644 --- a/src/collections/configure/vectorizer.ts +++ b/src/collections/configure/vectorizer.ts @@ -3,6 +3,7 @@ import { Multi2VecClipConfig, Multi2VecField, Multi2VecPalmConfig, + Multi2VecVoyageAIConfig, VectorIndexType, Vectorizer, VectorizerConfigType, @@ -275,12 +276,20 @@ export const vectorizer = { opts?: ConfigureNonTextVectorizerOptions ): VectorConfigCreate => { const { name, vectorIndexConfig, ...config } = opts || {}; + const imageFields = config.imageFields?.map(mapMulti2VecField); + const textFields = config.textFields?.map(mapMulti2VecField); + let weights: Multi2VecVoyageAIConfig['weights'] = {}; + weights = formatMulti2VecFields(weights, 'imageFields', imageFields); + weights = formatMulti2VecFields(weights, 'textFields', textFields); return makeVectorizer(name, { vectorIndexConfig, vectorizerConfig: { name: 'multi2vec-voyageai', config: { ...config, + imageFields: imageFields?.map((f) => f.name), + textFields: textFields?.map((f) => f.name), + weights: Object.keys(weights).length === 0 ? undefined : weights, }, }, });