Skip to content

Commit

Permalink
Merge pull request #241 from weaviate/add-support-for-multi2vec-jina
Browse files Browse the repository at this point in the history
Add factory and unit tests for multi2vecjina module
  • Loading branch information
tsmith023 authored Dec 19, 2024
2 parents d197556 + 3a6760a commit edb27cb
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 5 deletions.
36 changes: 33 additions & 3 deletions src/collections/config/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export type Vectorizer =
| 'multi2vec-bind'
| Multi2VecPalmVectorizer
| 'multi2vec-google'
| 'multi2vec-jinaai'
| 'multi2vec-voyageai'
| 'ref2vec-centroid'
| 'text2vec-aws'
Expand Down Expand Up @@ -170,7 +171,7 @@ export type Multi2VecGoogleConfig = {
videoFields?: string[];
/** The model ID in use. */
modelId?: string;
/** The number of dimensions in use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** Whether the collection name is vectorized. */
vectorizeCollectionName?: boolean;
Expand All @@ -185,6 +186,32 @@ export type Multi2VecGoogleConfig = {
};
};

/**
 * Settings for the `multi2vec-jinaai` module, which performs multi-media vectorization.
 *
 * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage.
 */
export type Multi2VecJinaAIConfig = {
  /** Base URL that API requests are sent to. */
  baseURL?: string;
  /** Name of the model to use. */
  model?: string;
  /** Dimensionality of the vector once embedded. */
  dimensions?: number;
  /** Whether the collection name is vectorized. */
  vectorizeCollectionName?: boolean;
  /** Image fields used when vectorizing. */
  imageFields?: string[];
  /** Text fields used when vectorizing. */
  textFields?: string[];
  /** Weights of the fields used for vectorization. */
  weights?: {
    /** Weights of the image fields. */
    imageFields?: number[];
    /** Weights of the text fields. */
    textFields?: number[];
  };
};

/** The configuration for multi-media vectorization using the VoyageAI module.
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal) for detailed usage.
Expand Down Expand Up @@ -359,7 +386,7 @@ export type Text2VecOllamaConfig = {
export type Text2VecOpenAIConfig = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The dimensions to use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** The model to use. */
model?: 'text-embedding-3-small' | 'text-embedding-3-large' | 'text-embedding-ada-002' | string;
Expand Down Expand Up @@ -434,7 +461,7 @@ export type Text2VecVoyageAIConfig = {
export type Text2VecWeaviateConfig = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The dimensions to use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** The model to use. */
model?: 'Snowflake/snowflake-arctic-embed-m-v1.5' | string;
Expand All @@ -449,6 +476,7 @@ export type VectorizerConfig =
| Multi2VecClipConfig
| Multi2VecBindConfig
| Multi2VecGoogleConfig
| Multi2VecJinaAIConfig
| Multi2VecPalmConfig
| Multi2VecVoyageAIConfig
| Ref2VecCentroidConfig
Expand Down Expand Up @@ -478,6 +506,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
? Multi2VecBindConfig | undefined
: V extends 'multi2vec-google'
? Multi2VecGoogleConfig
: V extends 'multi2vec-jinaai'
? Multi2VecJinaAIConfig | undefined
: V extends Multi2VecPalmVectorizer
? Multi2VecPalmConfig
: V extends 'multi2vec-voyageai'
Expand Down
17 changes: 16 additions & 1 deletion src/collections/configure/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,19 @@ export type Multi2VecCohereConfigCreate = {
vectorizeCollectionName?: boolean;
};

/**
 * The options accepted when creating a `multi2vec-jinaai` vectorizer configuration.
 *
 * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage.
 */
export type Multi2VecJinaAIConfigCreate = {
  /** The base URL to use where API requests should go. */
  baseURL?: string;
  /** The dimensionality of the vector once embedded. */
  dimensions?: number;
  /** The model to use. */
  model?: string;
  /** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
  imageFields?: string[] | Multi2VecField[];
  /** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
  textFields?: string[] | Multi2VecField[];
  /** Whether to vectorize the collection name. */
  vectorizeCollectionName?: boolean;
};

/**
 * Alias of `Multi2VecGoogleConfigCreate`.
 *
 * @deprecated Use `Multi2VecGoogleConfigCreate` instead.
 */
export type Multi2VecPalmConfigCreate = Multi2VecGoogleConfigCreate;

Expand All @@ -144,7 +157,7 @@ export type Multi2VecGoogleConfigCreate = {
videoFields?: string[] | Multi2VecField[];
/** The model ID to use. */
modelId?: string;
/** The number of dimensions to use. */
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** Whether to vectorize the collection name. */
vectorizeCollectionName?: boolean;
Expand Down Expand Up @@ -200,6 +213,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
? Multi2VecCohereConfigCreate | undefined
: V extends 'multi2vec-bind'
? Multi2VecBindConfigCreate | undefined
: V extends 'multi2vec-jinaai'
? Multi2VecJinaAIConfigCreate | undefined
: V extends 'multi2vec-palm'
? Multi2VecPalmConfigCreate
: V extends 'multi2vec-google'
Expand Down
48 changes: 47 additions & 1 deletion src/collections/configure/unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,53 @@ describe('Unit testing of the vectorizer factory class', () => {
},
});
});

it('should create the correct Multi2VecJinaAIConfig type with defaults', () => {
  // No options are supplied, so the name and both nested configs are expected to be undefined.
  const expected: VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-jinaai'> = {
    name: undefined,
    vectorIndex: { name: 'hnsw', config: undefined },
    vectorizer: { name: 'multi2vec-jinaai', config: undefined },
  };
  const actual = configure.vectorizer.multi2VecJinaAI();
  expect(actual).toEqual(expected);
});
it('should create the correct Multi2VecJinaAIConfig type with all values and weights', () => {
  // Scalar options (baseURL, dimensions, vectorizeCollectionName) should pass through untouched,
  // while weighted fields are split into parallel name and weight arrays.
  const config = configure.vectorizer.multi2VecJinaAI({
    name: 'test',
    baseURL: 'base-url',
    dimensions: 256,
    imageFields: [
      { name: 'field1', weight: 0.1 },
      { name: 'field2', weight: 0.2 },
    ],
    textFields: [
      { name: 'field3', weight: 0.3 },
      { name: 'field4', weight: 0.4 },
    ],
    vectorizeCollectionName: true,
  });
  expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-jinaai'>>({
    name: 'test',
    vectorIndex: {
      name: 'hnsw',
      config: undefined,
    },
    vectorizer: {
      name: 'multi2vec-jinaai',
      config: {
        baseURL: 'base-url',
        dimensions: 256,
        imageFields: ['field1', 'field2'],
        textFields: ['field3', 'field4'],
        vectorizeCollectionName: true,
        weights: {
          imageFields: [0.1, 0.2],
          textFields: [0.3, 0.4],
        },
      },
    },
  });
});
it('should create the correct Multi2VecPalmConfig type using deprecated method with defaults', () => {
const config = configure.vectorizer.multi2VecPalm({
projectId: 'project-id',
Expand Down
33 changes: 33 additions & 0 deletions src/collections/configure/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,39 @@ export const vectorizer = {
},
});
},
/**
 * Build a `VectorConfigCreate` object whose vectorizer is `'multi2vec-jinaai'`.
 *
 * Image and text field entries are normalized with `mapMulti2VecField`; their names are emitted as
 * `imageFields`/`textFields` and their weights are gathered into `weights` via `formatMulti2VecFields`.
 * When no options besides `name` and `vectorIndexConfig` are supplied, the vectorizer config is `undefined`.
 *
 * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage.
 *
 * @param {ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-jinaai'>} [opts] The configuration options for the `multi2vec-jinaai` vectorizer.
 * @returns {VectorConfigCreate<never, N, I, 'multi2vec-jinaai'>} The configuration object.
 */
multi2VecJinaAI: <N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
  opts?: ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-jinaai'>
): VectorConfigCreate<never, N, I, 'multi2vec-jinaai'> => {
  const { name, vectorIndexConfig, ...rest } = opts || {};
  const images = rest.imageFields?.map(mapMulti2VecField);
  const texts = rest.textFields?.map(mapMulti2VecField);
  // Gather the per-field weights: image fields first, then text fields.
  let fieldWeights: Multi2VecBindConfig['weights'] = {};
  fieldWeights = formatMulti2VecFields(fieldWeights, 'imageFields', images);
  fieldWeights = formatMulti2VecFields(fieldWeights, 'textFields', texts);
  return makeVectorizer(name, {
    vectorIndexConfig,
    vectorizerConfig: {
      name: 'multi2vec-jinaai',
      // Only emit a config object when the caller passed at least one vectorizer option.
      config:
        Object.keys(rest).length !== 0
          ? {
              ...rest,
              imageFields: images?.map((field) => field.name),
              textFields: texts?.map((field) => field.name),
              weights: Object.keys(fieldWeights).length !== 0 ? fieldWeights : undefined,
            }
          : undefined,
    },
  });
},
/**
* Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-palm'`.
*
Expand Down

0 comments on commit edb27cb

Please sign in to comment.