From 1cc3b572ee0e22b0a4ad967ae888f2d767417176 Mon Sep 17 00:00:00 2001 From: Anush Date: Mon, 9 Oct 2023 21:03:51 +0530 Subject: [PATCH] test: verify canonical values (#4) --- src/fastembed.ts | 1 - tests/fastembed_allminilm.test.ts | 20 ++++++++++-- tests/fastembed_bgebase.test.ts | 17 ++++++++++ tests/fastembed_bgesmall.test.ts | 53 +++++++++++++++++++++++++------ tests/fastembed_mle5large.test.ts | 53 +++++++++++++++++++++++++------ 5 files changed, 123 insertions(+), 21 deletions(-) diff --git a/src/fastembed.ts b/src/fastembed.ts index b2b2bca..76542e5 100644 --- a/src/fastembed.ts +++ b/src/fastembed.ts @@ -210,7 +210,6 @@ export class FlagEmbedding extends Embedding { // The model directory name in the GCS storage is "fast-multilingual-e5-large", like the others if (model === EmbeddingModel.MLE5Large) { model = "intfloat" + model.substring(model.indexOf("-")); - console.log("It is:", model); } const url = `https://storage.googleapis.com/qdrant-fastembed/${model}.tar.gz`; const fileStream = fs.createWriteStream(outputFilePath); diff --git a/tests/fastembed_allminilm.test.ts b/tests/fastembed_allminilm.test.ts index 45e2c01..90f7e06 100644 --- a/tests/fastembed_allminilm.test.ts +++ b/tests/fastembed_allminilm.test.ts @@ -1,5 +1,5 @@ -import { expect, test } from 'vitest' -import { FlagEmbedding, EmbeddingModel } from "../src" +import { expect, test } from "vitest"; +import { FlagEmbedding, EmbeddingModel } from "../src"; test('Init EmbeddingModel', async () => { const model = await FlagEmbedding.init({ @@ -69,3 +69,19 @@ test("FlagEmbedding passageEmbed", async () => { expect(embeddings.length).toBe(1); }); +test("FlagEmbedding canonical values", async () => { + const flagEmbedding = await FlagEmbedding.init({ + model: EmbeddingModel.AllMiniLML6V2, + maxLength: 512, + }); + const expected = [ + 0.02591, 0.00573, 0.01147, 0.03796, -0.0232, -0.0549, 0.01404, -0.0107, + -0.0244, -0.01822, + ]; + + const embeddings = (await flagEmbedding.embed(["hello world"]).next()).value!; + expect(embeddings).toBeDefined(); + for (let i = 0; i < expected.length; i++) { + expect(embeddings[0][i]).toBeCloseTo(expected[i], 3); + } +}); diff --git a/tests/fastembed_bgebase.test.ts b/tests/fastembed_bgebase.test.ts index 1f61694..39c951a 100644 --- a/tests/fastembed_bgebase.test.ts +++ b/tests/fastembed_bgebase.test.ts @@ -68,3 +68,20 @@ test("FlagEmbedding passageEmbed", async () => { expect(embeddings).toBeDefined(); expect(embeddings.length).toBe(1); }); + +test("FlagEmbedding canonical values", async () => { + const flagEmbedding = await FlagEmbedding.init({ + model: EmbeddingModel.BGEBaseEN, + maxLength: 512, + }); + const expected = [ + 0.0114, 0.03722, 0.02941, 0.0123, 0.03451, 0.00876, 0.02356, 0.05414, + -0.0294, -0.0547, + ]; + + const embeddings = (await flagEmbedding.embed(["hello world"]).next()).value!; + expect(embeddings).toBeDefined(); + for (let i = 0; i < expected.length; i++) { + expect(embeddings[0][i]).toBeCloseTo(expected[i], 3); + } +}); \ No newline at end of file diff --git a/tests/fastembed_bgesmall.test.ts b/tests/fastembed_bgesmall.test.ts index d8e6d59..a74efa5 100644 --- a/tests/fastembed_bgesmall.test.ts +++ b/tests/fastembed_bgesmall.test.ts @@ -1,11 +1,11 @@ -import { expect, test } from 'vitest' -import { FlagEmbedding, EmbeddingModel } from "../src" +import { expect, test } from "vitest"; +import { FlagEmbedding, EmbeddingModel } from "../src"; -test('Init EmbeddingModel', async () => { - const model = await FlagEmbedding.init({ - model: EmbeddingModel.BGESmallEN - }); - expect(model).toBeDefined(); +test("Init EmbeddingModel", async () => { + const model = await FlagEmbedding.init({ + model: EmbeddingModel.BGESmallEN, + }); + expect(model).toBeDefined(); }); test("FlagEmbedding embed", async () => { @@ -26,7 +26,14 @@ test("FlagEmbedding embed batch", async () => { maxLength: 512, }); - const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"]); + const embeddingsBatch = flagEmbedding.embed([ + "This is a test", + "Some text", + "Some more test", + "This is a test", + "Some text", + "Some more test", + ]); for await (const embeddings of embeddingsBatch) { expect(embeddings).toBeDefined(); expect(embeddings.length).toBe(6); @@ -39,7 +46,17 @@ test("FlagEmbedding embed small batch", async () => { model: EmbeddingModel.BGESmallEN, maxLength: 512, }); - const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"], 1); + const embeddingsBatch = flagEmbedding.embed( + [ + "This is a test", + "Some text", + "Some more test", + "This is a test", + "Some text", + "Some more test", + ], + 1 + ); for await (const embeddings of embeddingsBatch) { expect(embeddings).toBeDefined(); expect(embeddings.length).toBe(1); @@ -69,3 +86,21 @@ test("FlagEmbedding passageEmbed", async () => { expect(embeddings).toBeDefined(); expect(embeddings.length).toBe(1); }); + +test("FlagEmbedding canonical values", async () => { + const flagEmbedding = await FlagEmbedding.init({ + model: EmbeddingModel.BGESmallEN, + + maxLength: 512, + }); + const expected = [ + -0.02313, -0.02552, 0.017357, -0.06393, -0.00061, 0.022123, -0.01472, + 0.039255, 0.034447, 0.004598, + ]; + + const embeddings = (await flagEmbedding.embed(["hello world"]).next()).value!; + expect(embeddings).toBeDefined(); + for (let i = 0; i < expected.length; i++) { + expect(embeddings[0][i]).toBeCloseTo(expected[i], 3); + } +}); diff --git a/tests/fastembed_mle5large.test.ts b/tests/fastembed_mle5large.test.ts index c56e218..f0fd5f3 100644 --- a/tests/fastembed_mle5large.test.ts +++ b/tests/fastembed_mle5large.test.ts @@ -1,11 +1,11 @@ -import { expect, test } from 'vitest' -import { FlagEmbedding, EmbeddingModel } from "../src" +import { expect, test } from "vitest"; +import { FlagEmbedding, EmbeddingModel } from "../src"; -test('Init EmbeddingModel', async () => { - const model = await FlagEmbedding.init({ - model: EmbeddingModel.MLE5Large - }); - expect(model).toBeDefined(); +test("Init EmbeddingModel", async () => { + const model = await FlagEmbedding.init({ + model: EmbeddingModel.MLE5Large, + }); + expect(model).toBeDefined(); }); test("FlagEmbedding embed", async () => { @@ -26,7 +26,14 @@ test("FlagEmbedding embed batch", async () => { maxLength: 512, }); - const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"]); + const embeddingsBatch = flagEmbedding.embed([ + "This is a test", + "Some text", + "Some more test", + "This is a test", + "Some text", + "Some more test", + ]); for await (const embeddings of embeddingsBatch) { expect(embeddings).toBeDefined(); expect(embeddings.length).toBe(6); @@ -39,7 +46,17 @@ test("FlagEmbedding embed small batch", async () => { model: EmbeddingModel.MLE5Large, maxLength: 512, }); - const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"], 1); + const embeddingsBatch = flagEmbedding.embed( + [ + "This is a test", + "Some text", + "Some more test", + "This is a test", + "Some text", + "Some more test", + ], + 1 + ); for await (const embeddings of embeddingsBatch) { expect(embeddings).toBeDefined(); expect(embeddings.length).toBe(1); @@ -69,3 +86,21 @@ test("FlagEmbedding passageEmbed", async () => { expect(embeddings).toBeDefined(); expect(embeddings.length).toBe(1); }); + +test("FlagEmbedding canonical values", async () => { + const flagEmbedding = await FlagEmbedding.init({ + model: EmbeddingModel.MLE5Large, + + maxLength: 512, + }); + const expected = [ + 0.00961, 0.00443, 0.00658, -0.03532, 0.00703, -0.02878, -0.03671, 0.03482, + 0.06343, -0.04731, + ]; + + const embeddings = (await flagEmbedding.embed(["hello world"]).next()).value!; + expect(embeddings).toBeDefined(); + for (let i = 0; i < expected.length; i++) { + expect(embeddings[0][i]).toBeCloseTo(expected[i], 3); + } +});