Skip to content

Commit

Permalink
test: verify canonical values (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
Anush008 authored Oct 9, 2023
1 parent 0f3ef10 commit 1cc3b57
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 21 deletions.
1 change: 0 additions & 1 deletion src/fastembed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ export class FlagEmbedding extends Embedding {
// The model directory name in the GCS storage is "fast-multilingual-e5-large", like the others
if (model === EmbeddingModel.MLE5Large) {
model = "intfloat" + model.substring(model.indexOf("-"));
console.log("It is:", model);
}
const url = `https://storage.googleapis.com/qdrant-fastembed/${model}.tar.gz`;
const fileStream = fs.createWriteStream(outputFilePath);
Expand Down
20 changes: 18 additions & 2 deletions tests/fastembed_allminilm.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { expect, test } from 'vitest'
import { FlagEmbedding, EmbeddingModel } from "../src"
import { expect, test } from "vitest";
import { FlagEmbedding, EmbeddingModel } from "../src";

test('Init EmbeddingModel', async () => {
const model = await FlagEmbedding.init({
Expand Down Expand Up @@ -69,3 +69,19 @@ test("FlagEmbedding passageEmbed", async () => {
expect(embeddings.length).toBe(1);
});

/**
 * Regression check for EmbeddingModel.AllMiniLML6V2: the leading components
 * of the embedding produced for "hello world" must agree with the recorded
 * values to 3 decimal digits.
 */
test("FlagEmbedding canonical values", async () => {
  // Expected leading components of the first embedding vector.
  const canonicalPrefix = [
    0.02591, 0.00573, 0.01147, 0.03796, -0.0232, -0.0549, 0.01404, -0.0107,
    -0.0244, -0.01822,
  ];
  const embedder = await FlagEmbedding.init({
    model: EmbeddingModel.AllMiniLML6V2,
    maxLength: 512,
  });

  // Only the first batch yielded by embed() is inspected.
  const firstBatch = await embedder.embed(["hello world"]).next();
  const embeddings = firstBatch.value!;
  expect(embeddings).toBeDefined();

  // Compare component by component; only the recorded prefix is checked.
  canonicalPrefix.forEach((reference, position) => {
    expect(embeddings[0][position]).toBeCloseTo(reference, 3);
  });
});
17 changes: 17 additions & 0 deletions tests/fastembed_bgebase.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,20 @@ test("FlagEmbedding passageEmbed", async () => {
expect(embeddings).toBeDefined();
expect(embeddings.length).toBe(1);
});

/**
 * Regression check for EmbeddingModel.BGEBaseEN: the leading components of
 * the embedding produced for "hello world" must agree with the recorded
 * values to 3 decimal digits.
 */
test("FlagEmbedding canonical values", async () => {
  // Expected leading components of the first embedding vector.
  const canonicalPrefix = [
    0.0114, 0.03722, 0.02941, 0.0123, 0.03451, 0.00876, 0.02356, 0.05414,
    -0.0294, -0.0547,
  ];
  const embedder = await FlagEmbedding.init({
    model: EmbeddingModel.BGEBaseEN,
    maxLength: 512,
  });

  // Only the first batch yielded by embed() is inspected.
  const firstBatch = await embedder.embed(["hello world"]).next();
  const embeddings = firstBatch.value!;
  expect(embeddings).toBeDefined();

  // Compare component by component; only the recorded prefix is checked.
  canonicalPrefix.forEach((reference, position) => {
    expect(embeddings[0][position]).toBeCloseTo(reference, 3);
  });
});
53 changes: 44 additions & 9 deletions tests/fastembed_bgesmall.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { expect, test } from 'vitest'
import { FlagEmbedding, EmbeddingModel } from "../src"
import { expect, test } from "vitest";
import { FlagEmbedding, EmbeddingModel } from "../src";

// Smoke test: FlagEmbedding.init with EmbeddingModel.BGESmallEN must resolve
// to a defined value.
test('Init EmbeddingModel', async () => {
const model = await FlagEmbedding.init({
model: EmbeddingModel.BGESmallEN
});
expect(model).toBeDefined();
// Smoke test: initialising FlagEmbedding with EmbeddingModel.BGESmallEN must
// resolve to a defined value.
test("Init EmbeddingModel", async () => {
  const initOptions = { model: EmbeddingModel.BGESmallEN };
  const initialized = await FlagEmbedding.init(initOptions);
  expect(initialized).toBeDefined();
});

test("FlagEmbedding embed", async () => {
Expand All @@ -26,7 +26,14 @@ test("FlagEmbedding embed batch", async () => {

maxLength: 512,
});
const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"]);
const embeddingsBatch = flagEmbedding.embed([
"This is a test",
"Some text",
"Some more test",
"This is a test",
"Some text",
"Some more test",
]);
for await (const embeddings of embeddingsBatch) {
expect(embeddings).toBeDefined();
expect(embeddings.length).toBe(6);
Expand All @@ -39,7 +46,17 @@ test("FlagEmbedding embed small batch", async () => {
model: EmbeddingModel.BGESmallEN,
maxLength: 512,
});
const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"], 1);
const embeddingsBatch = flagEmbedding.embed(
[
"This is a test",
"Some text",
"Some more test",
"This is a test",
"Some text",
"Some more test",
],
1
);
for await (const embeddings of embeddingsBatch) {
expect(embeddings).toBeDefined();
expect(embeddings.length).toBe(1);
Expand Down Expand Up @@ -69,3 +86,21 @@ test("FlagEmbedding passageEmbed", async () => {
expect(embeddings).toBeDefined();
expect(embeddings.length).toBe(1);
});

/**
 * Regression check for EmbeddingModel.BGESmallEN: the leading components of
 * the embedding produced for "hello world" must agree with the recorded
 * values to 3 decimal digits.
 */
test("FlagEmbedding canonical values", async () => {
  // Expected leading components of the first embedding vector.
  const canonicalPrefix = [
    -0.02313, -0.02552, 0.017357, -0.06393, -0.00061, 0.022123, -0.01472,
    0.039255, 0.034447, 0.004598,
  ];
  const embedder = await FlagEmbedding.init({
    model: EmbeddingModel.BGESmallEN,
    maxLength: 512,
  });

  // Only the first batch yielded by embed() is inspected.
  const firstBatch = await embedder.embed(["hello world"]).next();
  const embeddings = firstBatch.value!;
  expect(embeddings).toBeDefined();

  // Compare component by component; only the recorded prefix is checked.
  canonicalPrefix.forEach((reference, position) => {
    expect(embeddings[0][position]).toBeCloseTo(reference, 3);
  });
});
53 changes: 44 additions & 9 deletions tests/fastembed_mle5large.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { expect, test } from 'vitest'
import { FlagEmbedding, EmbeddingModel } from "../src"
import { expect, test } from "vitest";
import { FlagEmbedding, EmbeddingModel } from "../src";

// Smoke test: FlagEmbedding.init with EmbeddingModel.MLE5Large must resolve
// to a defined value.
test('Init EmbeddingModel', async () => {
const model = await FlagEmbedding.init({
model: EmbeddingModel.MLE5Large
});
expect(model).toBeDefined();
// Smoke test: initialising FlagEmbedding with EmbeddingModel.MLE5Large must
// resolve to a defined value.
test("Init EmbeddingModel", async () => {
  const initOptions = { model: EmbeddingModel.MLE5Large };
  const initialized = await FlagEmbedding.init(initOptions);
  expect(initialized).toBeDefined();
});

test("FlagEmbedding embed", async () => {
Expand All @@ -26,7 +26,14 @@ test("FlagEmbedding embed batch", async () => {

maxLength: 512,
});
const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"]);
const embeddingsBatch = flagEmbedding.embed([
"This is a test",
"Some text",
"Some more test",
"This is a test",
"Some text",
"Some more test",
]);
for await (const embeddings of embeddingsBatch) {
expect(embeddings).toBeDefined();
expect(embeddings.length).toBe(6);
Expand All @@ -39,7 +46,17 @@ test("FlagEmbedding embed small batch", async () => {
model: EmbeddingModel.MLE5Large,
maxLength: 512,
});
const embeddingsBatch = flagEmbedding.embed(["This is a test", "Some text", "Some more test", "This is a test", "Some text", "Some more test"], 1);
const embeddingsBatch = flagEmbedding.embed(
[
"This is a test",
"Some text",
"Some more test",
"This is a test",
"Some text",
"Some more test",
],
1
);
for await (const embeddings of embeddingsBatch) {
expect(embeddings).toBeDefined();
expect(embeddings.length).toBe(1);
Expand Down Expand Up @@ -69,3 +86,21 @@ test("FlagEmbedding passageEmbed", async () => {
expect(embeddings).toBeDefined();
expect(embeddings.length).toBe(1);
});

/**
 * Regression check for EmbeddingModel.MLE5Large: the leading components of
 * the embedding produced for "hello world" must agree with the recorded
 * values to 3 decimal digits.
 */
test("FlagEmbedding canonical values", async () => {
  // Expected leading components of the first embedding vector.
  const canonicalPrefix = [
    0.00961, 0.00443, 0.00658, -0.03532, 0.00703, -0.02878, -0.03671, 0.03482,
    0.06343, -0.04731,
  ];
  const embedder = await FlagEmbedding.init({
    model: EmbeddingModel.MLE5Large,
    maxLength: 512,
  });

  // Only the first batch yielded by embed() is inspected.
  const firstBatch = await embedder.embed(["hello world"]).next();
  const embeddings = firstBatch.value!;
  expect(embeddings).toBeDefined();

  // Compare component by component; only the recorded prefix is checked.
  canonicalPrefix.forEach((reference, position) => {
    expect(embeddings[0][position]).toBeCloseTo(reference, 3);
  });
});

0 comments on commit 1cc3b57

Please sign in to comment.