Skip to content

Commit

Permalink
fix: MLE5Large GCS URL transform (#3)
Browse files Browse the repository at this point in the history
* fix: MLE5Large GCS URL transform

* ci: PR compliance, tests

* ci: refactor compliance.yml
  • Loading branch information
Anush008 authored Oct 9, 2023
1 parent 13b35ba commit 7ae5bea
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 3 deletions.
41 changes: 39 additions & 2 deletions .github/workflows/compliance.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "Compliance"
name: "PR Compliance"

on:
pull_request_target:
Expand All @@ -11,5 +11,42 @@ permissions:
pull-requests: write

jobs:
prepare-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Set matrix for test
id: set-matrix
run: echo "matrix=$(find . -name '*.test.ts' | jq -R -s -c 'split("\n") | map(select(length > 0))')" >> $GITHUB_OUTPUT

test:
runs-on: ubuntu-latest
strategy:
matrix:
files: ${{fromJson(needs.prepare-matrix.outputs.matrix)}}
needs:
- prepare-matrix
steps:
- name: "☁️ checkout repository"
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: "🔧 setup Bun"
uses: oven-sh/setup-bun@v1

- name: "📦 install dependencies"
run: bun install

- name: "Run Vitest"
run: bun x vitest ${{ matrix.files }}

compliance:
uses: open-sauced/hot/.github/workflows/compliance.yml@main
uses: open-sauced/hot/.github/workflows/compliance.yml@main
needs:
- test


9 changes: 8 additions & 1 deletion src/fastembed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export enum EmbeddingModel {
AllMiniLML6V2 = "fast-all-MiniLM-L6-v2",
BGEBaseEN = "fast-bge-base-en",
BGESmallEN = "fast-bge-small-en",
MLE5Large = "intfloat-multilingual-e5-large",
MLE5Large = "fast-multilingual-e5-large",
}

interface InitOptions {
Expand Down Expand Up @@ -149,6 +149,13 @@ export class FlagEmbedding extends Embedding {
return outputFilePath;
}

// The MLE5Large model URL doesn't follow the same naming convention as the other models.
// So, we transform "fast-multilingual-e5-large" -> "intfloat-multilingual-e5-large" in the download URL.
// The model directory name in the GCS storage is "fast-multilingual-e5-large", like the others.
if (model === EmbeddingModel.MLE5Large) {
model = "intfloat" + model.substring(model.indexOf("-"));
console.log("It is:", model);
}
const url = `https://storage.googleapis.com/qdrant-fastembed/${model}.tar.gz`;
const fileStream = fs.createWriteStream(outputFilePath);

Expand Down
File renamed without changes.

0 comments on commit 7ae5bea

Please sign in to comment.