Skip to content

Commit

Permalink
Merge pull request #112 from tursodatabase/vector
Browse files Browse the repository at this point in the history
Vector search support
  • Loading branch information
penberg authored Jun 7, 2024
2 parents 7a75065 + 7c9d3ce commit 724cb3d
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 15 deletions.
21 changes: 7 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ crate-type = ["cdylib"]

[dependencies]
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
libsql = { version = "0.3.5", features = ["encryption"] }
libsql = { git = "https://github.com/libsql/libsql/", rev = "84ff2f7578cb634e531ac0075c0286893273238b", features = ["encryption"] }
tracing = "0.1"
once_cell = "1.18.0"
tokio = { version = "1.29.1", features = [ "rt-multi-thread" ] }
Expand Down
16 changes: 16 additions & 0 deletions examples/vector/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"name": "libsql-examples-vector",
"version": "1.0.0",
"description": "Vector search example: embeds movie plot synopses and stores them in a libSQL vector column",
"main": "vector.mjs",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "MIT",
"dependencies": {
"@xenova/transformers": "^2.17.1",
"csv-parse": "^5.5.5",
"libsql": "../../"
}
}
37 changes: 37 additions & 0 deletions examples/vector/vector.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { pipeline } from "@xenova/transformers";
import { createReadStream } from "fs";
import { parse } from "csv-parse";
import Database from "libsql";

// Build the embeddings generator: a feature-extraction pipeline backed by
// the unquantized model named below.
const EMBEDDING_MODEL = "Xenova/jina-embeddings-v2-small-en";
const extractor = await pipeline("feature-extraction", EMBEDDING_MODEL, {
  quantized: false,
});

// Open the database file that will hold the movies.
const db = new Database("movies.db");

// Schema, applied in order: the movies table (with an embedding column
// declared as VECTOR(512)), then a vector index over that column.
const SCHEMA_STATEMENTS = [
  "CREATE TABLE movies (title TEXT, year INT, embedding VECTOR(512))",
  "CREATE INDEX movies_idx USING vector ON movies (embedding)",
];
for (const sql of SCHEMA_STATEMENTS) {
  db.exec(sql);
}

// Prepared `INSERT` reused for every row; the third parameter is passed
// through SQL `vector(...)` before being stored.
const INSERT_MOVIE_SQL =
  "INSERT INTO movies (title, year, embedding) VALUES (?, ?, vector(?))";
const stmt = db.prepare(INSERT_MOVIE_SQL);

// Process a CSV file of movies, generating an embedding for each plot synopsis
// and inserting one row per movie.
//
// The parser is consumed with `for await` instead of an async "data" listener.
// A stream ignores the promise returned by an event listener, so the listener
// form keeps emitting rows without waiting for the embedding to finish and
// turns any failure inside the listener into an unhandled rejection. Async
// iteration pulls the next record only after the current one has been
// embedded and inserted, and lets errors propagate out of the loop.
const source = createReadStream("wiki_movie_plots_deduped.csv");
const parser = source.pipe(parse({ columns: true }));

// `.pipe()` does not forward errors, so hand file-read failures (e.g. a
// missing CSV) to the parser; they then surface as a rejection of the loop.
source.on("error", (err) => parser.destroy(err));

try {
  for await (const record of parser) {
    // NOTE(review): column names are used exactly as before; confirm that
    // `Title`, `Year` and `Plot` match the header row of the CSV in use.
    const { Title: title, Year: year, Plot: plot } = record;
    const output = await extractor([plot], { pooling: "mean" });
    const embedding = output[0].data;
    stmt.run([title, year, embedding]);
  }
} finally {
  // Release the database handle whether ingestion finished or failed.
  db.close();
}

0 comments on commit 724cb3d

Please sign in to comment.