From c21b84398234b7c5722645fb90314851abdecc33 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 21 May 2024 14:44:00 +0300 Subject: [PATCH] Vector search support --- Cargo.lock | 21 +++++++------------- Cargo.toml | 2 +- examples/vector/package.json | 16 ++++++++++++++++ examples/vector/vector.mjs | 37 ++++++++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 15 deletions(-) create mode 100644 examples/vector/package.json create mode 100644 examples/vector/vector.mjs diff --git a/Cargo.lock b/Cargo.lock index 43e48bb..8188717 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -723,8 +723,7 @@ dependencies = [ [[package]] name = "libsql" version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bd17bcc143f2a5be449680dc63b91327d953bcabebe34a69c549fca8934ec9d" +source = "git+https://github.com/tursodatabase/libsql/?rev=56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe#56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe" dependencies = [ "anyhow", "async-stream", @@ -761,8 +760,7 @@ dependencies = [ [[package]] name = "libsql-ffi" version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f67a0b0c6585ed06d48bfb2fd702a826065420e602fa34f8eb6e3a44e26520b9" +source = "git+https://github.com/tursodatabase/libsql/?rev=56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe#56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe" dependencies = [ "bindgen", "cc", @@ -771,8 +769,7 @@ dependencies = [ [[package]] name = "libsql-hrana" version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "220a925fe6d49dbfa7523b20f5a5391f579b5d9dcf9dd1225606d00929fcab3a" +source = "git+https://github.com/tursodatabase/libsql/?rev=56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe#56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe" dependencies = [ "base64 0.21.7", "bytes", @@ -795,8 +792,7 @@ dependencies = [ [[package]] name = "libsql-rusqlite" version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "6227e9be9b8cb061692babbcdc7ab268a8df3517d5b377f1d603cfdc9cafe12a" +source = "git+https://github.com/tursodatabase/libsql/?rev=56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe#56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe" dependencies = [ "bitflags 2.4.2", "fallible-iterator 0.2.0", @@ -809,8 +805,7 @@ dependencies = [ [[package]] name = "libsql-sqlite3-parser" version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "095d2cf702a5c9c152e48b369f69da30cc44351fa9432621dd8976834abc1752" +source = "git+https://github.com/tursodatabase/libsql/?rev=56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe#56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe" dependencies = [ "bitflags 2.4.2", "cc", @@ -828,8 +823,7 @@ dependencies = [ [[package]] name = "libsql-sys" version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5829f758a82418429de6cb0eb310502de78d0908efc3fe2f84b6a41f2f6bebd1" +source = "git+https://github.com/tursodatabase/libsql/?rev=56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe#56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe" dependencies = [ "bytes", "libsql-ffi", @@ -842,8 +836,7 @@ dependencies = [ [[package]] name = "libsql_replication" version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef903dda0e85af2224e7454380e59d9602738a5041546a4807438951ad0d5b25" +source = "git+https://github.com/tursodatabase/libsql/?rev=56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe#56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe" dependencies = [ "aes", "async-stream", diff --git a/Cargo.toml b/Cargo.toml index 621f394..c207e60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] tracing-subscriber = { version = "0.3", features = ["env-filter"] } -libsql = { version = "0.3.5", features = ["encryption"] } +libsql = { git = "https://github.com/tursodatabase/libsql/", rev = "56bfe98f9f12b80f5d8ce1f0e5019e8642cc19fe", features = ["encryption"] } tracing = "0.1" 
once_cell = "1.18.0"
 tokio = { version = "1.29.1", features = [ "rt-multi-thread" ] }
diff --git a/examples/vector/package.json b/examples/vector/package.json
new file mode 100644
index 0000000..e02dc13
--- /dev/null
+++ b/examples/vector/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "libsql-examples-vector",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "",
+  "license": "MIT",
+  "dependencies": {
+    "@xenova/transformers": "^2.17.1",
+    "csv-parse": "^5.5.5",
+    "libsql": "../../"
+  }
+}
diff --git a/examples/vector/vector.mjs b/examples/vector/vector.mjs
new file mode 100644
index 0000000..915e145
--- /dev/null
+++ b/examples/vector/vector.mjs
@@ -0,0 +1,37 @@
+import { pipeline } from "@xenova/transformers";
+import { createReadStream } from "fs";
+import { parse } from "csv-parse";
+import Database from "libsql";
+
+// Create an embeddings generator.
+const extractor = await pipeline(
+  "feature-extraction",
+  "Xenova/jina-embeddings-v2-small-en",
+  { quantized: false },
+);
+
+// Open a database file.
+const db = new Database("movies.db");
+
+// Create a table for movies with an embedding as a column.
+db.exec("CREATE TABLE movies (title TEXT, year INT, embedding VECTOR(512))");
+
+// Create a vector index on the embedding column.
+db.exec("CREATE INDEX movies_idx USING vector ON movies (embedding)");
+
+// Prepare a SQL `INSERT` statement.
+const stmt = db.prepare(
+  "INSERT INTO movies (title, year, embedding) VALUES (?, ?, vector(?))",
+);
+
+// Process a CSV file of movies generating embeddings for plot synopsis.
+// Rows are awaited one at a time so the parser is back-pressured and a failed
+// embedding or insert rejects here instead of becoming an unhandled rejection.
+const rows = createReadStream("wiki_movie_plots_deduped.csv").pipe(
+  parse({ columns: true }),
+);
+for await (const { Title: title, Year: year, Plot: plot } of rows) {
+  const output = await extractor([plot], { pooling: "mean" });
+  const embedding = output[0].data;
+  stmt.run([title, year, embedding]);
+}