From af44de36cb82277bb5e535e6c2ba968c5b52a18d Mon Sep 17 00:00:00 2001 From: francislan Date: Mon, 21 Oct 2024 01:44:25 -0700 Subject: [PATCH] Change post-filters to pre-filters in BigQueryVectorStore (#554) The filters built by BigQueryVectorStore's _create_filters are currently applied as post-filters. Now that BigQuery Vector Search supports pre-filtering on stored columns (https://cloud.google.com/bigquery/docs/vector-index#stored-columns), we can change all post-filters to pre-filters. Note that this requires storing the columns that are referenced by the pre-filter. This cannot be done in LangChain yet. If the required columns are not stored, then the pre-filters will act as post-filters. The results returned by the new post-filters will be a superset of those returned by the current post-filters, which can return fewer results. --- .../bq_storage_vectorstores/bigquery.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py b/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py index 8c6bb525..b28a1da8 100644 --- a/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py +++ b/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py @@ -333,9 +333,9 @@ def _create_search_query( full_query = f"""{embeddings_query} {select_clause} FROM VECTOR_SEARCH( - TABLE `{self.full_table_id}`, + (SELECT * FROM `{self.full_table_id}` WHERE {where_filter_expr}), "{self.embedding_field}", - (SELECT row_num, {self.embedding_field} from embeddings), + (SELECT row_num, {self.embedding_field} FROM embeddings), distance_type => "{self.distance_type}", top_k => {k} ) @@ -346,7 +346,6 @@ def _create_search_query( FROM ( {full_query} ) AS result - WHERE {where_filter_expr} ORDER BY row_num, score """ return full_query_wrapper