diff --git a/libs/vertexai/langchain_google_vertexai/vectorstores/document_storage.py b/libs/vertexai/langchain_google_vertexai/vectorstores/document_storage.py index ad2061bd..27e9e5ab 100644 --- a/libs/vertexai/langchain_google_vertexai/vectorstores/document_storage.py +++ b/libs/vertexai/langchain_google_vertexai/vectorstores/document_storage.py @@ -234,6 +234,7 @@ def __init__( kind: str = "document_id", text_property_name: str = "text", metadata_property_name: str = "metadata", + exclude_from_indexes: Optional[List[str]] = None, ) -> None: """Constructor. Args: @@ -244,6 +245,7 @@ def __init__( self._client = datastore_client self._text_property_name = text_property_name self._metadata_property_name = metadata_property_name + self.exclude_from_indexes = exclude_from_indexes self._kind = kind def mget(self, keys: Sequence[str]) -> List[Optional[Document]]: @@ -289,7 +291,9 @@ def mset(self, key_value_pairs: Sequence[Tuple[str, Document]]) -> None: entities = [] for key, document in zip(keys, documents): - entity = self._client.entity(key=key) + entity = self._client.entity( + key=key, exclude_from_indexes=self.exclude_from_indexes + ) entity[self._text_property_name] = document.page_content entity[self._metadata_property_name] = document.metadata entities.append(entity) diff --git a/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py b/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py index e9c49734..5c83caa2 100644 --- a/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py +++ b/libs/vertexai/langchain_google_vertexai/vectorstores/vectorstores.py @@ -375,6 +375,7 @@ def from_components( embedding: Optional[Embeddings] = None, stream_update: bool = False, datastore_client_kwargs: Optional[Dict[str, Any]] = None, + exclude_from_indexes: Optional[List[str]] = None, datastore_kind: str = "document_id", datastore_text_property_name: str = "text", datastore_metadata_property_name: str = "metadata", @@ -399,6 +400,7 
@@ def from_components( index must be compatible with stream/batch updates. kwargs: Additional keyword arguments to pass to VertexAIVectorSearch.__init__(). + exclude_from_indexes: Fields to exclude from Datastore indexing. Returns: A configured VectorSearchVectorStoreDatastore. @@ -425,11 +427,14 @@ def from_components( datastore_client = sdk_manager.get_datastore_client(**datastore_client_kwargs) + if exclude_from_indexes is None: + exclude_from_indexes = [] document_storage = DataStoreDocumentStorage( datastore_client=datastore_client, kind=datastore_kind, text_property_name=datastore_text_property_name, metadata_property_name=datastore_metadata_property_name, + exclude_from_indexes=exclude_from_indexes, ) return cls(