feat: sharded read rows #766
Changes from 250 commits
@@ -31,13 +31,15 @@
 import warnings
 import sys
 import random
+from itertools import chain

 from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta
 from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient
 from google.cloud.bigtable_v2.services.bigtable.async_client import DEFAULT_CLIENT_INFO
 from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import (
     PooledBigtableGrpcAsyncIOTransport,
 )
+from google.cloud.bigtable_v2.types.bigtable import PingAndWarmRequest
 from google.cloud.client import ClientWithProject
 from google.api_core.exceptions import GoogleAPICallError
 from google.api_core import retry_async as retries
@@ -50,17 +52,24 @@
 from google.cloud.bigtable.row import Row
 from google.cloud.bigtable.read_rows_query import ReadRowsQuery
 from google.cloud.bigtable.iterators import ReadRowsIterator
+from google.cloud.bigtable.exceptions import FailedQueryShardError
+from google.cloud.bigtable.exceptions import ShardedReadRowsExceptionGroup

 from google.cloud.bigtable.mutations import Mutation, RowMutationEntry
 from google.cloud.bigtable._mutate_rows import _MutateRowsOperation
 from google.cloud.bigtable._helpers import _make_metadata
 from google.cloud.bigtable._helpers import _convert_retry_deadline
+from google.cloud.bigtable._helpers import _attempt_timeout_generator

 if TYPE_CHECKING:
     from google.cloud.bigtable.mutations_batcher import MutationsBatcher
     from google.cloud.bigtable import RowKeySamples
     from google.cloud.bigtable.row_filters import RowFilter
     from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule

+# used by read_rows_sharded to limit how many requests are attempted in parallel
+CONCURRENCY_LIMIT = 10
+

 class BigtableDataClient(ClientWithProject):
     def __init__(
@@ -186,10 +195,13 @@ async def _ping_and_warm_instances(
         - sequence of results or exceptions from the ping requests
         """
         ping_rpc = channel.unary_unary(
-            "/google.bigtable.v2.Bigtable/PingAndWarmChannel"
+            "/google.bigtable.v2.Bigtable/PingAndWarm",
+            request_serializer=PingAndWarmRequest.serialize,
         )
         tasks = [ping_rpc({"name": n}) for n in self._active_instances]
-        return await asyncio.gather(*tasks, return_exceptions=True)
+        result = await asyncio.gather(*tasks, return_exceptions=True)
+        # return None in place of empty successful responses
+        return [r or None for r in result]

     async def _manage_channel(
         self,
@@ -517,20 +529,59 @@ async def read_rows_sharded(
         self,
         query_list: list[ReadRowsQuery] | list[dict[str, Any]],
         *,
-        limit: int | None,
-        operation_timeout: int | float | None = 60,
+        operation_timeout: int | float | None = None,
         per_request_timeout: int | float | None = None,
-    ) -> ReadRowsIterator:
+    ) -> list[Row]:
         """
-        Runs a sharded query in parallel
-
-        Each query in query list will be run concurrently, with results yielded as they are ready
-        yielded results may be out of order
+        Runs a sharded query in parallel, then return the results in a single list.
+        Results will be returned in the order of the input queries.
+
+        This function is intended to be run on the results on a query.shard() call:
+
+        ```
+        table_shard_keys = await table.sample_row_keys()
+        query = ReadRowsQuery(...)
+        shard_queries = query.shard(table_shard_keys)
+        results = await table.read_rows_sharded(shard_queries)
+        ```

         Args:
             - query_list: a list of queries to run in parallel
+        Raises:
+            - ShardedReadRowsExceptionGroup: if any of the queries failed
+            - ValueError: if the query_list is empty
         """
-        raise NotImplementedError
+        if not query_list:
+            raise ValueError("query_list must contain at least one query")
+        routine_list = [
+            self.read_rows(
+                query,
+                operation_timeout=operation_timeout,
+                per_request_timeout=per_request_timeout,
+            )
+            for query in query_list
+        ]
+        # submit requests in batches to limit concurrency
+        batched_routines = [
+            routine_list[i : i + CONCURRENCY_LIMIT]
+            for i in range(0, len(routine_list), CONCURRENCY_LIMIT)
+        ]
+        # run batches and collect results
+        results_list = []
+        for batch in batched_routines:
+            batch_result = await asyncio.gather(*batch, return_exceptions=True)
+            results_list.extend(batch_result)
+        # collect exceptions
+        exception_list = [
+            FailedQueryShardError(idx, query_list[idx], e)
+            for idx, e in enumerate(results_list)
+            if isinstance(e, Exception)
+        ]
+        if exception_list:
+            # if any sub-request failed, raise an exception instead of returning results
+            raise ShardedReadRowsExceptionGroup(exception_list, len(query_list))
+        combined_list = list(chain.from_iterable(results_list))
+        return combined_list

     async def row_exists(
         self,

Review thread on the ShardedReadRowsExceptionGroup block:

- "Since you are taking the effort to let all of the shards finish despite the error, you might as well add the partial results in the exception"
- "Good point, I added a …"
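To make the new surface concrete, here is a hedged usage sketch. The `ReadRowsQuery()` construction, the `.exceptions` attribute, and the table setup are assumptions based on the names in this diff and on standard exception-group semantics, not confirmed by it:

```python
# Sketch only: assumes `table` is a connected Table instance and that
# ShardedReadRowsExceptionGroup exposes its per-shard failures in a list
# (attribute name assumed here).
from google.cloud.bigtable.exceptions import ShardedReadRowsExceptionGroup
from google.cloud.bigtable.read_rows_query import ReadRowsQuery

async def fetch_all_rows(table):
    samples = await table.sample_row_keys()
    shard_queries = ReadRowsQuery().shard(samples)  # query construction assumed
    try:
        # results come back as one flat list, ordered by input query
        return await table.read_rows_sharded(shard_queries)
    except ShardedReadRowsExceptionGroup as group:
        # each entry is a FailedQueryShardError wrapping the shard index,
        # the failed query, and the underlying cause
        for failure in group.exceptions:  # attribute name assumed
            print(f"shard failed: {failure!r}")
        raise
```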
@@ -549,32 +600,81 @@ async def row_exists(
         """
         raise NotImplementedError

-    async def sample_keys(
+    async def sample_row_keys(
         self,
         *,
-        operation_timeout: int | float | None = 60,
-        per_sample_timeout: int | float | None = 10,
-        per_request_timeout: int | float | None = None,
+        operation_timeout: float | None = None,
+        per_request_timeout: float | None = None,
     ) -> RowKeySamples:
         """
         Return a set of RowKeySamples that delimit contiguous sections of the table of
         approximately equal size

         RowKeySamples output can be used with ReadRowsQuery.shard() to create a sharded query that
         can be parallelized across multiple backend nodes read_rows and read_rows_stream
-        requests will call sample_keys internally for this purpose when sharding is enabled
+        requests will call sample_row_keys internally for this purpose when sharding is enabled

         RowKeySamples is simply a type alias for list[tuple[bytes, int]]; a list of
         row_keys, along with offset positions in the table

         Returns:
             - a set of RowKeySamples the delimit contiguous sections of the table
         Raises:
             - DeadlineExceeded: raised after operation timeout
               will be chained with a RetryExceptionGroup containing all GoogleAPIError
               exceptions from any retries that failed
+            - GoogleAPICallError: if the sample_row_keys request fails
         """
-        raise NotImplementedError
+        # prepare timeouts
+        operation_timeout = operation_timeout or self.default_operation_timeout
+        per_request_timeout = per_request_timeout or self.default_per_request_timeout
+
+        if operation_timeout <= 0:
+            raise ValueError("operation_timeout must be greater than 0")
+        if per_request_timeout is not None and per_request_timeout <= 0:
+            raise ValueError("per_request_timeout must be greater than 0")
+        if per_request_timeout is not None and per_request_timeout > operation_timeout:
+            raise ValueError(
+                "per_request_timeout must not be greater than operation_timeout"
+            )
+        attempt_timeout_gen = _attempt_timeout_generator(
+            per_request_timeout, operation_timeout
+        )
+        # prepare retryable
+        predicate = retries.if_exception_type(
+            core_exceptions.DeadlineExceeded,
+            core_exceptions.ServiceUnavailable,
+        )
+        transient_errors = []
+
+        def on_error_fn(exc):
+            # add errors to list if retryable
+            if predicate(exc):
+                transient_errors.append(exc)
+
+        retry = retries.AsyncRetry(
+            predicate=predicate,
+            timeout=operation_timeout,
+            initial=0.01,
+            multiplier=2,
+            maximum=60,
+            on_error=on_error_fn,
+            is_stream=False,
+        )
+
+        # prepare request
+        metadata = _make_metadata(self.table_name, self.app_profile_id)
+
+        async def execute_rpc():
+            results = await self.client._gapic_client.sample_row_keys(
+                table_name=self.table_name,
+                app_profile_id=self.app_profile_id,
+                timeout=next(attempt_timeout_gen),
+                metadata=metadata,
+            )
+            return [(s.row_key, s.offset_bytes) async for s in results]
+
+        wrapped_fn = _convert_retry_deadline(
+            retry(execute_rpc), operation_timeout, transient_errors
+        )
+        return await wrapped_fn()

     def mutations_batcher(self, **kwargs) -> MutationsBatcher:
         """
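`_attempt_timeout_generator` is a private helper whose body is not shown in this diff; based on how it is consumed above (`next(attempt_timeout_gen)` once per attempt), a plausible sketch is a generator that clamps each per-attempt timeout to the time remaining in the overall operation budget:

```python
# Illustrative sketch, not the library's actual implementation: yield the
# per-request timeout, clamped to however much of the operation budget is left.
from __future__ import annotations

import time
from typing import Generator

def attempt_timeout_generator(
    per_request_timeout: float | None, operation_timeout: float
) -> Generator[float, None, None]:
    # with no per-request timeout, each attempt may use the whole budget
    if per_request_timeout is None:
        per_request_timeout = operation_timeout
    deadline = time.monotonic() + operation_timeout
    while True:
        remaining = max(0.0, deadline - time.monotonic())
        yield min(per_request_timeout, remaining)
```

This shape explains why later attempts in a retry loop receive progressively smaller timeouts and never overshoot `operation_timeout`.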
@@ -819,16 +919,17 @@ async def close(self):
         """
         Called to close the Table instance and release any resources held by it.
         """
+        self._register_instance_task.cancel()
         await self.client._remove_instance_registration(self.instance_id, self)

     async def __aenter__(self):
         """
         Implement async context manager protocol

-        Register this instance with the client, so that
+        Ensure registration task has time to run, so that
         grpc channels will be warmed for the specified instance
         """
-        await self.client._register_instance(self.instance_id, self)
+        await self._register_instance_task
         return self

     async def __aexit__(self, exc_type, exc_val, exc_tb):
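The `__aenter__` change above means entering the table's context now awaits the already-running registration task instead of registering again. A hedged usage sketch; the `get_table` call and the import path are assumptions from the surrounding client API, not shown in this diff:

```python
# Sketch: by the time the `async with` body runs, the instance registration
# task has completed, so gRPC channels are warmed for "my-instance".
import asyncio

from google.cloud.bigtable import BigtableDataClient  # import path assumed

async def main():
    client = BigtableDataClient()
    async with client.get_table("my-instance", "my-table") as table:  # assumed API
        keys = await table.sample_row_keys()
    # __aexit__ presumably calls close(), which cancels the registration
    # task and removes the instance registration

asyncio.run(main())
```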
Review discussion (on overlapping shard queries):

- "Do we want to raise an error if any of the shard queries overlap? Or is it ok to get duplicate rows?"
- "I don't think we need an error. Also, the rows will be de-duplicated on the server side."
- "How does the de-duplication work if we're requesting the duplicates in separate rpcs?"
- "I don't think the same key can exist in multiple RPCs in the current implementation. The same key value will be put in the same shard, and we aren't segmenting the shard, so it should end up in the same RPC."
- "Yeah, assuming they use the query.shard() function, that should be the case. But this method allows passing in a generic list of queries, so users may pass in overlapping queries, right?"
- "You are right that it's possible. I think we should avoid this situation, but not by throwing an error; I think we should make it impossible to happen. Perhaps we can do the following: create a Batch fetching context that end users create. The context will automatically call SampleRowKeys and cache the result, and maybe refresh it every X minutes. The end user then interacts with this object by passing it lists of keys and ranges that the context shards."
- "And then move the read_rows_sharded(unsharded_query) function onto the context object? Or something else? I'd be a bit hesitant to add more background tasks if we can avoid it, but we can probably work something out. Another option that would be very simple to add would be to make query.shard return a custom ShardedQuery object that just wraps the query list, and then only accept that as input for read_rows_sharded. Or even simpler, just make it a type alias. Is this something we can create an issue for and address after the first alpha, or do you want it resolved before merging this?"
- "I think this would need to come before alpha, as it's part of the public surface."
- "Ok, I made a custom type for ShardedQueries, which should discourage people from passing their own custom queries. We can discuss more advanced changes later. Let me know what you think."
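For illustration, the simplest shape discussed above (a wrapper type that only `query.shard()` is expected to produce) might look like the following sketch; the actual `ShardedQuery` type added in the PR may differ:

```python
# Hypothetical sketch of the "custom type" approach discussed above: a thin,
# immutable wrapper that only ReadRowsQuery.shard() produces, discouraging
# callers from handing read_rows_sharded an arbitrary (possibly overlapping)
# list of queries.
from collections.abc import Sequence

class ShardedQuery(Sequence):
    """Opaque container of shard queries, returned by ReadRowsQuery.shard()."""

    def __init__(self, *queries):
        self._queries = tuple(queries)

    def __getitem__(self, index):
        return self._queries[index]

    def __len__(self):
        return len(self._queries)

# read_rows_sharded would then type its parameter as ShardedQuery instead of
# list[ReadRowsQuery], making overlap-free input the easy default while still
# allowing determined users to construct one by hand.
```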