Update type hinting in examples to PEP 585 standards (#32970)

apache · Nov 1, 2024 · commit 0987e79
1 parent d749c08

Showing 24 changed files with 74 additions and 97 deletions.
diff --git a/sdks/python/apache_beam/examples/complete/estimate_pi.py b/sdks/python/apache_beam/examples/complete/estimate_pi.py
@@ -30,17 +30,16 @@
 import json
 import logging
 import random
+from collections.abc import Iterable
 from typing import Any
-from typing import Iterable
-from typing import Tuple
 
 import apache_beam as beam
 from apache_beam.io import WriteToText
 from apache_beam.options.pipeline_options import PipelineOptions
 from apache_beam.options.pipeline_options import SetupOptions
 
 
-@beam.typehints.with_output_types(Tuple[int, int, int])
+@beam.typehints.with_output_types(tuple[int, int, int])
 @beam.typehints.with_input_types(int)
 def run_trials(runs):
   """Run trials and return a 3-tuple representing the results.
@@ -62,8 +61,8 @@ def run_trials(runs):
   return runs, inside_runs, 0
 
 
-@beam.typehints.with_output_types(Tuple[int, int, float])
-@beam.typehints.with_input_types(Iterable[Tuple[int, int, Any]])
+@beam.typehints.with_output_types(tuple[int, int, float])
+@beam.typehints.with_input_types(Iterable[tuple[int, int, Any]])
 def combine_results(results):
   """Combiner function to sum up trials and compute the estimate.
 

diff --git a/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py b/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py
@@ -25,7 +25,6 @@
 import unittest
 import uuid
 from typing import TYPE_CHECKING
-from typing import List
 
 import pytest
 import pytz
@@ -53,7 +52,7 @@
 if TYPE_CHECKING:
   import google.cloud.bigtable.instance
 
-EXISTING_INSTANCES: List['google.cloud.bigtable.instance.Instance'] = []
+EXISTING_INSTANCES: list['google.cloud.bigtable.instance.Instance'] = []
 LABEL_KEY = 'python-bigtable-beam'
 label_stamp = datetime.datetime.utcnow().replace(tzinfo=UTC)
 label_stamp_micros = _microseconds_from_datetime(label_stamp)

diff --git a/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py b/sdks/python/apache_beam/examples/cookbook/datastore_wordcount.py
@@ -59,7 +59,7 @@
 import logging
 import re
 import sys
-from typing import Iterable
+from collections.abc import Iterable
 from typing import Optional
 from typing import Text
 import uuid

diff --git a/sdks/python/apache_beam/examples/cookbook/group_with_coder.py b/sdks/python/apache_beam/examples/cookbook/group_with_coder.py
@@ -30,7 +30,6 @@
 import argparse
 import logging
 import sys
-import typing
 
 import apache_beam as beam
 from apache_beam import coders
@@ -71,7 +70,7 @@ def is_deterministic(self):
 # Annotate the get_players function so that the typehint system knows that the
 # input to the CombinePerKey operation is a key-value pair of a Player object
 # and an integer.
-@with_output_types(typing.Tuple[Player, int])
+@with_output_types(tuple[Player, int])
 def get_players(descriptor):
   name, points = descriptor.split(',')
   return Player(name), int(points)

diff --git a/sdks/python/apache_beam/examples/inference/huggingface_language_modeling.py b/sdks/python/apache_beam/examples/inference/huggingface_language_modeling.py
@@ -27,10 +27,8 @@
 
 import argparse
 import logging
-from typing import Dict
-from typing import Iterable
-from typing import Iterator
-from typing import Tuple
+from collections.abc import Iterable
+from collections.abc import Iterator
 
 import apache_beam as beam
 import torch
@@ -45,14 +43,14 @@
 from transformers import AutoTokenizer
 
 
-def add_mask_to_last_word(text: str) -> Tuple[str, str]:
+def add_mask_to_last_word(text: str) -> tuple[str, str]:
   text_list = text.split()
   return text, ' '.join(text_list[:-2] + ['<mask>', text_list[-1]])
 
 
 def tokenize_sentence(
-    text_and_mask: Tuple[str, str],
-    tokenizer: AutoTokenizer) -> Tuple[str, Dict[str, torch.Tensor]]:
+    text_and_mask: tuple[str, str],
+    tokenizer: AutoTokenizer) -> tuple[str, dict[str, torch.Tensor]]:
   text, masked_text = text_and_mask
   tokenized_sentence = tokenizer.encode_plus(masked_text, return_tensors="pt")
 
@@ -81,7 +79,7 @@ def __init__(self, tokenizer: AutoTokenizer):
     super().__init__()
     self.tokenizer = tokenizer
 
-  def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]:
+  def process(self, element: tuple[str, PredictionResult]) -> Iterable[str]:
     text, prediction_result = element
     inputs = prediction_result.example
     logits = prediction_result.inference['logits']

diff --git a/sdks/python/apache_beam/examples/inference/huggingface_question_answering.py b/sdks/python/apache_beam/examples/inference/huggingface_question_answering.py
@@ -28,8 +28,7 @@
 
 import argparse
 import logging
-from typing import Iterable
-from typing import Tuple
+from collections.abc import Iterable
 
 import apache_beam as beam
 from apache_beam.ml.inference.base import KeyedModelHandler
@@ -49,7 +48,7 @@ class PostProcessor(beam.DoFn):
   Hugging Face Pipeline for Question Answering returns a dictionary
   with score, start and end index of answer and the answer.
   """
-  def process(self, result: Tuple[str, PredictionResult]) -> Iterable[str]:
+  def process(self, result: tuple[str, PredictionResult]) -> Iterable[str]:
     text, prediction = result
     predicted_answer = prediction.inference['answer']
     yield text + ';' + predicted_answer

diff --git a/sdks/python/apache_beam/examples/inference/onnx_sentiment_classification.py b/sdks/python/apache_beam/examples/inference/onnx_sentiment_classification.py
@@ -28,9 +28,8 @@
 
 import argparse
 import logging
-from typing import Iterable
-from typing import Iterator
-from typing import Tuple
+from collections.abc import Iterable
+from collections.abc import Iterator
 
 import numpy as np
 
@@ -47,7 +46,7 @@
 
 
 def tokenize_sentence(text: str,
-                      tokenizer: RobertaTokenizer) -> Tuple[str, torch.Tensor]:
+                      tokenizer: RobertaTokenizer) -> tuple[str, torch.Tensor]:
   tokenized_sentence = tokenizer.encode(text, add_special_tokens=True)
 
   # Workaround to manually remove batch dim until we have the feature to
@@ -63,7 +62,7 @@ def filter_empty_lines(text: str) -> Iterator[str]:
 
 
 class PostProcessor(beam.DoFn):
-  def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]:
+  def process(self, element: tuple[str, PredictionResult]) -> Iterable[str]:
     filename, prediction_result = element
     prediction = np.argmax(prediction_result.inference, axis=0)
     yield filename + ';' + str(prediction)

diff --git a/sdks/python/apache_beam/examples/inference/pytorch_image_classification.py b/sdks/python/apache_beam/examples/inference/pytorch_image_classification.py
@@ -21,9 +21,8 @@
 import io
 import logging
 import os
-from typing import Iterator
+from collections.abc import Iterator
 from typing import Optional
-from typing import Tuple
 
 import apache_beam as beam
 import torch
@@ -41,7 +40,7 @@
 
 
 def read_image(image_file_name: str,
-               path_to_dir: Optional[str] = None) -> Tuple[str, Image.Image]:
+               path_to_dir: Optional[str] = None) -> tuple[str, Image.Image]:
   if path_to_dir is not None:
     image_file_name = os.path.join(path_to_dir, image_file_name)
   with FileSystems().open(image_file_name, 'r') as file:
@@ -122,13 +121,13 @@ def run(
     model_class = models.mobilenet_v2
     model_params = {'num_classes': 1000}
 
-  def preprocess(image_name: str) -> Tuple[str, torch.Tensor]:
+  def preprocess(image_name: str) -> tuple[str, torch.Tensor]:
     image_name, image = read_image(
       image_file_name=image_name,
       path_to_dir=known_args.images_dir)
     return (image_name, preprocess_image(image))
 
-  def postprocess(element: Tuple[str, PredictionResult]) -> str:
+  def postprocess(element: tuple[str, PredictionResult]) -> str:
     filename, prediction_result = element
     prediction = torch.argmax(prediction_result.inference, dim=0)
     return filename + ',' + str(prediction.item())

diff --git a/sdks/python/apache_beam/examples/inference/pytorch_image_classification_with_side_inputs.py b/sdks/python/apache_beam/examples/inference/pytorch_image_classification_with_side_inputs.py
@@ -62,10 +62,9 @@
 import io
 import logging
 import os
-from typing import Iterable
-from typing import Iterator
+from collections.abc import Iterable
+from collections.abc import Iterator
 from typing import Optional
-from typing import Tuple
 
 import apache_beam as beam
 import torch
@@ -84,7 +83,7 @@
 
 
 def read_image(image_file_name: str,
-               path_to_dir: Optional[str] = None) -> Tuple[str, Image.Image]:
+               path_to_dir: Optional[str] = None) -> tuple[str, Image.Image]:
   if path_to_dir is not None:
     image_file_name = os.path.join(path_to_dir, image_file_name)
   with FileSystems().open(image_file_name, 'r') as file:
@@ -116,7 +115,7 @@ class PostProcessor(beam.DoFn):
   Return filename, prediction and the model id used to perform the
   prediction
   """
-  def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]:
+  def process(self, element: tuple[str, PredictionResult]) -> Iterable[str]:
     filename, prediction_result = element
     prediction = torch.argmax(prediction_result.inference, dim=0)
     yield filename, prediction, prediction_result.model_id

diff --git a/sdks/python/apache_beam/examples/inference/pytorch_image_segmentation.py b/sdks/python/apache_beam/examples/inference/pytorch_image_segmentation.py
@@ -21,10 +21,9 @@
 import io
 import logging
 import os
-from typing import Iterable
-from typing import Iterator
+from collections.abc import Iterable
+from collections.abc import Iterator
 from typing import Optional
-from typing import Tuple
 
 import apache_beam as beam
 import torch
@@ -138,7 +137,7 @@
 
 
 def read_image(image_file_name: str,
-               path_to_dir: Optional[str] = None) -> Tuple[str, Image.Image]:
+               path_to_dir: Optional[str] = None) -> tuple[str, Image.Image]:
   if path_to_dir is not None:
     image_file_name = os.path.join(path_to_dir, image_file_name)
   with FileSystems().open(image_file_name, 'r') as file:
@@ -161,7 +160,7 @@ def filter_empty_lines(text: str) -> Iterator[str]:
 
 
 class PostProcessor(beam.DoFn):
-  def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]:
+  def process(self, element: tuple[str, PredictionResult]) -> Iterable[str]:
     filename, prediction_result = element
     prediction_labels = prediction_result.inference['labels']
     classes = [CLASS_ID_TO_NAME[label.item()] for label in prediction_labels]

diff --git a/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py b/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py
@@ -26,10 +26,8 @@
 
 import argparse
 import logging
-from typing import Dict
-from typing import Iterable
-from typing import Iterator
-from typing import Tuple
+from collections.abc import Iterable
+from collections.abc import Iterator
 
 import apache_beam as beam
 import torch
@@ -45,14 +43,14 @@
 from transformers import BertTokenizer
 
 
-def add_mask_to_last_word(text: str) -> Tuple[str, str]:
+def add_mask_to_last_word(text: str) -> tuple[str, str]:
   text_list = text.split()
   return text, ' '.join(text_list[:-2] + ['[MASK]', text_list[-1]])
 
 
 def tokenize_sentence(
-    text_and_mask: Tuple[str, str],
-    bert_tokenizer: BertTokenizer) -> Tuple[str, Dict[str, torch.Tensor]]:
+    text_and_mask: tuple[str, str],
+    bert_tokenizer: BertTokenizer) -> tuple[str, dict[str, torch.Tensor]]:
   text, masked_text = text_and_mask
   tokenized_sentence = bert_tokenizer.encode_plus(
       masked_text, return_tensors="pt")
@@ -84,7 +82,7 @@ def __init__(self, bert_tokenizer: BertTokenizer):
     super().__init__()
     self.bert_tokenizer = bert_tokenizer
 
-  def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]:
+  def process(self, element: tuple[str, PredictionResult]) -> Iterable[str]:
     text, prediction_result = element
     inputs = prediction_result.example
     logits = prediction_result.inference['logits']

diff --git a/sdks/python/apache_beam/examples/inference/pytorch_model_per_key_image_segmentation.py b/sdks/python/apache_beam/examples/inference/pytorch_model_per_key_image_segmentation.py
@@ -24,10 +24,9 @@
 import io
 import logging
 import os
-from typing import Iterable
-from typing import Iterator
+from collections.abc import Iterable
+from collections.abc import Iterator
 from typing import Optional
-from typing import Tuple
 
 import apache_beam as beam
 import torch
@@ -143,7 +142,7 @@
 
 
 def read_image(image_file_name: str,
-               path_to_dir: Optional[str] = None) -> Tuple[str, Image.Image]:
+               path_to_dir: Optional[str] = None) -> tuple[str, Image.Image]:
   if path_to_dir is not None:
     image_file_name = os.path.join(path_to_dir, image_file_name)
   with FileSystems().open(image_file_name, 'r') as file:
@@ -168,15 +167,15 @@ def filter_empty_lines(text: str) -> Iterator[str]:
 class KeyExamplesForEachModelType(beam.DoFn):
   """Duplicate data to run against each model type"""
   def process(
-      self, element: Tuple[torch.Tensor,
-                           str]) -> Iterable[Tuple[str, torch.Tensor]]:
+      self, element: tuple[torch.Tensor,
+                           str]) -> Iterable[tuple[str, torch.Tensor]]:
     yield 'v1', element[0]
     yield 'v2', element[0]
 
 
 class PostProcessor(beam.DoFn):
   def process(
-      self, element: Tuple[str, PredictionResult]) -> Tuple[torch.Tensor, str]:
+      self, element: tuple[str, PredictionResult]) -> tuple[torch.Tensor, str]:
     model, prediction_result = element
     prediction_labels = prediction_result.inference['labels']
     classes = [CLASS_ID_TO_NAME[label.item()] for label in prediction_labels]

diff --git a/sdks/python/apache_beam/examples/inference/run_inference_side_inputs.py b/sdks/python/apache_beam/examples/inference/run_inference_side_inputs.py
@@ -22,9 +22,9 @@
 import argparse
 import logging
 import time
-from typing import Iterable
+from collections.abc import Iterable
+from collections.abc import Sequence
 from typing import Optional
-from typing import Sequence
 
 import apache_beam as beam
 from apache_beam.ml.inference import base

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
@@ -31,7 +31,7 @@
 
 import argparse
 import os
-from typing import Iterable
+from collections.abc import Iterable
 
 import pandas
 

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -27,9 +27,7 @@
 import argparse
 import logging
 import os
-from typing import Iterable
-from typing import List
-from typing import Tuple
+from collections.abc import Iterable
 
 import apache_beam as beam
 from apache_beam.ml.inference.base import KeyedModelHandler
@@ -42,7 +40,7 @@
 from apache_beam.runners.runner import PipelineResult
 
 
-def process_input(row: str) -> Tuple[int, List[int]]:
+def process_input(row: str) -> tuple[int, list[int]]:
   data = row.split(',')
   label, pixels = int(data[0]), data[1:]
   pixels = [int(pixel) for pixel in pixels]
@@ -53,7 +51,7 @@ class PostProcessor(beam.DoFn):
   """Process the PredictionResult to get the predicted label.
   Returns a comma separated string with true label and predicted label.
   """
-  def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]:
+  def process(self, element: tuple[int, PredictionResult]) -> Iterable[str]:
     label, prediction_result = element
     prediction = prediction_result.inference
     yield '{},{}'.format(label, prediction)