From a33963c44b5a35e4e5369ab1c87ccb443fbe3e1b Mon Sep 17 00:00:00 2001 From: "Michael A. Smith" Date: Wed, 19 Jul 2023 23:10:57 -0400 Subject: [PATCH] AVRO-3807 Cleanup Unused Imports and Variable Assignments (#2363) * AVRO-3807: Implement Autoflake in Lint * AVRO-3807: Cleanup Unused Imports and Assignments Other cleanups: - Use assertNotIn instead of assertFalse - Avoid fall-through returns after explicit returns - Avoid importing the same module more than once. * AVRO-3807: Configure Flake8 Max Line Length --- lang/py/avro/datafile.py | 2 +- lang/py/avro/io.py | 17 +++------------- lang/py/avro/ipc.py | 7 +++---- lang/py/avro/schema.py | 21 +++++++++----------- lang/py/avro/test/test_protocol.py | 2 +- lang/py/avro/test/test_schema.py | 6 +++--- lang/py/avro/test/test_tether_task.py | 1 - lang/py/avro/test/test_tether_task_runner.py | 2 -- lang/py/avro/tether/__init__.py | 10 ++++++++++ lang/py/avro/tether/tether_task.py | 12 +++++------ lang/py/avro/tether/tether_task_runner.py | 2 +- lang/py/avro/utils.py | 2 ++ lang/py/pyproject.toml | 9 +++++++++ lang/py/setup.cfg | 5 +++++ lang/py/setup.py | 2 -- lang/py/tox.ini | 2 ++ 16 files changed, 55 insertions(+), 47 deletions(-) diff --git a/lang/py/avro/datafile.py b/lang/py/avro/datafile.py index d39a91131e3..0f002cee1b5 100644 --- a/lang/py/avro/datafile.py +++ b/lang/py/avro/datafile.py @@ -26,7 +26,7 @@ import json import warnings from types import TracebackType -from typing import IO, AnyStr, BinaryIO, MutableMapping, Optional, Type, cast +from typing import IO, AnyStr, MutableMapping, Optional, Type, cast import avro.codecs import avro.errors diff --git a/lang/py/avro/io.py b/lang/py/avro/io.py index 7b5576697eb..5e3ffa6c537 100644 --- a/lang/py/avro/io.py +++ b/lang/py/avro/io.py @@ -89,17 +89,7 @@ import decimal import struct import warnings -from typing import ( - IO, - Deque, - Generator, - Iterable, - List, - Mapping, - Optional, - Sequence, - Union, -) +from typing import IO, Generator, Iterable, List, Mapping, Optional, Sequence, Union import avro.constants import avro.errors @@ -435,7 +425,6 @@ def write_null(self, datum: None) -> None: """ null is written as zero bytes """ - pass def write_boolean(self, datum: bool) -> None: """ @@ -810,7 +799,7 @@ def read_array(self, writers_schema: avro.schema.ArraySchema, readers_schema: av while block_count != 0: if block_count < 0: block_count = -block_count - block_size = decoder.read_long() + decoder.skip_long() for i in range(block_count): read_items.append(self.read_data(writers_schema.items, readers_schema.items, decoder)) block_count = decoder.read_long() @@ -847,7 +836,7 @@ def read_map(self, writers_schema: avro.schema.MapSchema, readers_schema: avro.s while block_count != 0: if block_count < 0: block_count = -block_count - block_size = decoder.read_long() + decoder.skip_long() for i in range(block_count): key = decoder.read_utf8() read_items[key] = self.read_data(writers_schema.values, readers_schema.values, decoder) diff --git a/lang/py/avro/ipc.py b/lang/py/avro/ipc.py index 7ab6ea497ae..7a5a5831f08 100644 --- a/lang/py/avro/ipc.py +++ b/lang/py/avro/ipc.py @@ -199,7 +199,7 @@ def read_call_response(self, message_name, decoder): the error, serialized per the message's error union schema. """ # response metadata - response_metadata = META_READER.read(decoder) + META_READER.read(decoder) # remote response schema remote_message_schema = self.remote_protocol.messages.get(message_name) @@ -288,7 +288,7 @@ def respond(self, call_request): return buffer_writer.getvalue() # read request using remote protocol - request_metadata = META_READER.read(buffer_decoder) + META_READER.read(buffer_decoder) remote_message_name = buffer_decoder.read_utf8() # get remote and local request schemas so we can do @@ -364,9 +364,8 @@ def process_handshake(self, decoder, encoder): def invoke(self, local_message, request): """ - Aactual work done by server: cf. handler in thrift. + Actual work done by server: cf. handler in thrift. """ - pass def read_request(self, writers_schema, readers_schema, decoder): datum_reader = avro.io.DatumReader(writers_schema, readers_schema) diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py index f852e146a3e..fcd85114f93 100644 --- a/lang/py/avro/schema.py +++ b/lang/py/avro/schema.py @@ -62,7 +62,6 @@ import avro.constants import avro.errors -from avro.constants import NAMED_TYPES, PRIMITIVE_TYPES, VALID_TYPES from avro.name import Name, Names, validate_basename # @@ -232,7 +231,7 @@ class Schema(abc.ABC, CanonicalPropertiesMixin): def __init__(self, type_: str, other_props: Optional[Mapping[str, object]] = None, validate_names: bool = True) -> None: if not isinstance(type_, str): raise avro.errors.SchemaParseException("Schema type must be a string.") - if type_ not in VALID_TYPES: + if type_ not in avro.constants.VALID_TYPES: raise avro.errors.SchemaParseException(f"{type_} is not a valid type.") self.set_prop("type", type_) self.type = type_ @@ -491,7 +490,7 @@ class PrimitiveSchema(EqualByPropsMixin, Schema): def __init__(self, type, other_props=None): # Ensure valid ctor args - if type not in PRIMITIVE_TYPES: + if type not in avro.constants.PRIMITIVE_TYPES: raise avro.errors.AvroException(f"{type} is not a valid primitive type.") # Call parent ctor @@ -870,8 +869,8 @@ def __init__(self, schemas, names=None, validate_names: bool = True): raise avro.errors.SchemaParseException(f"Union item must be a valid Avro schema: {e}") # check the new schema if ( - new_schema.type in VALID_TYPES - and new_schema.type not in NAMED_TYPES + new_schema.type in avro.constants.VALID_TYPES + and new_schema.type not in avro.constants.NAMED_TYPES and new_schema.type in [schema.type for schema in schema_objects] ): raise avro.errors.SchemaParseException(f"{new_schema.type} type already in Union") @@ -910,9 +909,7 @@ def to_canonical_json(self, names=None): def validate(self, datum): """Return the first branch schema of which datum is a valid example, else None.""" - for branch in self.schemas: - if branch.validate(datum) is not None: - return branch + return next((branch for branch in self.schemas if branch.validate(datum) is not None), None) class ErrorUnionSchema(UnionSchema): @@ -1243,7 +1240,7 @@ def make_avsc_object( if logical_schema is not None: return cast(Schema, logical_schema) - if type_ in NAMED_TYPES: + if type_ in avro.constants.NAMED_TYPES: name = json_data.get("name") if not isinstance(name, str): raise avro.errors.SchemaParseException(f"Name {name} must be a string, but it is {type(name)}.") @@ -1273,10 +1270,10 @@ def make_avsc_object( return RecordSchema(name, namespace, fields, names, type_, doc, other_props, validate_names) raise avro.errors.SchemaParseException(f"Unknown Named Type: {type_}") - if type_ in PRIMITIVE_TYPES: + if type_ in avro.constants.PRIMITIVE_TYPES: return PrimitiveSchema(type_, other_props) - if type_ in VALID_TYPES: + if type_ in avro.constants.VALID_TYPES: if type_ == "array": items = json_data.get("items") return ArraySchema(items, names, other_props, validate_names) @@ -1296,7 +1293,7 @@ def make_avsc_object( elif isinstance(json_data, list): return UnionSchema(json_data, names, validate_names=validate_names) # JSON string (primitive) - elif json_data in PRIMITIVE_TYPES: + elif json_data in avro.constants.PRIMITIVE_TYPES: return PrimitiveSchema(json_data) # not for us! fail_msg = f"Could not make an Avro Schema object from {json_data}" diff --git a/lang/py/avro/test/test_protocol.py b/lang/py/avro/test/test_protocol.py index b3cd7bd692a..82fec0dd3f6 100644 --- a/lang/py/avro/test/test_protocol.py +++ b/lang/py/avro/test/test_protocol.py @@ -392,7 +392,7 @@ def test_inner_namespace_not_rendered(self): self.assertEqual("com.acme.Greeting", proto.types[0].fullname) self.assertEqual("Greeting", proto.types[0].name) # but there shouldn't be 'namespace' rendered to json on the inner type - self.assertFalse("namespace" in proto.to_json()["types"][0]) + self.assertNotIn("namespace", proto.to_json()["types"][0]) class ProtocolParseTestCase(unittest.TestCase): diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py index 668ca8258f2..eac76af2e8b 100644 --- a/lang/py/avro/test/test_schema.py +++ b/lang/py/avro/test/test_schema.py @@ -58,12 +58,12 @@ class InvalidTestSchema(TestSchema): valid = False -PRIMITIVE_EXAMPLES = [InvalidTestSchema('"True"')] # type: List[TestSchema] +PRIMITIVE_EXAMPLES: List[TestSchema] = [InvalidTestSchema('"True"')] PRIMITIVE_EXAMPLES.append(InvalidTestSchema("True")) PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"no_type": "test"}')) PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"type": "panther"}')) -PRIMITIVE_EXAMPLES.extend([ValidTestSchema(f'"{t}"') for t in avro.schema.PRIMITIVE_TYPES]) -PRIMITIVE_EXAMPLES.extend([ValidTestSchema({"type": t}) for t in avro.schema.PRIMITIVE_TYPES]) +PRIMITIVE_EXAMPLES.extend([ValidTestSchema(f'"{t}"') for t in avro.constants.PRIMITIVE_TYPES]) +PRIMITIVE_EXAMPLES.extend([ValidTestSchema({"type": t}) for t in avro.constants.PRIMITIVE_TYPES]) FIXED_EXAMPLES = [ ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}), diff --git a/lang/py/avro/test/test_tether_task.py b/lang/py/avro/test/test_tether_task.py index 5a4e2b26dbe..35be22437dd 100644 --- a/lang/py/avro/test/test_tether_task.py +++ b/lang/py/avro/test/test_tether_task.py @@ -18,7 +18,6 @@ # limitations under the License. import io -import os import subprocess import sys import time diff --git a/lang/py/avro/test/test_tether_task_runner.py b/lang/py/avro/test/test_tether_task_runner.py index 7696161d3c9..8e52c985edd 100644 --- a/lang/py/avro/test/test_tether_task_runner.py +++ b/lang/py/avro/test/test_tether_task_runner.py @@ -19,7 +19,6 @@ import io import logging -import os import subprocess import sys import time @@ -47,7 +46,6 @@ def test1(self): pyfile = avro.test.mock_tether_parent.__file__ proc = subprocess.Popen([sys.executable, pyfile, "start_server", f"{parent_port}"]) - input_port = avro.tether.util.find_port() print(f"Mock server started process pid={proc.pid}") # Possible race condition? open tries to connect to the subprocess before the subprocess is fully started diff --git a/lang/py/avro/tether/__init__.py b/lang/py/avro/tether/__init__.py index 4875581f292..68df39ec4d4 100644 --- a/lang/py/avro/tether/__init__.py +++ b/lang/py/avro/tether/__init__.py @@ -27,3 +27,13 @@ ) from avro.tether.tether_task_runner import TaskRunner from avro.tether.util import find_port + +__all__ = ( + "HTTPRequestor", + "TaskRunner", + "TaskType", + "TetherTask", + "find_port", + "inputProtocol", + "outputProtocol", +) diff --git a/lang/py/avro/tether/tether_task.py b/lang/py/avro/tether/tether_task.py index c521fa56b4c..6caac6abe90 100644 --- a/lang/py/avro/tether/tether_task.py +++ b/lang/py/avro/tether/tether_task.py @@ -285,7 +285,7 @@ def configure(self, taskType, inSchemaText, outSchemaText): try: inSchema = avro.schema.parse(inSchemaText) - outSchema = avro.schema.parse(outSchemaText) + avro.schema.parse(outSchemaText) if taskType == TaskType.MAP: self.inReader = avro.io.DatumReader(writers_schema=inSchema, readers_schema=self.inschema) @@ -299,7 +299,7 @@ def configure(self, taskType, inSchemaText, outSchemaText): # determine which fields in the input record are they keys for the reducer self._red_fkeys = [f.name for f in self.midschema.fields if not (f.order == "ignore")] - except Exception as e: + except Exception: estr = traceback.format_exc() self.fail(estr) @@ -345,7 +345,7 @@ def input(self, data, count): self.reduceFlush(prev, self.outCollector) self.reduce(self.midRecord, self.outCollector) - except Exception as e: + except Exception: estr = traceback.format_exc() self.log.warning("failing: %s", estr) self.fail(estr) @@ -357,7 +357,7 @@ def complete(self): if (self.taskType == TaskType.REDUCE) and not (self.midRecord is None): try: self.reduceFlush(self.midRecord, self.outCollector) - except Exception as e: + except Exception: estr = traceback.format_exc() self.log.warning("failing: %s", estr) self.fail(estr) @@ -430,7 +430,7 @@ def fail(self, message): try: self.outputClient.request("fail", {"message": message}) - except Exception as e: + except Exception: self.log.exception("TetherTask.fail: an exception occured while trying to send the fail message to the output server.") self.close() @@ -441,7 +441,7 @@ def close(self): try: self.clienTransciever.close() - except Exception as e: + except Exception: # ignore exceptions pass diff --git a/lang/py/avro/tether/tether_task_runner.py b/lang/py/avro/tether/tether_task_runner.py index c1533b33353..410f6c00e4c 100644 --- a/lang/py/avro/tether/tether_task_runner.py +++ b/lang/py/avro/tether/tether_task_runner.py @@ -66,7 +66,7 @@ def invoke(self, message, request): self.log.info("TetherTaskRunner: Received partitions") try: self.task.partitions = request["partitions"] - except Exception as e: + except Exception: self.log.error("Exception occured while processing the partitions message: Message:\n%s", traceback.format_exc()) raise elif message.name == "input": diff --git a/lang/py/avro/utils.py b/lang/py/avro/utils.py index 76d8f6ec293..32e7f5aa36a 100644 --- a/lang/py/avro/utils.py +++ b/lang/py/avro/utils.py @@ -36,3 +36,5 @@ def _randbytes(n: int) -> bytes: randbytes = getattr(random, "randbytes", _randbytes) + +__all__ = ("randbytes", "TypedDict") diff --git a/lang/py/pyproject.toml b/lang/py/pyproject.toml index 675e976379a..bc4e5d4bce8 100644 --- a/lang/py/pyproject.toml +++ b/lang/py/pyproject.toml @@ -25,3 +25,12 @@ line-length = 150 [tool.isort] profile = 'black' + +[tool.autoflake] +expand-star-imports = true +recursive = true +# Put a name in __all_ to explicitly export something a module imports. +# This is clearer and will keep autoflake from trying to remove it. +remove-all-unused-imports = true +remove-duplicate-keys = true +remove-unused-variables = true diff --git a/lang/py/setup.cfg b/lang/py/setup.cfg index ad50a763da2..d6d910005b0 100644 --- a/lang/py/setup.cfg +++ b/lang/py/setup.cfg @@ -81,3 +81,8 @@ zstandard = zstandard [aliases] dist = sdist --dist-dir ../../dist/py + +[flake8] +# Flake8 doesn't support configuration in pyproject.toml +# Otherwise, configure it to be compatible with black in that file. +max-line-length = 150 diff --git a/lang/py/setup.py b/lang/py/setup.py index dee0e7f2ee9..d68073a7b6d 100755 --- a/lang/py/setup.py +++ b/lang/py/setup.py @@ -19,9 +19,7 @@ import distutils.errors -import glob import os -import subprocess import setuptools # type: ignore diff --git a/lang/py/tox.ini b/lang/py/tox.ini index ababc1411cb..e5d76e1eb47 100644 --- a/lang/py/tox.ini +++ b/lang/py/tox.ini @@ -69,10 +69,12 @@ commands_post = [testenv:lint] deps = + autoflake black isort commands_pre = commands = + autoflake --check-diff . black --check . isort --check-only . commands_post =