diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index 29cc0c30c7..0a96f41ca8 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -25,7 +25,11 @@ from executorch.backends.arm.util.arm_model_evaluator import GenericModelEvaluator from executorch.devtools.backend_debug import get_delegation_info -from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig +from executorch.exir import ( + EdgeCompileConfig, + ExecutorchBackendConfig, + to_edge_transform_and_lower, +) from executorch.extension.export_util.utils import export_to_edge, save_pte_program from tabulate import tabulate @@ -185,7 +189,7 @@ def get_compile_spec(target: str, intermediates: bool) -> ArmCompileSpecBuilder: memory_mode="Shared_Sram", extra_flags="--debug-force-regor --output-format=raw", ) - .set_permute_memory_format(args.model_name in MODEL_NAME_TO_MODEL.keys()) + .set_permute_memory_format(True) .set_quantize_io(True) ) elif "ethos-u85" in target: @@ -202,7 +206,7 @@ def get_compile_spec(target: str, intermediates: bool) -> ArmCompileSpecBuilder: ) if intermediates is not None: - spec_builder.dump_intermediate_artifacts_to(args.intermediates) + spec_builder.dump_intermediate_artifacts_to(intermediates) return spec_builder.build() @@ -356,40 +360,42 @@ def get_args(): model, example_inputs = get_model_and_inputs_from_name(args.model_name) model = model.eval() + # export_for_training under the assumption we quantize, the exported form also works + # in to_edge if we don't quantize + exported_program = torch.export.export_for_training(model, example_inputs) + model = exported_program.module() model_fp32 = model - # pre-autograd export. eventually this will become torch.export - model = torch.export.export_for_training(model, example_inputs).module() - # Quantize if required model_int8 = None if args.quantize: model = quantize(model, example_inputs) model_int8 = model + # Wrap quantized model back into an exported_program + exported_program = torch.export.export_for_training(model, example_inputs) + + if args.delegate: + # As we can target multiple output encodings from ArmBackend, one must + # be specified. + compile_spec = get_compile_spec(args.target, args.intermediates) + edge = to_edge_transform_and_lower( + exported_program, + partitioner=[ArmPartitioner(compile_spec)], + compile_config=EdgeCompileConfig( + _check_ir_validity=False, + _skip_dim_order=True, + ), + ) + else: + edge = to_edge_transform_and_lower( + exported_program, + compile_config=EdgeCompileConfig( + _check_ir_validity=False, + _skip_dim_order=True, + ), + ) - edge = export_to_edge( - model, - example_inputs, - edge_compile_config=EdgeCompileConfig( - _check_ir_validity=False, - ), - ) - - # As we can target multiple output encodings from ArmBackend, one must - # be specified. - compile_spec = ( - get_compile_spec(args.target, args.intermediates) - if args.delegate is True - else None - ) - - logging.debug(f"Exported graph:\n{edge.exported_program().graph}") - if args.delegate is True: - edge = edge.to_backend(ArmPartitioner(compile_spec)) - - dump_delegation_info(edge, args.intermediates) - - logging.debug(f"Lowered graph:\n{edge.exported_program().graph}") + dump_delegation_info(edge, args.intermediates) try: exec_prog = edge.to_executorch(