-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Programs to "sort" and compare IFEX content
stable_sort_ifex defines a known order of keys so that IFEX files can be reliably compared. It is a module but can also be run as a standalone script. The diff_ifex.py script will print the diff between two files, after normalizing the order of the content using the stable_sort_ifex module. It can also be used with an external diff program. Signed-off-by: Gunnar Andersson <gunnar_dev@[email protected]>
- Loading branch information
Showing
2 changed files
with
210 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
#!/usr/bin/python | ||
# SPDX-FileCopyrightText: Copyright (c) 2023 Novaspring AB | ||
# SPDX-License-Identifier: MPL-2.0 | ||
|
||
# This file is part of the IFEX project | ||
|
||
from collections import OrderedDict | ||
import argparse | ||
import sys | ||
import yaml | ||
|
||
# This file formats a YAML input in a fixed order ("sorted", basically). | ||
# This facilitates reliable comparisons / diffing. | ||
|
||
# The implementation is separated from diff/comparison-scripts to factor | ||
# it out of that code. The first implementation is simple but it might be | ||
# refined over time. | ||
|
||
|
||
def ifex_stable_order(data):
    """Return a copy of *data* with all mapping keys in a stable order.

    Mappings are returned as ``OrderedDict`` instances in which the key
    ``name`` (when present) comes first and all other keys follow in sorted
    order.  Lists keep their element order, but each element is processed
    recursively.  Any other value is returned unchanged.
    """
    if isinstance(data, dict):
        # Plain sorting covers the normal case (all keys are strings).  YAML
        # also permits keys of mixed types, e.g. int and str, which cannot be
        # compared with each other; in that case group the keys by type name
        # so the result is still deterministic instead of raising TypeError.
        try:
            keys = sorted(data)
        except TypeError:
            keys = sorted(data, key=lambda k: (type(k).__name__, str(k)))

        ordered_data = OrderedDict()
        # Reserve the first slot for "name".  Test for the key itself, not
        # for a non-None value, so that "name: null" is also placed first.
        if "name" in data:
            ordered_data["name"] = None

        # Assigning to a key that already exists keeps its position, so the
        # reserved "name" slot stays first when the loop fills in its value.
        for key in keys:
            ordered_data[key] = ifex_stable_order(data[key])

        return ordered_data

    if isinstance(data, list):
        return [ifex_stable_order(item) for item in data]

    return data
|
||
|
||
# If an ordered dict is printed as a normal dict we get a lot of unrelated | ||
# metadata output. Therefore, we need to specify how PyYAML shall represent an | ||
# ordered dict: (PyYAML does not seemingly have OrderedDict support built in...?) | ||
# | ||
# Solution from: | ||
# https://stackoverflow.com/questions/16782112/can-pyyaml-dump-dict-items-in-non-alphabetical-order | ||
def represent_ordereddict(dumper, data):
    """PyYAML representer that emits *data* as a plain YAML mapping.

    Each key and value is converted with ``dumper.represent_data`` and the
    resulting pairs are kept in the iteration order of *data*.
    """
    pairs = [
        (dumper.represent_data(key), dumper.represent_data(val))
        for key, val in data.items()
    ]
    return yaml.nodes.MappingNode("tag:yaml.org,2002:map", pairs)
|
||
|
||
# --------------------------------------------------------------------- | ||
# MAIN, used if this file is run standalone | ||
# --------------------------------------------------------------------- | ||
def usage():
    """Print a description of the stable order that this script produces."""
    description = """
This script reorders IFEX (YAML) input into a stable ("sorted") order and prints the result back out.
The stable order is basically:
0. Comments have no semantic meaning so they will be filtered out completely
1. For dicts with key-value mappings, put the item 'name' first (if there is a key for 'name')
2. Then, all other keys in alphabetical order
3. Lists are not re-arranged (TODO: consider if lists should be sorted "by name" somehow?)
4. Anything else remains in the input order.
"""
    print(description)
|
||
|
||
def main():
    """Command-line entry point: print the stable-ordered form of one file.

    The YAML input is read from the path given as the only positional
    argument, or from STDIN when that argument is "-".  When no argument is
    given, the argparse help and the usage text are printed and the process
    exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Reorder IFEX (YAML) input into a stable ("sorted") order and print the result back out.'
    )
    parser.add_argument("file1", help="Input file. (- to use STDIN)", nargs="?")
    args = parser.parse_args()

    if args.file1 is None:
        parser.print_help()
        usage()
        sys.exit(1)

    if args.file1 == "-":
        # Use STDIN if file is '-'
        data = yaml.safe_load(sys.stdin)
    else:
        with open(args.file1, "r") as file:
            data = yaml.safe_load(file)

    out = ifex_stable_order(data)

    # Teach PyYAML to dump an OrderedDict as an ordinary mapping, and turn off
    # its own key sorting so that the order established above is kept.
    yaml.add_representer(OrderedDict, represent_ordereddict)
    print(yaml.dump(out, sort_keys=False))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/python | ||
|
||
# SPDX-FileCopyrightText: Copyright (c) 2023 Novaspring AB | ||
# SPDX-License-Identifier: MPL-2.0 | ||
|
||
# This file is part of the IFEX project | ||
|
||
import argparse | ||
import subprocess | ||
import tempfile | ||
import yaml | ||
from collections import OrderedDict | ||
from ifex.model.stable_sort_ifex import ifex_stable_order, represent_ordereddict | ||
|
||
# The program compares two IFEX (YAML) files after normalizing ("sorting", | ||
# basically) the order of elements so that the comparison becomes more relevant. | ||
|
||
# The normal unix diff command seems to give the most useful output: | ||
def diff_files_with_external_program(path1, path2):
    """Return the text output of the standard unix diff program for the paths"""
    # diff exits with a non-zero status when the files differ, so the exit
    # status must not be treated as a failure (hence check=False).
    completed = subprocess.run(
        ["diff", path1, path2],
        stdout=subprocess.PIPE,
        check=False,
        text=True,
    )
    return completed.stdout
|
||
|
||
# Alternative, using difflib | ||
def diff_files(path1, path2):
    """Use difflib to print the difference between the given files

    Both files are read completely as text and compared line by line.  The
    result is printed to standard output in context diff format; nothing is
    printed when the files have identical content.
    """
    # Imported locally because only this alternative implementation needs it
    import difflib

    with open(path1, "r") as f1:
        l1 = f1.readlines()
    with open(path2, "r") as f2:
        l2 = f2.readlines()

    # The lines from readlines() keep their newline, so do not add another
    for line in difflib.context_diff(l1, l2):
        print(line, end="")
|
||
|
||
def stable_order_file(file1):
    """Writes a new file containing the YAML content with keys in order, and
    returns the file name.

    The new file is a temporary file created with delete=False, so it still
    exists on disk after this function has returned.  Any exception raised
    while opening, parsing or writing is passed on to the caller.
    """
    # Read and reorder the input before creating the temporary file, so that
    # an unreadable or malformed input does not leave an empty file behind.
    with open(file1, "r") as f1:
        ordered = ifex_stable_order(yaml.safe_load(f1))

    # Dump OrderedDict as an ordinary mapping and keep the established order
    yaml.add_representer(OrderedDict, represent_ordereddict)
    with tempfile.NamedTemporaryFile("w", delete=False) as f2:
        f2.write(yaml.dump(ordered, sort_keys=False))
        return f2.name
|
||
|
||
def compare_yaml_files(file1, file2):
    """Write a key-ordered temporary copy of each given file, report the
    names of the temporary files, and return the diff between the two copies"""
    sorted1, sorted2 = stable_order_file(file1), stable_order_file(file2)

    print("Stable sorting...")
    print(f"temporary files are {file1} -> {sorted1}, {file2} -> {sorted2}")
    print("Comparing files:")

    diff_output = diff_files_with_external_program(sorted1, sorted2)
    return diff_output
|
||
|
||
# --------------------------------------------------------------------- | ||
# MAIN, used if this file is run standalone | ||
# --------------------------------------------------------------------- | ||
|
||
|
||
def main():
    """Command-line entry point: compare two IFEX files given as arguments.

    With -p, only the paths of the two key-ordered temporary files are
    printed.  Without it, the diff between those files is printed as well.
    """
    parser = argparse.ArgumentParser(
        description="Compare IFEX (YAML) file contents, after normalizing order of elements."
    )
    parser.add_argument("file1", help="First, original file")
    parser.add_argument("file2", help="Second, possibly changed file")
    parser.add_argument(
        "-p",
        action="store_true",
        default=False,
        help="Only print the created temporary file paths, for use with an external diff program",
    )
    args = parser.parse_args()

    if args.p:
        # File names only; an external diff program does the comparison
        for path in (args.file1, args.file2):
            print(stable_order_file(path))
    else:
        print(compare_yaml_files(args.file1, args.file2))


if __name__ == "__main__":
    main()