From 379410e9b1a5518737c66244e1456b5c865bad2e Mon Sep 17 00:00:00 2001 From: Zack Cerza Date: Fri, 14 Jul 2023 11:17:19 -0600 Subject: [PATCH 1/5] ansible.FailureAnalyzer: items -> values Signed-off-by: Zack Cerza --- teuthology/task/ansible.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/teuthology/task/ansible.py b/teuthology/task/ansible.py index 6ccdc7664..331c9b792 100644 --- a/teuthology/task/ansible.py +++ b/teuthology/task/ansible.py @@ -43,7 +43,7 @@ def analyze(self, failure_log): lines = set() if failure_obj is None: return lines - for host_obj in failure_obj.items(): + for host_obj in failure_obj.values(): lines = lines.union(self.analyze_host_record(host_obj)) return lines From 8260c34366a95b0f10ba94aedbe051b36ae034a0 Mon Sep 17 00:00:00 2001 From: Zack Cerza Date: Fri, 14 Jul 2023 11:37:33 -0600 Subject: [PATCH 2/5] ansible.FailureAnalyzer: Look for SSH errors Signed-off-by: Zack Cerza --- teuthology/task/ansible.py | 19 ++++++++++++++++--- teuthology/test/task/test_ansible.py | 8 ++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/teuthology/task/ansible.py b/teuthology/task/ansible.py index 331c9b792..1ff833135 100644 --- a/teuthology/task/ansible.py +++ b/teuthology/task/ansible.py @@ -58,23 +58,36 @@ def analyze_host_record(self, record): lines.add(f"CPAN command failed: {cmd}") continue lines_to_analyze = result.get("stderr_lines", result["msg"].split("\n")) + lines_to_analyze.extend(result.get("err", "").split("\n")) for line in lines_to_analyze: - line = self.analyze_line(line) + line = self.analyze_line(line.strip()) if line: lines.add(line) return list(lines) def analyze_line(self, line): - # apt output sometimes contains warnings or suggestions. Those won't be - # helpful, so throw them out. if line.startswith("W: ") or line.endswith("?"): return "" + drop_phrases = [ + # apt output sometimes contains warnings or suggestions. Those won't be + # helpful, so throw them out. + r"^W: ", + r"\?$", + # some output from SSH is not useful + r"Warning: Permanently added .+ to the list of known hosts.", + r"^@+$", + ] + for phrase in drop_phrases: + match = re.search(rf"({phrase})", line, flags=re.IGNORECASE) + if match: + return "" # Next, we can normalize some common phrases. phrases = [ "connection timed out", r"(unable to|could not) connect to [^ ]+", r"temporary failure resolving [^ ]+", + r"Permissions \d+ for '.+' are too open.", ] for phrase in phrases: match = re.search(rf"({phrase})", line, flags=re.IGNORECASE) diff --git a/teuthology/test/task/test_ansible.py b/teuthology/test/task/test_ansible.py index 939ec3f93..a4a086700 100644 --- a/teuthology/test/task/test_ansible.py +++ b/teuthology/test/task/test_ansible.py @@ -34,6 +34,14 @@ class TestFailureAnalyzer: "E: Failed to fetch http://archive.ubuntu.com/ubuntu/pool/main/libb/libb-hooks-op-check-perl/libb-hooks-op-check-perl_0.22-1build2_amd64.deb Temporary failure resolving 'archive.ubuntu.com'", "Temporary failure resolving 'archive.ubuntu.com'" ], + [ + "Data could not be sent to remote host \"smithi068.front.sepia.ceph.com\".", + "Data could not be sent to remote host \"smithi068.front.sepia.ceph.com\"." + ], + [ + "Permissions 0644 for '/root/.ssh/id_rsa' are too open.", + "Permissions 0644 for '/root/.ssh/id_rsa' are too open." + ], ] ) def test_lines(self, line, result): From ad65ce07d4a0a5122fb0d5d7f8cbc1f6fc1dc95c Mon Sep 17 00:00:00 2001 From: Zack Cerza Date: Fri, 14 Jul 2023 11:38:54 -0600 Subject: [PATCH 3/5] Ansible._handle_failure: YAMLErrors are special Return to treating them differently, but also continue to catch other exceptions here. Signed-off-by: Zack Cerza --- teuthology/task/ansible.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/teuthology/task/ansible.py b/teuthology/task/ansible.py index 1ff833135..aa605d2aa 100644 --- a/teuthology/task/ansible.py +++ b/teuthology/task/ansible.py @@ -376,12 +376,12 @@ def _handle_failure(self, command, status): try: analyzer = FailureAnalyzer() failures = analyzer.analyze(fail_log) - except Exception as e: + except yaml.YAMLError as e: log.error( - "Failed to parse ansible failure log: {0} ({1})".format( - self.failure_log.name, e - ) + f"Failed to parse ansible failure log: {self.failure_log.name} ({e})" ) + except Exception: + log.exception(f"Failed to analyze ansible failure log: {self.failure_log.name}") # If we hit an exception, or if analyze() returned nothing, use the log as-is if not failures: failures = fail_log.replace('\n', '') From 1c413be82761863722fd2dabb91597ae7dac3f93 Mon Sep 17 00:00:00 2001 From: Zack Cerza Date: Fri, 14 Jul 2023 11:50:24 -0600 Subject: [PATCH 4/5] ansible.FailureAnalyzer: Drop malformed records If host_obj is the wrong type, we won't be able to extract anything useful from it. In these cases, we'll end up using the raw string as we used to do. Signed-off-by: Zack Cerza --- teuthology/task/ansible.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/teuthology/task/ansible.py b/teuthology/task/ansible.py index aa605d2aa..ea0f9c594 100644 --- a/teuthology/task/ansible.py +++ b/teuthology/task/ansible.py @@ -44,6 +44,8 @@ def analyze(self, failure_log): if failure_obj is None: return lines for host_obj in failure_obj.values(): + if not isinstance(host_obj, dict): + continue lines = lines.union(self.analyze_host_record(host_obj)) return lines @@ -57,7 +59,11 @@ def analyze_host_record(self, record): if "cpan" in cmd: lines.add(f"CPAN command failed: {cmd}") continue - lines_to_analyze = result.get("stderr_lines", result["msg"].split("\n")) + lines_to_analyze = [] + if "stderr_lines" in result: + lines_to_analyze = result["stderr_lines"] + elif "msg" in result: + lines_to_analyze = result["msg"].split("\n") lines_to_analyze.extend(result.get("err", "").split("\n")) for line in lines_to_analyze: line = self.analyze_line(line.strip()) From 4d5f75b7f28a873e175000d275286b99775fe8dd Mon Sep 17 00:00:00 2001 From: Zack Cerza Date: Fri, 14 Jul 2023 12:01:35 -0600 Subject: [PATCH 5/5] TestFailureAnalyzer: Add tests for dropped items Signed-off-by: Zack Cerza --- teuthology/test/task/test_ansible.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/teuthology/test/task/test_ansible.py b/teuthology/test/task/test_ansible.py index a4a086700..9f378b480 100644 --- a/teuthology/test/task/test_ansible.py +++ b/teuthology/test/task/test_ansible.py @@ -26,6 +26,14 @@ class TestFailureAnalyzer: @mark.parametrize( 'line,result', [ + [ + "W: --force-yes is deprecated, use one of the options starting with --allow instead.", + "", + ], + [ + "E: Unable to fetch some archives, maybe run apt-get update or try with --fix-missing?", + "", + ], [ "E: Failed to fetch http://security.ubuntu.com/ubuntu/pool/main/a/apache2/apache2-bin_2.4.41-4ubuntu3.14_amd64.deb Unable to connect to archive.ubuntu.com:http:", "Unable to connect to archive.ubuntu.com:http:"