diff --git a/.gitlab-ci/lava/utils/constants.py b/.gitlab-ci/lava/utils/constants.py index 2a9d1547037..6357e9aad1b 100644 --- a/.gitlab-ci/lava/utils/constants.py +++ b/.gitlab-ci/lava/utils/constants.py @@ -12,3 +12,6 @@ JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75)) # Use UART over the default SSH mechanism to follow logs. # Caution: this can lead to device silence in some devices in Mesa CI. FORCE_UART = bool(getenv("LAVA_FORCE_UART", False)) + +# How many times the r8152 error may happen to consider it a known issue. +KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: int = 10 diff --git a/.gitlab-ci/lava/utils/lava_log_hints.py b/.gitlab-ci/lava/utils/lava_log_hints.py index b147a8747ea..87e19507a2f 100644 --- a/.gitlab-ci/lava/utils/lava_log_hints.py +++ b/.gitlab-ci/lava/utils/lava_log_hints.py @@ -9,13 +9,14 @@ if TYPE_CHECKING: from lava.exceptions import MesaCIKnownIssueException from lava.utils.console_format import CONSOLE_LOG +from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER from lava.utils.log_section import LogSectionType @dataclass class LAVALogHints: log_follower: LogFollower - has_r8152_issue_history: bool = field(default=False, init=False) + r8152_issue_consecutive_counter: int = field(default=0, init=False) def detect_failure(self, new_lines: list[dict[str, Any]]): for line in new_lines: @@ -23,21 +24,22 @@ class LAVALogHints: def detect_r8152_issue(self, line): if ( - self.log_follower.phase == LogSectionType.TEST_CASE - and line["lvl"] == "target" + self.log_follower.phase == LogSectionType.TEST_CASE and line["lvl"] == "target" ): if re.search(r"r8152 \S+ eth0: Tx status -71", line["msg"]): - self.has_r8152_issue_history = True + self.r8152_issue_consecutive_counter += 1 return - if self.has_r8152_issue_history and re.search( - r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying", - line["msg"], - ): - raise MesaCIKnownIssueException( - f"{CONSOLE_LOG['FG_MAGENTA']}" - "Probable network issue failure encountered, retrying the job" - f"{CONSOLE_LOG['RESET']}" - ) + if self.r8152_issue_consecutive_counter >= KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: + if re.search( + r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying", + line["msg"], + ): + raise MesaCIKnownIssueException( + f"{CONSOLE_LOG['FG_MAGENTA']}" + "Probable network issue failure encountered, retrying the job" + f"{CONSOLE_LOG['RESET']}" + ) - self.has_r8152_issue_history = False + # Reset the status, as the `nfs... still trying` complaint was not detected + self.r8152_issue_consecutive_counter = 0 diff --git a/.gitlab-ci/tests/utils/test_lava_log.py b/.gitlab-ci/tests/utils/test_lava_log.py index e74aaf2fead..5abcd24c56d 100644 --- a/.gitlab-ci/tests/utils/test_lava_log.py +++ b/.gitlab-ci/tests/utils/test_lava_log.py @@ -16,6 +16,7 @@ from lava.utils import ( fix_lava_gitlab_section_log, hide_sensitive_data, ) +from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER from ..lava.helpers import create_lava_yaml_msg, does_not_raise, lava_yaml, yaml_dump @@ -312,9 +313,9 @@ def test_gitlab_section_id(case_name, expected_id): A618_NETWORK_ISSUE_LOGS = [ - create_lava_yaml_msg( + *(KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER*[create_lava_yaml_msg( msg="[ 1733.599402] r8152 2-1.3:1.0 eth0: Tx status -71", lvl="target" - ), + )]), create_lava_yaml_msg( msg="[ 1733.604506] nfs: server 192.168.201.1 not responding, still trying", lvl="target",