ci/lava: Turn the r8152 issue check into a counter

We were just detecting if a log like
[  143.080663] r8152 2-1.3:1.0 eth0: Tx status -71
happened once before
[  316.389695] nfs: server 192.168.201.1 not responding, still trying

But we can use a counter to be more confident that the device is
struggling to recover, and we can also let this detection happen during
the boot phase.

This mimics how other freedreno devices deal with this problem, see
`cros_servo_run.py:64` for example.

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27081>
This commit is contained in:
Guilherme Gallo
2024-01-12 15:24:04 -03:00
committed by Marge Bot
parent 9a6ac1dd2f
commit bfd50f72eb
3 changed files with 22 additions and 16 deletions

View File

@@ -12,3 +12,6 @@ JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
# Use UART over the default SSH mechanism to follow logs.
# Caution: this can lead to device silence in some devices in Mesa CI.
FORCE_UART = bool(getenv("LAVA_FORCE_UART", False))
# How many consecutive times the r8152 error must occur before it is treated as a known issue.
KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: int = 10

View File

@@ -9,13 +9,14 @@ if TYPE_CHECKING:
from lava.exceptions import MesaCIKnownIssueException
from lava.utils.console_format import CONSOLE_LOG
from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER
from lava.utils.log_section import LogSectionType
@dataclass
class LAVALogHints:
log_follower: LogFollower
has_r8152_issue_history: bool = field(default=False, init=False)
r8152_issue_consecutive_counter: int = field(default=0, init=False)
def detect_failure(self, new_lines: list[dict[str, Any]]):
for line in new_lines:
@@ -23,14 +24,14 @@ class LAVALogHints:
def detect_r8152_issue(self, line):
if (
self.log_follower.phase == LogSectionType.TEST_CASE
and line["lvl"] == "target"
self.log_follower.phase == LogSectionType.TEST_CASE and line["lvl"] == "target"
):
if re.search(r"r8152 \S+ eth0: Tx status -71", line["msg"]):
self.has_r8152_issue_history = True
self.r8152_issue_consecutive_counter += 1
return
if self.has_r8152_issue_history and re.search(
if self.r8152_issue_consecutive_counter >= KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER:
if re.search(
r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying",
line["msg"],
):
@@ -40,4 +41,5 @@ class LAVALogHints:
f"{CONSOLE_LOG['RESET']}"
)
self.has_r8152_issue_history = False
# Reset the status, as the `nfs... still trying` complaint was not detected
self.r8152_issue_consecutive_counter = 0

View File

@@ -16,6 +16,7 @@ from lava.utils import (
fix_lava_gitlab_section_log,
hide_sensitive_data,
)
from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER
from ..lava.helpers import create_lava_yaml_msg, does_not_raise, lava_yaml, yaml_dump
@@ -312,9 +313,9 @@ def test_gitlab_section_id(case_name, expected_id):
A618_NETWORK_ISSUE_LOGS = [
create_lava_yaml_msg(
*(KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER*[create_lava_yaml_msg(
msg="[ 1733.599402] r8152 2-1.3:1.0 eth0: Tx status -71", lvl="target"
),
)]),
create_lava_yaml_msg(
msg="[ 1733.604506] nfs: server 192.168.201.1 not responding, still trying",
lvl="target",