ci/lava: Only parse result within testcase section
This commit fixes an issue caused by leftover output between jobs that run on the same device under test in LAVA. The result output from a previous job can be dumped right after boot, before any test case starts, as happened in this job: https://gitlab.freedesktop.org/mesa/mesa/-/jobs/25674303#L155

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17752>

committed by Marge Bot
parent 25dcb8d201
commit 2cb71ac530
@@ -390,7 +390,14 @@ def fetch_logs(job, max_idle_time, log_follower) -> None:
     job.heartbeat()
     parsed_lines = log_follower.flush()
 
-    parsed_lines = job.parse_job_result_from_log(parsed_lines)
+    # Only parse job results when the script reaches the end of the logs.
+    # Depending on how much payload the RPC scheduler.jobs.logs get, it may
+    # reach the LAVA_POST_PROCESSING phase.
+    if log_follower.current_section.type in (
+        LogSectionType.TEST_CASE,
+        LogSectionType.LAVA_POST_PROCESSING,
+    ):
+        parsed_lines = job.parse_job_result_from_log(parsed_lines)
 
     for line in parsed_lines:
         print_log(line)
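The guard keys off log_follower.current_section.type. Below is a minimal sketch of that bookkeeping outside the real LogFollower: the enum members come from the diff, while the dataclass shapes and the should_parse_results helper are illustrative assumptions, not the actual lava.utils implementation.

    from dataclasses import dataclass, field
    from enum import Enum, auto


    class LogSectionType(Enum):
        # Members taken from the diff; the real enum in lava.utils may have more.
        LAVA_BOOT = auto()
        TEST_CASE = auto()
        LAVA_POST_PROCESSING = auto()


    @dataclass
    class CurrentSection:
        type: LogSectionType


    @dataclass
    class FollowerSketch:
        # Start in the boot section, where stale result echoes may appear.
        current_section: CurrentSection = field(
            default_factory=lambda: CurrentSection(LogSectionType.LAVA_BOOT)
        )

        def should_parse_results(self) -> bool:
            # Mirrors the guard in fetch_logs(): results are only trusted once
            # the device has reached the test case or post-processing phase.
            return self.current_section.type in (
                LogSectionType.TEST_CASE,
                LogSectionType.LAVA_POST_PROCESSING,
            )

With this shape, a result echo that shows up while the sketch is still in LAVA_BOOT is simply printed as a log line and never treated as the job verdict.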
@@ -1,7 +1,7 @@
 from contextlib import nullcontext as does_not_raise
 from datetime import datetime
 from itertools import cycle
-from typing import Callable, Generator, Iterable, Tuple, Union
+from typing import Callable, Generator, Iterable, Optional, Tuple, Union
 
 import yaml
 from freezegun import freeze_time
@@ -34,6 +34,8 @@ def generate_testsuite_result(
     if extra is None:
         extra = {}
     return {"metadata": {"result": result, **metadata_extra}, "name": name}
 
 
 def jobs_logs_response(
     finished=False, msg=None, lvl="target", result=None
 ) -> Tuple[bool, str]:
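Only the signature of jobs_logs_response appears in this hunk. From its call sites (a finished flag plus a YAML-encoded message list), a plausible body might look like the sketch below; this is inferred for illustration and is not necessarily the exact helper.

    import yaml

    def jobs_logs_response_sketch(
        finished=False, msg=None, lvl="target", result=None
    ) -> tuple[bool, str]:
        # Wrap the message list the way the LAVA scheduler.jobs.logs RPC does:
        # a finished flag plus a YAML document carrying the log lines.
        logs = msg if msg is not None else []
        if result:
            # The final response can carry the job verdict as a hwci echo line.
            logs = [*logs, {"lvl": lvl, "msg": f"hwci: mesa: {result}"}]
        return finished, yaml.safe_dump(logs)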
@@ -48,12 +50,19 @@ def jobs_logs_response(
 
 def section_aware_message_generator(
-    messages: dict[LogSectionType, Iterable[int]]
+    messages: dict[LogSectionType, Iterable[int]], result: Optional[str] = None
 ) -> Iterable[tuple[dict, Iterable[int]]]:
     default = [1]
 
+    result_message_section = LogSectionType.TEST_CASE
+
     for section_type in LogSectionType:
         delay = messages.get(section_type, default)
         yield mock_lava_signal(section_type), delay
+        if result and section_type == result_message_section:
+            # To consider the job finished, the result `echo` should be produced
+            # in the correct section
+            yield create_lava_yaml_msg(msg=f"hwci: mesa: {result}"), delay
 
 
 def message_generator():
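To see what the updated helper produces, a small driver can be written. This is hypothetical usage only; section_aware_message_generator and LogSectionType come from the hunk above, and the printed fields assume the message dicts carry a "msg" key.

    # Hypothetical driver: walk every section, requesting two 1-second ticks
    # in the test case section and a final "pass" echo inside that section.
    for message, delays in section_aware_message_generator(
        {LogSectionType.TEST_CASE: [1, 1]}, result="pass"
    ):
        print(message.get("msg"), list(delays))

The point of result_message_section is visible here: the "hwci: mesa: pass" echo is emitted only while the TEST_CASE signal is the most recent one, matching the stricter parsing in fetch_logs.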
@@ -98,24 +107,23 @@ def generate_n_logs(
 
 def to_iterable(tick_fn):
     if isinstance(tick_fn, Generator):
-        tick_gen = tick_fn
+        return tick_fn
     elif isinstance(tick_fn, Iterable):
-        tick_gen = cycle(tick_fn)
+        return cycle(tick_fn)
     else:
-        tick_gen = cycle((tick_fn,))
-    return tick_gen
+        return cycle((tick_fn,))
 
 
-def mock_logs(messages={}, result="pass"):
+def mock_logs(messages=None, result=None):
+    if messages is None:
+        messages = {}
     with freeze_time(datetime.now()) as time_travel:
         # Simulate a complete run given by message_fn
-        for msg, tick_list in section_aware_message_generator(messages):
+        for msg, tick_list in section_aware_message_generator(messages, result):
             for tick_sec in tick_list:
                 yield jobs_logs_response(finished=False, msg=[msg])
                 time_travel.tick(tick_sec)
 
     yield jobs_logs_response(finished=True, result="pass")
 
 
 def mock_lava_signal(type: LogSectionType) -> dict[str, str]:
     return {
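Besides threading result through, the new mock_logs signature drops a mutable default argument (messages={}), a well-known Python hazard: the default object is created once at definition time and shared by every call that omits the argument. A self-contained illustration of the failure mode (not code from the diff):

    def leaky(acc=[]):  # the same list object is reused on every call
        acc.append(1)
        return acc

    def safe(acc=None):  # a fresh list is created per call
        if acc is None:
            acc = []
        acc.append(1)
        return acc

    assert leaky() == [1] and leaky() == [1, 1]  # state leaks between calls
    assert safe() == [1] and safe() == [1]       # no shared state

Even though mock_logs only reads messages, the None-plus-guard pattern is the idiomatic fix and keeps linters quiet.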
@@ -8,7 +8,7 @@
 import xmlrpc.client
 from contextlib import nullcontext as does_not_raise
 from datetime import datetime
-from itertools import repeat
+from itertools import chain, repeat
 
 import pytest
 from lava.exceptions import MesaCIException, MesaCIRetryError
@@ -22,10 +22,10 @@ from lava.lava_job_submitter import (
 from lava.utils import LogSectionType
 
 from .lava.helpers import (
     create_lava_yaml_msg,
     generate_n_logs,
     generate_testsuite_result,
     jobs_logs_response,
     mock_lava_signal,
     mock_logs,
     section_timeout,
 )
@@ -36,7 +36,7 @@ NUMBER_OF_MAX_ATTEMPTS = NUMBER_OF_RETRIES_TIMEOUT_DETECTION + 1
 @pytest.fixture
 def mock_proxy_waiting_time(mock_proxy):
     def update_mock_proxy(frozen_time, **kwargs):
-        wait_time = kwargs.pop("wait_time", 0)
+        wait_time = kwargs.pop("wait_time", 1)
         proxy_mock = mock_proxy(**kwargs)
         proxy_job_state = proxy_mock.scheduler.job_state
         proxy_job_state.return_value = {"job_state": "Running"}
@@ -59,57 +59,79 @@ NETWORK_EXCEPTION = xmlrpc.client.ProtocolError("", 0, "test", {})
 XMLRPC_FAULT = xmlrpc.client.Fault(0, "test")
 
 PROXY_SCENARIOS = {
-    "finish case": (generate_n_logs(1), does_not_raise(), True, {}),
+    "simple pass case": (mock_logs(result="pass"), does_not_raise(), "pass", {}),
+    "simple fail case": (mock_logs(result="fail"), does_not_raise(), "fail", {}),
+    "simple hung case": (
+        mock_logs(
+            messages={
+                LogSectionType.TEST_CASE: [
+                    section_timeout(LogSectionType.TEST_CASE) + 1
+                ]
+                * 1000
+            },
+            result="fail",
+        ),
+        pytest.raises(MesaCIRetryError),
+        "hung",
+        {},
+    ),
+    "leftover dump from last job in boot section": (
+        (
+            mock_lava_signal(LogSectionType.LAVA_BOOT),
+            jobs_logs_response(finished=False, msg=None, result="fail"),
+        ),
+        pytest.raises(MesaCIRetryError),
+        "hung",
+        {},
+    ),
     "boot works at last retry": (
         mock_logs(
-            {
+            messages={
                 LogSectionType.LAVA_BOOT: [
                     section_timeout(LogSectionType.LAVA_BOOT) + 1
                 ]
                 * NUMBER_OF_RETRIES_TIMEOUT_DETECTION
                 + [1]
             },
+            result="pass",
         ),
         does_not_raise(),
-        True,
+        "pass",
         {},
     ),
-    "post process test case took too long": pytest.param(
+    "test case took too long": pytest.param(
         mock_logs(
-            {
-                LogSectionType.LAVA_POST_PROCESSING: [
-                    section_timeout(LogSectionType.LAVA_POST_PROCESSING) + 1
+            messages={
+                LogSectionType.TEST_CASE: [
+                    section_timeout(LogSectionType.TEST_CASE) + 1
                 ]
                 * (NUMBER_OF_MAX_ATTEMPTS + 1)
             },
+            result="pass",
         ),
         pytest.raises(MesaCIRetryError),
-        True,
+        "pass",
         {},
         marks=pytest.mark.xfail(
             reason=(
                 "The time travel mock is not behaving as expected. "
                 "It makes a gitlab section end in the past when an "
                 "exception happens."
             )
         ),
     ),
     "timed out more times than retry attempts": (
         generate_n_logs(n=4, tick_fn=9999999),
         pytest.raises(MesaCIRetryError),
-        False,
+        "fail",
         {},
     ),
     "long log case, no silence": (
-        generate_n_logs(n=1000, tick_fn=0),
+        mock_logs(
+            messages={LogSectionType.TEST_CASE: [1] * (1000)},
+            result="pass",
+        ),
         does_not_raise(),
-        True,
+        "pass",
         {},
     ),
     "no retries, testsuite succeed": (
-        generate_n_logs(n=1, tick_fn=0),
+        mock_logs(result="pass"),
         does_not_raise(),
-        True,
+        "pass",
         {
             "testsuite_results": [
                 generate_testsuite_result(result="pass")
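Each PROXY_SCENARIOS value above follows the shape (side_effect, expectation, job_result, proxy_args), with job_result now a status string ("pass", "fail", "hung") instead of a boolean. The expected-failure entry uses pytest.param with an xfail mark; a minimal standalone version of that pattern, with demonstration values only, would be:

    import pytest

    @pytest.mark.parametrize(
        "side_effect, expected_status",
        [
            ("ok", "pass"),
            pytest.param(
                "broken",
                "pass",
                marks=pytest.mark.xfail(reason="known-bad scenario, kept for tracking"),
            ),
        ],
    )
    def test_status(side_effect, expected_status):
        status = "pass" if side_effect == "ok" else "fail"
        assert status == expected_status

Marking the scenario xfail instead of deleting it keeps the broken time-travel case visible in the test report until the freezegun interaction is fixed.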
@@ -117,9 +139,9 @@ PROXY_SCENARIOS = {
         },
     ),
     "no retries, but testsuite fails": (
-        generate_n_logs(n=1, tick_fn=0, result="fail"),
+        mock_logs(result="fail"),
         does_not_raise(),
-        False,
+        "fail",
         {
             "testsuite_results": [
                 generate_testsuite_result(result="fail")
@@ -129,7 +151,7 @@ PROXY_SCENARIOS = {
     "no retries, one testsuite fails": (
         generate_n_logs(n=1, tick_fn=0, result="fail"),
         does_not_raise(),
-        False,
+        "fail",
         {
             "testsuite_results": [
                 generate_testsuite_result(result="fail"),
@@ -140,21 +162,21 @@ PROXY_SCENARIOS = {
     "very long silence": (
         generate_n_logs(n=NUMBER_OF_MAX_ATTEMPTS + 1, tick_fn=100000),
         pytest.raises(MesaCIRetryError),
-        False,
+        "fail",
         {},
     ),
     # If a protocol error happens, _call_proxy will retry without affecting timeouts
     "unstable connection, ProtocolError followed by final message": (
-        (NETWORK_EXCEPTION, jobs_logs_response(finished=True, result="pass")),
+        (NETWORK_EXCEPTION, *list(mock_logs(result="pass"))),
         does_not_raise(),
-        True,
+        "pass",
         {},
     ),
     # After an arbitrary number of retries, _call_proxy should call sys.exit
     "unreachable case, subsequent ProtocolErrors": (
         repeat(NETWORK_EXCEPTION),
         pytest.raises(SystemExit),
-        False,
+        "fail",
         {},
     ),
     "XMLRPC Fault": ([XMLRPC_FAULT], pytest.raises(SystemExit, match="1"), False, {}),
@@ -177,17 +199,15 @@ def test_retriable_follow_job(
     with expectation:
         proxy = mock_proxy(side_effect=test_log, **proxy_args)
         job: LAVAJob = retriable_follow_job(proxy, "")
-        assert job_result == (job.status == "pass")
+        assert job_result == job.status
 
 
-WAIT_FOR_JOB_SCENARIOS = {
-    "one log run taking (sec):": (generate_n_logs(1), True),
-}
+WAIT_FOR_JOB_SCENARIOS = {"one log run taking (sec):": (mock_logs(result="pass"))}
 
 
-@pytest.mark.parametrize("wait_time", (0, DEVICE_HANGING_TIMEOUT_SEC * 2))
+@pytest.mark.parametrize("wait_time", (DEVICE_HANGING_TIMEOUT_SEC * 2,))
 @pytest.mark.parametrize(
-    "side_effect, has_finished",
+    "side_effect",
     WAIT_FOR_JOB_SCENARIOS.values(),
     ids=WAIT_FOR_JOB_SCENARIOS.keys(),
 )
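A small note on the rewritten WAIT_FOR_JOB_SCENARIOS: with a single parametrize name, each dict value is passed straight through, so the parentheses around mock_logs(result="pass") only group the expression and do not build a tuple. A quick standalone check of that distinction:

    # Parentheses alone group; only the trailing comma makes a 1-tuple.
    assert (42) == 42 and not isinstance((42), tuple)
    assert isinstance((42,), tuple) and (42,)[0] == 42

The wait_time parametrize line above does need its trailing comma, since parametrize expects an iterable of values rather than a bare integer.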
@@ -195,7 +215,6 @@ def test_simulate_a_long_wait_to_start_a_job(
     frozen_time,
     wait_time,
     side_effect,
-    has_finished,
     mock_proxy_waiting_time,
 ):
     start_time = datetime.now()
@@ -209,7 +228,7 @@ def test_simulate_a_long_wait_to_start_a_job(
     end_time = datetime.now()
     delta_time = end_time - start_time
 
-    assert has_finished == (job.status == "pass")
+    assert job.status == "pass"
     assert delta_time.total_seconds() >= wait_time