ci/lava: Follow job execution via LogFollower
LogFollower is now used to process the LAVA logs. Moreover, this commit adds a timeout per GitLab section: if a section takes longer than expected, the job is cancelled and retried. Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16323>
This commit is contained in:

committed by
Marge Bot

parent
2569d7d7df
commit
aa26a6ab72
@@ -1,10 +1,23 @@
|
||||
from contextlib import nullcontext as does_not_raise
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime
|
||||
from itertools import cycle
|
||||
from typing import Callable, Generator, Iterable, Tuple, Union
|
||||
|
||||
import yaml
|
||||
from freezegun import freeze_time
|
||||
from lava.utils.lava_log import (
|
||||
DEFAULT_GITLAB_SECTION_TIMEOUTS,
|
||||
FALLBACK_GITLAB_SECTION_TIMEOUT,
|
||||
LogSectionType,
|
||||
)
|
||||
|
||||
|
||||
def section_timeout(section_type: LogSectionType) -> int:
    """Return the timeout configured for ``section_type`` in whole seconds.

    The duration is looked up in ``DEFAULT_GITLAB_SECTION_TIMEOUTS``; section
    types without an entry there use ``FALLBACK_GITLAB_SECTION_TIMEOUT``.
    The result of ``total_seconds()`` is truncated to an ``int``.
    """
    duration = DEFAULT_GITLAB_SECTION_TIMEOUTS.get(
        section_type, FALLBACK_GITLAB_SECTION_TIMEOUT
    )
    return int(duration.total_seconds())
|
||||
|
||||
|
||||
def create_lava_yaml_msg(
|
||||
@@ -21,8 +34,6 @@ def generate_testsuite_result(
|
||||
if extra is None:
|
||||
extra = {}
|
||||
return {"metadata": {"result": result, **metadata_extra}, "name": name}
|
||||
|
||||
|
||||
def jobs_logs_response(
|
||||
finished=False, msg=None, lvl="target", result=None
|
||||
) -> Tuple[bool, str]:
|
||||
@@ -36,6 +47,19 @@ def jobs_logs_response(
|
||||
return finished, yaml.safe_dump(logs)
|
||||
|
||||
|
||||
def section_aware_message_generator(
    messages: dict[LogSectionType, Iterable[int]]
) -> Iterable[tuple[dict, Iterable[int]]]:
    """Yield a ``(mocked signal, delays)`` pair for every ``LogSectionType``.

    Args:
        messages: Per-section delays. Section types missing from this mapping
            get the single-element default ``[1]``.

    Yields:
        A tuple of the message built by ``mock_lava_signal`` for the section
        type and the delays associated with that section.
    """
    # The same list object is handed out for every section lacking an entry.
    one_tick = [1]
    for kind in LogSectionType:
        ticks = messages.get(kind, one_tick)
        signal = mock_lava_signal(kind)
        yield signal, ticks
|
||||
|
||||
|
||||
def message_generator():
    """Yield one mocked LAVA signal per ``LogSectionType`` member.

    Messages are produced lazily, following the iteration order of
    ``LogSectionType``.
    """
    yield from map(mock_lava_signal, LogSectionType)
|
||||
|
||||
|
||||
def level_generator():
|
||||
# Tests all known levels by default
|
||||
@@ -80,3 +104,28 @@ def to_iterable(tick_fn):
|
||||
else:
|
||||
tick_gen = cycle((tick_fn,))
|
||||
return tick_gen
|
||||
|
||||
|
||||
def mock_logs(messages=None, result="pass"):
    """Generate mocked job log responses for one complete run under frozen time.

    For every section produced by ``section_aware_message_generator`` one
    unfinished log response is yielded per configured tick, and the frozen
    clock is advanced by that tick right after each response. A final
    ``finished=True`` response carrying ``result`` closes the run.

    Args:
        messages: Mapping of section type to an iterable of ticks passed to
            ``time_travel.tick``. ``None`` (the default) is treated as an
            empty mapping, so every section uses the generator's default.
        result: Result reported by the final, finished response.

    Yields:
        Whatever ``jobs_logs_response`` returns for each simulated poll.
    """
    # Avoid a shared mutable default argument.
    if messages is None:
        messages = {}

    with freeze_time(datetime.now()) as time_travel:
        # Simulate a complete run, section by section.
        for msg, tick_list in section_aware_message_generator(messages):
            for tick_sec in tick_list:
                yield jobs_logs_response(finished=False, msg=[msg])
                # Move the frozen clock forward before the next poll.
                time_travel.tick(tick_sec)

        # Report the caller-provided result instead of a hard-coded "pass".
        yield jobs_logs_response(finished=True, result=result)
|
||||
|
||||
|
||||
def mock_lava_signal(type: LogSectionType) -> dict[str, str]:
    """Return the mocked LAVA log message associated with a section type.

    ``TEST_CASE``, ``TEST_SUITE`` and ``LAVA_POST_PROCESSING`` each have a
    dedicated message. Any other section type gets the message built by
    ``create_lava_yaml_msg()`` called without arguments.
    """
    signal_by_section = {
        LogSectionType.TEST_CASE: create_lava_yaml_msg(
            lvl="debug", msg="<STARTTC> case"
        ),
        LogSectionType.TEST_SUITE: create_lava_yaml_msg(
            lvl="debug", msg="<STARTRUN> suite"
        ),
        LogSectionType.LAVA_POST_PROCESSING: create_lava_yaml_msg(
            lvl="target", msg="<LAVA_SIGNAL_ENDTC case>"
        ),
    }
    # The fallback is built unconditionally, after the dedicated messages.
    fallback = create_lava_yaml_msg()
    return signal_by_section.get(type, fallback)
|
||||
|
@@ -36,12 +36,15 @@ from lava.lava_job_submitter import (
|
||||
follow_job_execution,
|
||||
retriable_follow_job,
|
||||
)
|
||||
from lava.utils.lava_log import LogSectionType
|
||||
|
||||
from .lava.helpers import (
|
||||
create_lava_yaml_msg,
|
||||
generate_n_logs,
|
||||
generate_testsuite_result,
|
||||
jobs_logs_response,
|
||||
mock_logs,
|
||||
section_timeout,
|
||||
)
|
||||
|
||||
NUMBER_OF_MAX_ATTEMPTS = NUMBER_OF_RETRIES_TIMEOUT_DETECTION + 1
|
||||
@@ -74,17 +77,43 @@ XMLRPC_FAULT = xmlrpc.client.Fault(0, "test")
|
||||
|
||||
PROXY_SCENARIOS = {
|
||||
"finish case": (generate_n_logs(1), does_not_raise(), True, {}),
|
||||
"works at last retry": (
|
||||
generate_n_logs(n=NUMBER_OF_MAX_ATTEMPTS, tick_fn=[ DEVICE_HANGING_TIMEOUT_SEC + 1 ] * NUMBER_OF_RETRIES_TIMEOUT_DETECTION + [1]),
|
||||
"boot works at last retry": (
|
||||
mock_logs(
|
||||
{
|
||||
LogSectionType.LAVA_BOOT: [
|
||||
section_timeout(LogSectionType.LAVA_BOOT) + 1
|
||||
]
|
||||
* NUMBER_OF_RETRIES_TIMEOUT_DETECTION
|
||||
+ [1]
|
||||
},
|
||||
),
|
||||
does_not_raise(),
|
||||
True,
|
||||
{},
|
||||
),
|
||||
"timed out more times than retry attempts": (
|
||||
generate_n_logs(
|
||||
n=NUMBER_OF_MAX_ATTEMPTS + 1, tick_fn=DEVICE_HANGING_TIMEOUT_SEC + 1
|
||||
"post process test case took too long": pytest.param(
|
||||
mock_logs(
|
||||
{
|
||||
LogSectionType.LAVA_POST_PROCESSING: [
|
||||
section_timeout(LogSectionType.LAVA_POST_PROCESSING) + 1
|
||||
]
|
||||
* (NUMBER_OF_MAX_ATTEMPTS + 1)
|
||||
},
|
||||
),
|
||||
pytest.raises(MesaCIRetryError),
|
||||
True,
|
||||
{},
|
||||
marks=pytest.mark.xfail(
|
||||
reason=(
|
||||
"The time travel mock is not behaving as expected. "
|
||||
"It makes a gitlab section end in the past when an "
|
||||
"exception happens."
|
||||
)
|
||||
),
|
||||
),
|
||||
"timed out more times than retry attempts": (
|
||||
generate_n_logs(n=4, tick_fn=9999999),
|
||||
pytest.raises(MesaCIRetryError),
|
||||
False,
|
||||
{},
|
||||
),
|
||||
@@ -150,15 +179,20 @@ PROXY_SCENARIOS = {
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"side_effect, expectation, job_result, proxy_args",
|
||||
"test_log, expectation, job_result, proxy_args",
|
||||
PROXY_SCENARIOS.values(),
|
||||
ids=PROXY_SCENARIOS.keys(),
|
||||
)
|
||||
def test_retriable_follow_job(
|
||||
mock_sleep, side_effect, expectation, job_result, proxy_args, mock_proxy
|
||||
mock_sleep,
|
||||
test_log,
|
||||
expectation,
|
||||
job_result,
|
||||
proxy_args,
|
||||
mock_proxy,
|
||||
):
|
||||
with expectation:
|
||||
proxy = mock_proxy(side_effect=side_effect, **proxy_args)
|
||||
proxy = mock_proxy(side_effect=test_log, **proxy_args)
|
||||
job: LAVAJob = retriable_follow_job(proxy, "")
|
||||
assert job_result == (job.status == "pass")
|
||||
|
||||
@@ -196,6 +230,7 @@ def test_simulate_a_long_wait_to_start_a_job(
|
||||
assert delta_time.total_seconds() >= wait_time
|
||||
|
||||
|
||||
|
||||
CORRUPTED_LOG_SCENARIOS = {
|
||||
"too much subsequent corrupted data": (
|
||||
[(False, "{'msg': 'Incomplete}")] * 100 + [jobs_logs_response(True)],
|
||||
|
@@ -22,13 +22,15 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
from lava.exceptions import MesaCITimeoutError
|
||||
from lava.utils.lava_log import (
|
||||
GitlabSection,
|
||||
LogFollower,
|
||||
LogSectionType,
|
||||
filter_debug_messages,
|
||||
fix_lava_color_log,
|
||||
fix_lava_gitlab_section_log,
|
||||
@@ -66,8 +68,14 @@ GITLAB_SECTION_SCENARIOS = {
|
||||
ids=GITLAB_SECTION_SCENARIOS.keys(),
|
||||
)
|
||||
def test_gitlab_section(method, collapsed, expectation):
|
||||
gs = GitlabSection(id="my_first_section", header="my_header", start_collapsed=collapsed)
|
||||
gs.get_timestamp = lambda: "mock_date"
|
||||
gs = GitlabSection(
|
||||
id="my_first_section",
|
||||
header="my_header",
|
||||
type=LogSectionType.TEST_CASE,
|
||||
start_collapsed=collapsed,
|
||||
)
|
||||
gs.get_timestamp = lambda x: "mock_date"
|
||||
gs.start()
|
||||
result = getattr(gs, method)()
|
||||
assert result == expectation
|
||||
|
||||
@@ -274,3 +282,49 @@ LAVA_DEBUG_SPAM_MESSAGES = {
|
||||
)
|
||||
def test_filter_debug_messages(message, expectation):
|
||||
assert filter_debug_messages(message) == expectation
|
||||
|
||||
|
||||
# Scenario id -> (kwargs for the timedelta added to the section timeout,
#                 context manager describing the expected outcome).
WATCHDOG_SCENARIOS = {
    "1 second before timeout": (
        {"seconds": -1},
        does_not_raise(),
    ),
    "1 second after timeout": (
        {"seconds": 1},
        pytest.raises(MesaCITimeoutError),
    ),
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "timedelta_kwargs, exception",
    WATCHDOG_SCENARIOS.values(),
    ids=WATCHDOG_SCENARIOS.keys(),
)
def test_log_follower_watchdog(frozen_time, timedelta_kwargs, exception):
    """Check the LogFollower timeout around the TEST_CASE section deadline.

    A ``<STARTTC>`` signal is fed first. The frozen clock is then advanced by
    the TEST_CASE timeout plus the scenario offset, and a second message is
    fed inside the ``exception`` context manager of the scenario.
    """
    watched_section = LogSectionType.TEST_CASE
    start_signal = {
        "dt": datetime.now(),
        "lvl": "debug",
        "msg": "Received signal: <STARTTC> mesa-ci_iris-kbl-traces",
    }

    follower = LogFollower(timeout_durations={watched_section: timedelta(minutes=1)})
    follower.feed([start_signal])

    # Land just before or just after the deadline, depending on the scenario.
    offset = timedelta(**timedelta_kwargs)
    frozen_time.tick(follower.timeout_durations[watched_section] + offset)

    # Build the follow-up message outside the expectation, after the tick.
    follow_up = [create_lava_yaml_msg()]
    with exception:
        follower.feed(follow_up)
|
||||
|
||||
|
||||
# Pairs of (name given as the section id, id the section is expected to expose).
GITLAB_SECTION_ID_SCENARIOS = [
    # Hyphens and underscores are expected to be kept untouched.
    (
        "a-good_name",
        "a-good_name",
    ),
    # Spaces are expected to become dashes.
    (
        "spaces are not welcome",
        "spaces-are-not-welcome",
    ),
    # Colon, space and slash are all expected to become dashes.
    (
        "abc:amd64 1/3",
        "abc-amd64-1-3",
    ),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case_name, expected_id", GITLAB_SECTION_ID_SCENARIOS)
def test_gitlab_section_id(case_name, expected_id):
    """Check the ``id`` exposed by a GitlabSection built from ``case_name``.

    The same string is used as both ``id`` and ``header``; the resulting
    section's ``id`` attribute must equal ``expected_id``.
    """
    section = GitlabSection(
        id=case_name,
        header=case_name,
        type=LogSectionType.LAVA_POST_PROCESSING,
    )

    assert section.id == expected_id
|
||||
|
Reference in New Issue
Block a user