ci/lava: Follow job execution via LogFollower

Now LogFollower is used to deal with the LAVA logs.

Moreover, this commit adds timeouts per Gitlab section, if a section
takes longer than expected, cancel the job and retry again.

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16323>
This commit is contained in:
Guilherme Gallo
2022-04-04 11:26:17 -03:00
committed by Marge Bot
parent 2569d7d7df
commit aa26a6ab72
5 changed files with 300 additions and 62 deletions

View File

@@ -1,10 +1,23 @@
from contextlib import nullcontext as does_not_raise
from datetime import datetime, timedelta
from datetime import datetime
from itertools import cycle
from typing import Callable, Generator, Iterable, Tuple, Union
import yaml
from freezegun import freeze_time
from lava.utils.lava_log import (
DEFAULT_GITLAB_SECTION_TIMEOUTS,
FALLBACK_GITLAB_SECTION_TIMEOUT,
LogSectionType,
)
def section_timeout(section_type: LogSectionType) -> int:
return int(
DEFAULT_GITLAB_SECTION_TIMEOUTS.get(
section_type, FALLBACK_GITLAB_SECTION_TIMEOUT
).total_seconds()
)
def create_lava_yaml_msg(
@@ -21,8 +34,6 @@ def generate_testsuite_result(
if extra is None:
extra = {}
return {"metadata": {"result": result, **metadata_extra}, "name": name}
def jobs_logs_response(
finished=False, msg=None, lvl="target", result=None
) -> Tuple[bool, str]:
@@ -36,6 +47,19 @@ def jobs_logs_response(
return finished, yaml.safe_dump(logs)
def section_aware_message_generator(
messages: dict[LogSectionType, Iterable[int]]
) -> Iterable[tuple[dict, Iterable[int]]]:
default = [1]
for section_type in LogSectionType:
delay = messages.get(section_type, default)
yield mock_lava_signal(section_type), delay
def message_generator():
for section_type in LogSectionType:
yield mock_lava_signal(section_type)
def level_generator():
# Tests all known levels by default
@@ -80,3 +104,28 @@ def to_iterable(tick_fn):
else:
tick_gen = cycle((tick_fn,))
return tick_gen
def mock_logs(messages={}, result="pass"):
with freeze_time(datetime.now()) as time_travel:
# Simulate a complete run given by message_fn
for msg, tick_list in section_aware_message_generator(messages):
for tick_sec in tick_list:
yield jobs_logs_response(finished=False, msg=[msg])
time_travel.tick(tick_sec)
yield jobs_logs_response(finished=True, result="pass")
def mock_lava_signal(type: LogSectionType) -> dict[str, str]:
return {
LogSectionType.TEST_CASE: create_lava_yaml_msg(
msg="<STARTTC> case", lvl="debug"
),
LogSectionType.TEST_SUITE: create_lava_yaml_msg(
msg="<STARTRUN> suite", lvl="debug"
),
LogSectionType.LAVA_POST_PROCESSING: create_lava_yaml_msg(
msg="<LAVA_SIGNAL_ENDTC case>", lvl="target"
),
}.get(type, create_lava_yaml_msg())

View File

@@ -36,12 +36,15 @@ from lava.lava_job_submitter import (
follow_job_execution,
retriable_follow_job,
)
from lava.utils.lava_log import LogSectionType
from .lava.helpers import (
create_lava_yaml_msg,
generate_n_logs,
generate_testsuite_result,
jobs_logs_response,
mock_logs,
section_timeout,
)
NUMBER_OF_MAX_ATTEMPTS = NUMBER_OF_RETRIES_TIMEOUT_DETECTION + 1
@@ -74,17 +77,43 @@ XMLRPC_FAULT = xmlrpc.client.Fault(0, "test")
PROXY_SCENARIOS = {
"finish case": (generate_n_logs(1), does_not_raise(), True, {}),
"works at last retry": (
generate_n_logs(n=NUMBER_OF_MAX_ATTEMPTS, tick_fn=[ DEVICE_HANGING_TIMEOUT_SEC + 1 ] * NUMBER_OF_RETRIES_TIMEOUT_DETECTION + [1]),
"boot works at last retry": (
mock_logs(
{
LogSectionType.LAVA_BOOT: [
section_timeout(LogSectionType.LAVA_BOOT) + 1
]
* NUMBER_OF_RETRIES_TIMEOUT_DETECTION
+ [1]
},
),
does_not_raise(),
True,
{},
),
"timed out more times than retry attempts": (
generate_n_logs(
n=NUMBER_OF_MAX_ATTEMPTS + 1, tick_fn=DEVICE_HANGING_TIMEOUT_SEC + 1
"post process test case took too long": pytest.param(
mock_logs(
{
LogSectionType.LAVA_POST_PROCESSING: [
section_timeout(LogSectionType.LAVA_POST_PROCESSING) + 1
]
* (NUMBER_OF_MAX_ATTEMPTS + 1)
},
),
pytest.raises(MesaCIRetryError),
True,
{},
marks=pytest.mark.xfail(
reason=(
"The time travel mock is not behaving as expected. "
"It makes a gitlab section end in the past when an "
"exception happens."
)
),
),
"timed out more times than retry attempts": (
generate_n_logs(n=4, tick_fn=9999999),
pytest.raises(MesaCIRetryError),
False,
{},
),
@@ -150,15 +179,20 @@ PROXY_SCENARIOS = {
@pytest.mark.parametrize(
"side_effect, expectation, job_result, proxy_args",
"test_log, expectation, job_result, proxy_args",
PROXY_SCENARIOS.values(),
ids=PROXY_SCENARIOS.keys(),
)
def test_retriable_follow_job(
mock_sleep, side_effect, expectation, job_result, proxy_args, mock_proxy
mock_sleep,
test_log,
expectation,
job_result,
proxy_args,
mock_proxy,
):
with expectation:
proxy = mock_proxy(side_effect=side_effect, **proxy_args)
proxy = mock_proxy(side_effect=test_log, **proxy_args)
job: LAVAJob = retriable_follow_job(proxy, "")
assert job_result == (job.status == "pass")
@@ -196,6 +230,7 @@ def test_simulate_a_long_wait_to_start_a_job(
assert delta_time.total_seconds() >= wait_time
CORRUPTED_LOG_SCENARIOS = {
"too much subsequent corrupted data": (
[(False, "{'msg': 'Incomplete}")] * 100 + [jobs_logs_response(True)],

View File

@@ -22,13 +22,15 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from datetime import datetime
from datetime import datetime, timedelta
import pytest
import yaml
from lava.exceptions import MesaCITimeoutError
from lava.utils.lava_log import (
GitlabSection,
LogFollower,
LogSectionType,
filter_debug_messages,
fix_lava_color_log,
fix_lava_gitlab_section_log,
@@ -66,8 +68,14 @@ GITLAB_SECTION_SCENARIOS = {
ids=GITLAB_SECTION_SCENARIOS.keys(),
)
def test_gitlab_section(method, collapsed, expectation):
gs = GitlabSection(id="my_first_section", header="my_header", start_collapsed=collapsed)
gs.get_timestamp = lambda: "mock_date"
gs = GitlabSection(
id="my_first_section",
header="my_header",
type=LogSectionType.TEST_CASE,
start_collapsed=collapsed,
)
gs.get_timestamp = lambda x: "mock_date"
gs.start()
result = getattr(gs, method)()
assert result == expectation
@@ -274,3 +282,49 @@ LAVA_DEBUG_SPAM_MESSAGES = {
)
def test_filter_debug_messages(message, expectation):
assert filter_debug_messages(message) == expectation
WATCHDOG_SCENARIOS = {
"1 second before timeout": ({"seconds": -1}, does_not_raise()),
"1 second after timeout": ({"seconds": 1}, pytest.raises(MesaCITimeoutError)),
}
@pytest.mark.parametrize(
"timedelta_kwargs, exception",
WATCHDOG_SCENARIOS.values(),
ids=WATCHDOG_SCENARIOS.keys(),
)
def test_log_follower_watchdog(frozen_time, timedelta_kwargs, exception):
lines = [
{
"dt": datetime.now(),
"lvl": "debug",
"msg": "Received signal: <STARTTC> mesa-ci_iris-kbl-traces",
},
]
td = {LogSectionType.TEST_CASE: timedelta(minutes=1)}
lf = LogFollower(timeout_durations=td)
lf.feed(lines)
frozen_time.tick(
lf.timeout_durations[LogSectionType.TEST_CASE] + timedelta(**timedelta_kwargs)
)
lines = [create_lava_yaml_msg()]
with exception:
lf.feed(lines)
GITLAB_SECTION_ID_SCENARIOS = [
("a-good_name", "a-good_name"),
("spaces are not welcome", "spaces-are-not-welcome"),
("abc:amd64 1/3", "abc-amd64-1-3"),
]
@pytest.mark.parametrize("case_name, expected_id", GITLAB_SECTION_ID_SCENARIOS)
def test_gitlab_section_id(case_name, expected_id):
gl = GitlabSection(
id=case_name, header=case_name, type=LogSectionType.LAVA_POST_PROCESSING
)
assert gl.id == expected_id