diff --git a/.gitlab-ci/lava/lava-submit.sh b/.gitlab-ci/lava/lava-submit.sh index 5f4c8432d6e..db44a8e8ad3 100755 --- a/.gitlab-ci/lava/lava-submit.sh +++ b/.gitlab-ci/lava/lava-submit.sh @@ -66,4 +66,5 @@ PYTHONPATH=artifacts/ artifacts/lava/lava_job_submitter.py \ --ssh-client-image "${LAVA_SSH_CLIENT_IMAGE}" \ --project-name "${CI_PROJECT_NAME}" \ --starting-section "${CURRENT_SECTION}" \ + --job-submitted-at "${CI_JOB_STARTED_AT}" \ >> results/lava.log diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py index 2e9cf77ef13..5e6c09952bf 100755 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ b/.gitlab-ci/lava/lava_job_submitter.py @@ -136,13 +136,16 @@ def raise_lava_error(job) -> None: job.status = "fail" -def show_final_job_data(job, colour=f"{CONSOLE_LOG['FG_GREEN']}"): +def show_final_job_data( + job, colour=f"{CONSOLE_LOG['FG_GREEN']}", timestamp_relative_to=None +): with GitlabSection( "job_data", "LAVA job info", type=LogSectionType.LAVA_POST_PROCESSING, start_collapsed=True, colour=colour, + timestamp_relative_to=timestamp_relative_to, ): wait_post_processing_retries: int = WAIT_FOR_LAVA_POST_PROCESSING_RETRIES while not job.is_post_processed() and wait_post_processing_retries > 0: @@ -252,16 +255,21 @@ def wait_for_job_get_started(job, attempt_no): print_log(f"Job {job.job_id} started.") -def bootstrap_log_follower(main_test_case) -> LogFollower: +def bootstrap_log_follower(main_test_case, timestamp_relative_to) -> LogFollower: start_section = GitlabSection( id="dut_boot", header="Booting hardware device", type=LogSectionType.LAVA_BOOT, start_collapsed=True, suppress_end=True, # init-stage2 prints the end for us + timestamp_relative_to=timestamp_relative_to, ) print(start_section.start()) - return LogFollower(starting_section=start_section, main_test_case=main_test_case) + return LogFollower( + starting_section=start_section, + main_test_case=main_test_case, + timestamp_relative_to=timestamp_relative_to + ) def follow_job_execution(job, log_follower): @@ -294,7 +302,7 @@ def structural_log_phases(job, log_follower): job.log["dut_job_phases"] = phases -def print_job_final_status(job): +def print_job_final_status(job, timestamp_relative_to): if job.status == "running": job.status = "hung" @@ -306,11 +314,15 @@ def print_job_final_status(job): ) job.refresh_log() - show_final_job_data(job, colour=f"{CONSOLE_LOG['FG_BOLD_RED']}") + show_final_job_data( + job, colour=f"{CONSOLE_LOG['FG_BOLD_RED']}", + timestamp_relative_to=timestamp_relative_to + ) def execute_job_with_retries( - proxy, job_definition, retry_count, jobs_log, main_test_case + proxy, job_definition, retry_count, jobs_log, main_test_case, + timestamp_relative_to ) -> Optional[LAVAJob]: last_failed_job = None for attempt_no in range(1, retry_count + 2): @@ -328,10 +340,13 @@ def execute_job_with_retries( header="Waiting for hardware device to become available", type=LogSectionType.LAVA_QUEUE, start_collapsed=False, + timestamp_relative_to=timestamp_relative_to ) with queue_section as section: wait_for_job_get_started(job, attempt_no) - log_follower: LogFollower = bootstrap_log_follower(main_test_case) + log_follower: LogFollower = bootstrap_log_follower( + main_test_case, timestamp_relative_to + ) follow_job_execution(job, log_follower) return job @@ -339,7 +354,7 @@ def execute_job_with_retries( job.handle_exception(exception) finally: - print_job_final_status(job) + print_job_final_status(job, timestamp_relative_to) # If LAVA takes too long to post process the job, the submitter # gives up and proceeds. job_log["submitter_end_time"] = datetime.now(tz=UTC).isoformat() @@ -355,12 +370,14 @@ def execute_job_with_retries( return last_failed_job -def retriable_follow_job(proxy, job_definition, main_test_case) -> LAVAJob: +def retriable_follow_job( + proxy, job_definition, main_test_case, timestamp_relative_to +) -> LAVAJob: number_of_retries = NUMBER_OF_RETRIES_TIMEOUT_DETECTION last_attempted_job = execute_job_with_retries( proxy, job_definition, number_of_retries, STRUCTURAL_LOG["dut_jobs"], - main_test_case + main_test_case, timestamp_relative_to ) if last_attempted_job.exception is not None: @@ -417,6 +434,7 @@ class LAVAJobSubmitter(PathResolver): ssh_client_image: str = None # x86_64 SSH client image to follow the job's output project_name: str = None # Project name to be used in the job name starting_section: str = None # GitLab section used to start + job_submitted_at: [str | datetime] = None __structured_log_context = contextlib.nullcontext() # Structured Logger context def __post_init__(self) -> None: @@ -427,6 +445,8 @@ class LAVAJobSubmitter(PathResolver): if not self.structured_log_file: return + if self.job_submitted_at: + self.job_submitted_at = datetime.fromisoformat(self.job_submitted_at) self.__structured_log_context = StructuredLoggerWrapper(self).logger_context() self.proxy = setup_lava_proxy() @@ -482,7 +502,8 @@ class LAVAJobSubmitter(PathResolver): id=self.starting_section, header="Preparing to submit job for scheduling", type=LogSectionType.LAVA_SUBMIT, - start_collapsed=True + start_collapsed=True, + timestamp_relative_to=self.job_submitted_at, ) gl.start() print(gl.end()) @@ -492,7 +513,8 @@ class LAVAJobSubmitter(PathResolver): try: last_attempt_job = retriable_follow_job( self.proxy, job_definition, - f'{self.project_name}_{self.mesa_job_name}') + f'{self.project_name}_{self.mesa_job_name}', + self.job_submitted_at) except MesaCIRetryError as retry_exception: last_attempt_job = retry_exception.last_job diff --git a/.gitlab-ci/lava/utils/gitlab_section.py b/.gitlab-ci/lava/utils/gitlab_section.py index 009be7d8dca..1e9894fc857 100644 --- a/.gitlab-ci/lava/utils/gitlab_section.py +++ b/.gitlab-ci/lava/utils/gitlab_section.py @@ -3,6 +3,7 @@ from __future__ import annotations import re from dataclasses import dataclass, field from datetime import datetime, timedelta, UTC +from math import floor from typing import TYPE_CHECKING, Optional from lava.utils.console_format import CONSOLE_LOG @@ -20,6 +21,7 @@ class GitlabSection: start_collapsed: bool = False suppress_end: bool = False suppress_start: bool = False + timestamp_relative_to: Optional[datetime] = None escape: str = "\x1b[0K" colour: str = f"{CONSOLE_LOG['FG_CYAN']}" __start_time: Optional[datetime] = field(default=None, init=False) @@ -60,7 +62,12 @@ class GitlabSection: timestamp = self.get_timestamp(time) before_header = ":".join([preamble, timestamp, section_id]) - colored_header = f"{self.colour}{header}\x1b[0m" if header else "" + if self.timestamp_relative_to: + delta = self.start_time - self.timestamp_relative_to + reltime = f"[{floor(delta.seconds / 60):02}:{(delta.seconds % 60):02}] " + else: + reltime = "" + colored_header = f"{self.colour}{reltime}{header}\x1b[0m" if header else "" header_wrapper = "\r" + f"{self.escape}{colored_header}" return f"{before_header}{header_wrapper}" diff --git a/.gitlab-ci/lava/utils/log_follower.py b/.gitlab-ci/lava/utils/log_follower.py index 4ba8f844cb3..d76b70203e8 100644 --- a/.gitlab-ci/lava/utils/log_follower.py +++ b/.gitlab-ci/lava/utils/log_follower.py @@ -34,6 +34,7 @@ from lava.utils.log_section import ( class LogFollower: starting_section: Optional[GitlabSection] = None main_test_case: Optional[str] = None + timestamp_relative_to: Optional[datetime] = None _current_section: Optional[GitlabSection] = None section_history: list[GitlabSection] = field(default_factory=list, init=False) timeout_durations: dict[LogSectionType, timedelta] = field( @@ -124,7 +125,7 @@ class LogFollower: for log_section in LOG_SECTIONS: if new_section := log_section.from_log_line_to_section( - line, self.main_test_case + line, self.main_test_case, self.timestamp_relative_to ): self.update_section(new_section) break diff --git a/.gitlab-ci/lava/utils/log_section.py b/.gitlab-ci/lava/utils/log_section.py index 8a4f5e6d4a0..d8cf265cef7 100644 --- a/.gitlab-ci/lava/utils/log_section.py +++ b/.gitlab-ci/lava/utils/log_section.py @@ -1,6 +1,6 @@ import re from dataclasses import dataclass -from datetime import timedelta +from datetime import datetime, timedelta from enum import Enum, auto from os import getenv from typing import Optional, Pattern, Union @@ -75,7 +75,8 @@ class LogSection: collapsed: bool = False def from_log_line_to_section( - self, lava_log_line: dict[str, str], main_test_case: Optional[str] + self, lava_log_line: dict[str, str], main_test_case: Optional[str], + timestamp_relative_to: Optional[datetime] ) -> Optional[GitlabSection]: if lava_log_line["lvl"] not in self.levels: return @@ -92,6 +93,7 @@ class LogSection: start_collapsed=self.collapsed, suppress_start=is_main_test_case, suppress_end=is_main_test_case, + timestamp_relative_to=timestamp_relative_to, ) diff --git a/.gitlab-ci/tests/test_lava_job_submitter.py b/.gitlab-ci/tests/test_lava_job_submitter.py index 8ed8649800b..d9cc7d8cc98 100644 --- a/.gitlab-ci/tests/test_lava_job_submitter.py +++ b/.gitlab-ci/tests/test_lava_job_submitter.py @@ -86,7 +86,7 @@ def test_submit_and_follow_respects_exceptions(mock_sleep, mock_proxy, exception with pytest.raises(MesaCIException): proxy = mock_proxy(side_effect=exception) job = LAVAJob(proxy, "") - log_follower = bootstrap_log_follower(main_test_case="") + log_follower = bootstrap_log_follower(main_test_case="", timestamp_relative_to=None) follow_job_execution(job, log_follower) @@ -242,7 +242,7 @@ def test_retriable_follow_job( ): with expectation: proxy = mock_proxy(side_effect=test_log, **proxy_args) - job: LAVAJob = retriable_follow_job(proxy, "", "") + job: LAVAJob = retriable_follow_job(proxy, "", "", None) assert job_result == job.status assert exit_code == job.exit_code @@ -269,6 +269,7 @@ def test_simulate_a_long_wait_to_start_a_job( ), "", "", + None ) end_time = datetime.now() @@ -323,7 +324,7 @@ def test_log_corruption(mock_sleep, data_sequence, expected_exception, mock_prox proxy_logs_mock = proxy_mock.scheduler.jobs.logs proxy_logs_mock.side_effect = data_sequence with expected_exception: - retriable_follow_job(proxy_mock, "", "") + retriable_follow_job(proxy_mock, "", "", None) LAVA_RESULT_LOG_SCENARIOS = { @@ -440,7 +441,7 @@ def test_full_yaml_log(mock_proxy, frozen_time, lava_job_submitter): try: time_travel_to_test_time() start_time = datetime.now() - retriable_follow_job(proxy, "", "") + retriable_follow_job(proxy, "", "", None) finally: try: # If the job fails, maybe there will be no structured log