diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py
index 77bbe993fed..07dbb34aa48 100755
--- a/.gitlab-ci/lava/lava_job_submitter.py
+++ b/.gitlab-ci/lava/lava_job_submitter.py
@@ -17,7 +17,6 @@ import time
 from collections import defaultdict
 from dataclasses import dataclass, fields
 from datetime import datetime, timedelta
-from io import StringIO
 from os import environ, getenv, path
 from typing import Any, Optional
 
@@ -34,11 +33,11 @@ from lava.utils import (
     CONSOLE_LOG,
     GitlabSection,
     LAVAJob,
+    LAVAJobDefinition,
     LogFollower,
     LogSectionType,
     call_proxy,
     fatal_err,
-    generate_lava_job_definition,
     hide_sensitive_data,
     print_log,
     setup_lava_proxy,
@@ -404,7 +403,7 @@ class LAVAJobSubmitter(PathResolver):
             minutes=self.job_timeout_min
         )
 
-        job_definition = generate_lava_job_definition(self)
+        job_definition = LAVAJobDefinition(self).generate_lava_job_definition()
 
         if self.dump_yaml:
             self.dump_job_definition(job_definition)
diff --git a/.gitlab-ci/lava/utils/__init__.py b/.gitlab-ci/lava/utils/__init__.py
index 349d2b32561..e2388bb6808 100644
--- a/.gitlab-ci/lava/utils/__init__.py
+++ b/.gitlab-ci/lava/utils/__init__.py
@@ -1,7 +1,7 @@
 from .console_format import CONSOLE_LOG
 from .gitlab_section import GitlabSection
 from .lava_job import LAVAJob
-from .lava_job_definition import generate_lava_job_definition
+from .lava_job_definition import LAVAJobDefinition
 from .lava_proxy import call_proxy, setup_lava_proxy
 from .log_follower import (
     LogFollower,
diff --git a/.gitlab-ci/lava/utils/lava_job_definition.py b/.gitlab-ci/lava/utils/lava_job_definition.py
index 37f01141a50..7e4e4c1556c 100644
--- a/.gitlab-ci/lava/utils/lava_job_definition.py
+++ b/.gitlab-ci/lava/utils/lava_job_definition.py
@@ -3,10 +3,11 @@ from io import StringIO
 from os import getenv
 from typing import TYPE_CHECKING, Any
 
-from lava.utils.lava_farm import LavaFarm, get_lava_farm
 from ruamel.yaml import YAML
 from ruamel.yaml.scalarstring import LiteralScalarString
 
+from lava.utils.lava_farm import LavaFarm, get_lava_farm
+
 if TYPE_CHECKING:
     from lava.lava_job_submitter import LAVAJobSubmitter
 
@@ -19,50 +20,6 @@ NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3
 JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
 
 
-def has_ssh_support(job_submitter: "LAVAJobSubmitter") -> bool:
-    force_uart = bool(getenv("LAVA_FORCE_UART", False))
-
-    if force_uart:
-        return False
-
-    # Only Collabora's farm supports to run docker container as a LAVA actions,
-    # which is required to follow the job in a SSH section
-    current_farm = get_lava_farm()
-
-    # SSH job definition still needs to add support for fastboot.
-    job_uses_fastboot: bool = job_submitter.boot_method == "fastboot"
-
-    return current_farm == LavaFarm.COLLABORA and not job_uses_fastboot
-
-
-def generate_lava_yaml_payload(job_submitter: "LAVAJobSubmitter") -> dict[str, Any]:
-    """
-    Bridge function to use the supported job definition depending on some Mesa
-    CI job characteristics.
-
-    The strategy here, is to use LAVA with a containerized SSH session to follow
-    the job output, escaping from dumping data to the UART, which proves to be
-    error prone in some devices.
-    """
-    from lava.utils.ssh_job_definition import \
-        generate_lava_yaml_payload as ssh_lava_yaml
-    from lava.utils.uart_job_definition import \
-        generate_lava_yaml_payload as uart_lava_yaml
-
-    if has_ssh_support(job_submitter):
-        return ssh_lava_yaml(job_submitter)
-
-    return uart_lava_yaml(job_submitter)
-
-
-def generate_lava_job_definition(job_submitter: "LAVAJobSubmitter") -> str:
-    job_stream = StringIO()
-    yaml = YAML()
-    yaml.width = 4096
-    yaml.dump(generate_lava_yaml_payload(job_submitter), job_stream)
-    return job_stream.getvalue()
-
-
 def to_yaml_block(steps_array: list[str], escape_vars=[]) -> LiteralScalarString:
     def escape_envvar(match):
         return "\\" + match.group(0)
@@ -76,73 +33,127 @@ def to_yaml_block(steps_array: list[str], escape_vars=[]) -> LiteralScalarString
     return LiteralScalarString(final_str)
 
 
-def generate_metadata(args) -> dict[str, Any]:
-    # General metadata and permissions
-    values = {
-        "job_name": f"{args.project_name}: {args.pipeline_info}",
-        "device_type": args.device_type,
-        "visibility": {"group": [args.visibility_group]},
-        "priority": JOB_PRIORITY,
-        "context": {
-            "extra_nfsroot_args": " init=/init rootwait usbcore.quirks=0bda:8153:k"
-        },
-        "timeouts": {
-            "job": {"minutes": args.job_timeout_min},
-            "actions": {
-                "depthcharge-retry": {
-                    # Could take between 1 and 1.5 min in slower boots
-                    "minutes": 4
-                },
-                "depthcharge-start": {
-                    # Should take less than 1 min.
-                    "minutes": 1,
-                },
-                "depthcharge-action": {
-                    # This timeout englobes the entire depthcharge timing,
-                    # including retries
-                    "minutes": 5
-                    * NUMBER_OF_ATTEMPTS_LAVA_BOOT,
+class LAVAJobDefinition:
+    """
+    This class is responsible for generating the YAML payload to submit a LAVA
+    job.
+    """
+
+    def __init__(self, job_submitter: "LAVAJobSubmitter") -> None:
+        self.job_submitter: "LAVAJobSubmitter" = job_submitter
+
+    def has_ssh_support(self) -> bool:
+        force_uart = bool(getenv("LAVA_FORCE_UART", False))
+
+        if force_uart:
+            return False
+
+        # Only Collabora's farm supports running a docker container as a LAVA action,
+        # which is required to follow the job in an SSH section.
+        current_farm = get_lava_farm()
+
+        # SSH job definition still needs to add support for fastboot.
+        job_uses_fastboot: bool = self.job_submitter.boot_method == "fastboot"
+
+        return current_farm == LavaFarm.COLLABORA and not job_uses_fastboot
+
+    def generate_lava_yaml_payload(self) -> dict[str, Any]:
+        """
+        Bridge method to use the supported job definition depending on some Mesa
+        CI job characteristics.
+
+        The strategy here is to use LAVA with a containerized SSH session to follow
+        the job output instead of dumping data to the UART, which proves to be
+        error prone on some devices.
+        """
+        from lava.utils.ssh_job_definition import generate_lava_yaml_payload as ssh_lava_yaml
+        from lava.utils.uart_job_definition import generate_lava_yaml_payload as uart_lava_yaml
+
+        if self.has_ssh_support():
+            return ssh_lava_yaml(self)
+
+        return uart_lava_yaml(self)
+
+    def generate_lava_job_definition(self) -> str:
+        job_stream = StringIO()
+        yaml = YAML()
+        yaml.width = 4096
+        yaml.dump(self.generate_lava_yaml_payload(), job_stream)
+        return job_stream.getvalue()
+
+    def generate_metadata(self) -> dict[str, Any]:
+        # General metadata and permissions
+        values = {
+            "job_name": f"{self.job_submitter.project_name}: {self.job_submitter.pipeline_info}",
+            "device_type": self.job_submitter.device_type,
+            "visibility": {"group": [self.job_submitter.visibility_group]},
+            "priority": JOB_PRIORITY,
+            "context": {"extra_nfsroot_args": " init=/init rootwait usbcore.quirks=0bda:8153:k"},
+            "timeouts": {
+                "job": {"minutes": self.job_submitter.job_timeout_min},
+                "actions": {
+                    "depthcharge-retry": {
+                        # Could take between 1 and 1.5 min in slower boots
+                        "minutes": 4
+                    },
+                    "depthcharge-start": {
+                        # Should take less than 1 min.
+                        "minutes": 1,
+                    },
+                    "depthcharge-action": {
+                        # This timeout covers the entire depthcharge timing,
+                        # including retries
+                        "minutes": 5
+                        * NUMBER_OF_ATTEMPTS_LAVA_BOOT,
+                    },
                 },
             },
-        },
-    }
+        }
 
-    if args.lava_tags:
-        values["tags"] = args.lava_tags.split(",")
+        if self.job_submitter.lava_tags:
+            values["tags"] = self.job_submitter.lava_tags.split(",")
 
-    return values
+        return values
+
+    def attach_kernel_and_dtb(self, deploy_field):
+        if self.job_submitter.kernel_image_type:
+            deploy_field["kernel"]["type"] = self.job_submitter.kernel_image_type
+        if self.job_submitter.dtb_filename:
+            deploy_field["dtb"] = {
+                "url": f"{self.job_submitter.kernel_url_prefix}/"
+                f"{self.job_submitter.dtb_filename}.dtb"
+            }
 
-
-def artifact_download_steps(args):
-    """
-    This function is responsible for setting up the SSH server in the DUT and to
-    export the first boot environment to a file.
-    """
-    # Putting JWT pre-processing and mesa download, within init-stage1.sh file,
-    # as we do with non-SSH version.
-    download_steps = [
-        "set -ex",
-        "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
-        f"{args.job_rootfs_overlay_url} | tar -xz -C /",
-        f"mkdir -p {args.ci_project_dir}",
-        f"curl -L --retry 4 -f --retry-all-errors --retry-delay 60 {args.build_url} | "
-        f"tar --zstd -x -C {args.ci_project_dir}",
-    ]
-
-    # If the JWT file is provided, we will use it to authenticate with the cloud
-    # storage provider and will hide it from the job output in Gitlab.
-    if args.jwt_file:
-        with open(args.jwt_file) as jwt_file:
-            download_steps += [
-                "set +x # HIDE_START",
-                f'echo -n "{jwt_file.read()}" > "{args.jwt_file}"',
-                "set -x # HIDE_END",
-                f'echo "export CI_JOB_JWT_FILE={args.jwt_file}" >> /set-job-env-vars.sh',
-            ]
-    else:
-        download_steps += [
-            "echo Could not find jwt file, disabling S3 requests...",
-            "sed -i '/S3_RESULTS_UPLOAD/d' /set-job-env-vars.sh",
+    def artifact_download_steps(self):
+        """
+        This function is responsible for setting up the SSH server in the DUT and for
+        exporting the first boot environment to a file.
+        """
+        # Put JWT pre-processing and the mesa download within the init-stage1.sh file,
+        # as we do with the non-SSH version.
+        download_steps = [
+            "set -ex",
+            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
+            f"{self.job_submitter.job_rootfs_overlay_url} | tar -xz -C /",
+            f"mkdir -p {self.job_submitter.ci_project_dir}",
+            f"curl -L --retry 4 -f --retry-all-errors --retry-delay 60 {self.job_submitter.build_url} | "
+            f"tar --zstd -x -C {self.job_submitter.ci_project_dir}",
         ]
 
-    return download_steps
+        # If the JWT file is provided, we will use it to authenticate with the cloud
+        # storage provider and will hide it from the job output in Gitlab.
+        if self.job_submitter.jwt_file:
+            with open(self.job_submitter.jwt_file) as jwt_file:
+                download_steps += [
+                    "set +x # HIDE_START",
+                    f'echo -n "{jwt_file.read()}" > "{self.job_submitter.jwt_file}"',
+                    "set -x # HIDE_END",
+                    f'echo "export CI_JOB_JWT_FILE={self.job_submitter.jwt_file}" >> /set-job-env-vars.sh',
+                ]
+        else:
+            download_steps += [
+                "echo Could not find jwt file, disabling S3 requests...",
+                "sed -i '/S3_RESULTS_UPLOAD/d' /set-job-env-vars.sh",
+            ]
+
+        return download_steps
diff --git a/.gitlab-ci/lava/utils/ssh_job_definition.py b/.gitlab-ci/lava/utils/ssh_job_definition.py
index 0c2c4dbcebf..7f030805cb0 100644
--- a/.gitlab-ci/lava/utils/ssh_job_definition.py
+++ b/.gitlab-ci/lava/utils/ssh_job_definition.py
@@ -29,14 +29,13 @@ script after sourcing "dut-env-vars.sh" again for the second SSH test case.
 
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
-from .lava_job_definition import (
-    NUMBER_OF_ATTEMPTS_LAVA_BOOT,
-    artifact_download_steps,
-    generate_metadata,
-    to_yaml_block,
-)
+from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT, to_yaml_block
+
+if TYPE_CHECKING:
+    from ..lava_job_submitter import LAVAJobSubmitter
+    from .lava_job_definition import LAVAJobDefinition
 
 
 # Very early SSH server setup. Uses /dut_ready file to flag it is done.
 SSH_SERVER_COMMANDS = {
@@ -79,7 +78,7 @@ lava_ssh_test_case() {
 ]
 
 
-def generate_dut_test(args):
+def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
     # Commands executed on DUT.
     # Trying to execute the minimal number of commands, because the console data is
     # retrieved via UART, which is hang-prone in some devices.
@@ -109,7 +108,8 @@ def generate_dut_test(args):
     }
 
 
-def generate_docker_test(args):
+def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
+    args = job_definition.job_submitter
     # This is a growing list of commands that will be executed by the docker
     # guest, which will be the SSH client.
     docker_commands = []
@@ -148,7 +148,7 @@ def generate_docker_test(args):
             (
                 "lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
                 "source /dut-env-vars.sh",
-                *artifact_download_steps(args),
+                *job_definition.artifact_download_steps(),
                 "EOF",
             )
         ),
@@ -163,8 +163,9 @@ def generate_docker_test(args):
     return init_stages_test
 
 
-def generate_lava_yaml_payload(args) -> dict[str, Any]:
-    values = generate_metadata(args)
+def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
+    values = job_definition.generate_metadata()
+    job_submitter = job_definition.job_submitter
 
     # URLs to our kernel rootfs to boot from, both generated by the base
     # container build
@@ -175,22 +176,19 @@ def generate_lava_yaml_payload(args) -> dict[str, Any]:
         "timeouts": {"http-download": {"minutes": 2}},
         "to": "tftp",
         "os": "oe",
-        "kernel": {"url": f"{args.kernel_url_prefix}/{args.kernel_image_name}"},
+        "kernel": {"url": f"{job_submitter.kernel_url_prefix}/{job_submitter.kernel_image_name}"},
         "nfsrootfs": {
-            "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
+            "url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
             "compression": "zstd",
         },
     }
-    if args.kernel_image_type:
-        deploy["kernel"]["type"] = args.kernel_image_type
-    if args.dtb_filename:
-        deploy["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"}
+    job_definition.attach_kernel_and_dtb(deploy)
     # always boot over NFS
     boot = {
         "namespace": "dut",
         "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
-        "method": args.boot_method,
+        "method": job_submitter.boot_method,
         "commands": "nfs",
         "prompts": ["lava-shell:"],
         **SSH_SERVER_COMMANDS,
     }
@@ -201,8 +199,8 @@ def generate_lava_yaml_payload(args) -> dict[str, Any]:
     values["actions"] = [
         {"deploy": deploy},
         {"boot": boot},
-        {"test": generate_dut_test(args)},
-        {"test": generate_docker_test(args)},
+        {"test": generate_dut_test(job_submitter)},
+        {"test": generate_docker_test(job_definition)},
     ]
 
     return values
diff --git a/.gitlab-ci/lava/utils/uart_job_definition.py b/.gitlab-ci/lava/utils/uart_job_definition.py
index 8a6386d3ffb..c1bfaea5840 100644
--- a/.gitlab-ci/lava/utils/uart_job_definition.py
+++ b/.gitlab-ci/lava/utils/uart_job_definition.py
@@ -1,10 +1,10 @@
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
-    from lava.lava_job_submitter import LAVAJobSubmitter
+    from ..lava_job_submitter import LAVAJobSubmitter
+    from .lava_job_definition import LAVAJobDefinition
 
-from .lava_job_definition import (NUMBER_OF_ATTEMPTS_LAVA_BOOT,
-                                  artifact_download_steps, generate_metadata)
+from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
 
 # Use the same image that is being used for the hardware enablement and health-checks.
 # They are pretty small (<100MB) and have all the tools we need to run LAVA, so it is a safe choice.
@@ -14,14 +14,8 @@ from .lava_job_definition import (NUMBER_OF_ATTEMPTS_LAVA_BOOT,
 DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"
 
 
-def attach_kernel_and_dtb(args, deploy_field):
-    if args.kernel_image_type:
-        deploy_field["kernel"]["type"] = args.kernel_image_type
-    if args.dtb_filename:
-        deploy_field["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"}
-
-
-def fastboot_deploy_actions(args: "LAVAJobSubmitter", nfsrootfs) -> list[dict[str, Any]]:
+def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
+    args = job_definition.job_submitter
     fastboot_deploy_nfs = {
         "timeout": {"minutes": 10},
         "to": "nfs",
@@ -63,12 +57,13 @@ def fastboot_deploy_actions(args: "LAVAJobSubmitt
 
     # URLs to our kernel rootfs to boot from, both generated by the base
     # container build
-    attach_kernel_and_dtb(args, fastboot_deploy_prepare)
+    job_definition.attach_kernel_and_dtb(fastboot_deploy_prepare["images"])
 
     return [{"deploy": d} for d in (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)]
 
 
-def tftp_deploy_actions(args: "LAVAJobSubmitter", nfsrootfs) -> list[dict[str, Any]]:
+def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
+    args = job_definition.job_submitter
     tftp_deploy = {
         "timeout": {"minutes": 5},
         "to": "tftp",
@@ -78,7 +73,7 @@ def tftp_deploy_actions(args: "LAVAJobSubmitter", nfsrootfs) -> list[dict[str, A
         },
         "nfsrootfs": nfsrootfs,
     }
-    attach_kernel_and_dtb(args, tftp_deploy)
+    job_definition.attach_kernel_and_dtb(tftp_deploy)
 
     return [{"deploy": d} for d in [tftp_deploy]]
 
@@ -105,9 +100,10 @@ def init_stage1_steps(args: "LAVAJobSubmitter") -> list[str]:
     return run_steps
 
 
-def test_actions(args: "LAVAJobSubmitter") -> list[dict[str, Any]]:
+def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
     # skeleton test definition: only declaring each job as a single 'test'
     # since LAVA's test parsing is not useful to us
+    args = job_definition.job_submitter
     run_steps = []
     test = {
         "timeout": {"minutes": args.job_timeout_min},
@@ -133,7 +129,7 @@ def test_actions(args: "LAVAJobSubmitter") -> list[dict[str, Any]]:
     }
 
     run_steps += init_stage1_steps(args)
-    run_steps += artifact_download_steps(args)
+    run_steps += job_definition.artifact_download_steps()
 
     run_steps += [
         f"mkdir -p {args.ci_project_dir}",
@@ -174,7 +170,7 @@ def fastboot_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
     return fastboot_boot
 
 
-def generate_lava_yaml_payload(args: "LAVAJobSubmitter") -> dict[str, Any]:
+def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
     """
     Generates a YAML payload for submitting a LAVA job, based on the provided
     arguments.
@@ -187,23 +183,24 @@ def generate_lava_yaml_payload(args: "LAVAJobSubmitter") -> dict[str, Any]:
     a dictionary containing the values generated by the `generate_metadata`
     function and the actions for the LAVA job submission.
     """
-    values = generate_metadata(args)
+    job_submitter = job_definition.job_submitter
+    values = job_definition.generate_metadata()
 
     nfsrootfs = {
-        "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
+        "url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
        "compression": "zstd",
     }
 
-    if args.boot_method == "fastboot":
+    if job_submitter.boot_method == "fastboot":
         values["actions"] = [
-            *fastboot_deploy_actions(args, nfsrootfs),
-            {"boot": fastboot_boot_action(args)},
+            *fastboot_deploy_actions(job_definition, nfsrootfs),
+            {"boot": fastboot_boot_action(job_submitter)},
         ]
     else:  # tftp
         values["actions"] = [
-            *tftp_deploy_actions(args, nfsrootfs),
-            {"boot": tftp_boot_action(args)},
+            *tftp_deploy_actions(job_definition, nfsrootfs),
+            {"boot": tftp_boot_action(job_submitter)},
         ]
-
-    values["actions"].extend(test_actions(args))
+    values["actions"].extend(test_actions(job_definition))
+
     return values