ci/lava: Create LAVAJobDefinition

Absorb the complexity of the building blocks that generate job
definitions into a new LAVAJobDefinition class, covering each boot
mode (usage sketched below):
- fastboot-uart
- uboot-uart
- uboot-ssh
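
A minimal sketch of the resulting call flow, distilled from the diff
below; submit_example() is a hypothetical wrapper, the real call site
is the LAVAJobSubmitter hunk in the first file:

    # Sketch only: how a submitter builds its job definition after this commit.
    from lava.utils import LAVAJobDefinition

    def submit_example(job_submitter) -> str:
        # The class wraps the submitter, picks the SSH or UART flavour internally
        # via has_ssh_support(), and serializes the payload with ruamel YAML.
        return LAVAJobDefinition(job_submitter).generate_lava_job_definition()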

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25912>
Author: Guilherme Gallo
Date: 2023-10-25 21:55:05 -03:00
Committed by: Marge Bot
Parent: 77c3091fdd
Commit: 76922f8404

5 changed files with 162 additions and 157 deletions

View File

@@ -17,7 +17,6 @@ import time
from collections import defaultdict
from dataclasses import dataclass, fields
from datetime import datetime, timedelta
from io import StringIO
from os import environ, getenv, path
from typing import Any, Optional
@@ -34,11 +33,11 @@ from lava.utils import (
CONSOLE_LOG,
GitlabSection,
LAVAJob,
LAVAJobDefinition,
LogFollower,
LogSectionType,
call_proxy,
fatal_err,
generate_lava_job_definition,
hide_sensitive_data,
print_log,
setup_lava_proxy,
@@ -404,7 +403,7 @@ class LAVAJobSubmitter(PathResolver):
minutes=self.job_timeout_min
)
job_definition = generate_lava_job_definition(self)
job_definition = LAVAJobDefinition(self).generate_lava_job_definition()
if self.dump_yaml:
self.dump_job_definition(job_definition)

View File

@@ -1,7 +1,7 @@
from .console_format import CONSOLE_LOG
from .gitlab_section import GitlabSection
from .lava_job import LAVAJob
from .lava_job_definition import generate_lava_job_definition
from .lava_job_definition import LAVAJobDefinition
from .lava_proxy import call_proxy, setup_lava_proxy
from .log_follower import (
LogFollower,

View File

@@ -3,10 +3,11 @@ from io import StringIO
from os import getenv
from typing import TYPE_CHECKING, Any
from lava.utils.lava_farm import LavaFarm, get_lava_farm
from ruamel.yaml import YAML
from ruamel.yaml.scalarstring import LiteralScalarString
from lava.utils.lava_farm import LavaFarm, get_lava_farm
if TYPE_CHECKING:
from lava.lava_job_submitter import LAVAJobSubmitter
@@ -19,50 +20,6 @@ NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3
JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75))
def has_ssh_support(job_submitter: "LAVAJobSubmitter") -> bool:
force_uart = bool(getenv("LAVA_FORCE_UART", False))
if force_uart:
return False
# Only Collabora's farm supports running a docker container as a LAVA action,
# which is required to follow the job in an SSH section
current_farm = get_lava_farm()
# SSH job definition still needs to add support for fastboot.
job_uses_fastboot: bool = job_submitter.boot_method == "fastboot"
return current_farm == LavaFarm.COLLABORA and not job_uses_fastboot
def generate_lava_yaml_payload(job_submitter: "LAVAJobSubmitter") -> dict[str, Any]:
"""
Bridge function to use the supported job definition depending on some Mesa
CI job characteristics.
The strategy here is to use LAVA with a containerized SSH session to follow
the job output, avoiding dumping data to the UART, which proves to be
error-prone on some devices.
"""
from lava.utils.ssh_job_definition import \
generate_lava_yaml_payload as ssh_lava_yaml
from lava.utils.uart_job_definition import \
generate_lava_yaml_payload as uart_lava_yaml
if has_ssh_support(job_submitter):
return ssh_lava_yaml(job_submitter)
return uart_lava_yaml(job_submitter)
def generate_lava_job_definition(job_submitter: "LAVAJobSubmitter") -> str:
job_stream = StringIO()
yaml = YAML()
yaml.width = 4096
yaml.dump(generate_lava_yaml_payload(job_submitter), job_stream)
return job_stream.getvalue()
def to_yaml_block(steps_array: list[str], escape_vars=[]) -> LiteralScalarString:
def escape_envvar(match):
return "\\" + match.group(0)
@@ -76,18 +33,64 @@ def to_yaml_block(steps_array: list[str], escape_vars=[]) -> LiteralScalarString
return LiteralScalarString(final_str)
def generate_metadata(args) -> dict[str, Any]:
class LAVAJobDefinition:
"""
This class is responsible for generating the YAML payload to submit a LAVA
job.
"""
def __init__(self, job_submitter: "LAVAJobSubmitter") -> None:
self.job_submitter: "LAVAJobSubmitter" = job_submitter
def has_ssh_support(self) -> bool:
force_uart = bool(getenv("LAVA_FORCE_UART", False))
if force_uart:
return False
# Only Collabora's farm supports running a docker container as a LAVA action,
# which is required to follow the job in an SSH section
current_farm = get_lava_farm()
# SSH job definition still needs to add support for fastboot.
job_uses_fastboot: bool = self.job_submitter.boot_method == "fastboot"
return current_farm == LavaFarm.COLLABORA and not job_uses_fastboot
def generate_lava_yaml_payload(self) -> dict[str, Any]:
"""
Bridge function to use the supported job definition depending on some Mesa
CI job characteristics.
The strategy here is to use LAVA with a containerized SSH session to follow
the job output, avoiding dumping data to the UART, which proves to be
error-prone on some devices.
"""
from lava.utils.ssh_job_definition import generate_lava_yaml_payload as ssh_lava_yaml
from lava.utils.uart_job_definition import generate_lava_yaml_payload as uart_lava_yaml
if self.has_ssh_support():
return ssh_lava_yaml(self)
return uart_lava_yaml(self)
def generate_lava_job_definition(self) -> str:
job_stream = StringIO()
yaml = YAML()
yaml.width = 4096
yaml.dump(self.generate_lava_yaml_payload(), job_stream)
return job_stream.getvalue()
def generate_metadata(self) -> dict[str, Any]:
# General metadata and permissions
values = {
"job_name": f"{args.project_name}: {args.pipeline_info}",
"device_type": args.device_type,
"visibility": {"group": [args.visibility_group]},
"job_name": f"{self.job_submitter.project_name}: {self.job_submitter.pipeline_info}",
"device_type": self.job_submitter.device_type,
"visibility": {"group": [self.job_submitter.visibility_group]},
"priority": JOB_PRIORITY,
"context": {
"extra_nfsroot_args": " init=/init rootwait usbcore.quirks=0bda:8153:k"
},
"context": {"extra_nfsroot_args": " init=/init rootwait usbcore.quirks=0bda:8153:k"},
"timeouts": {
"job": {"minutes": args.job_timeout_min},
"job": {"minutes": self.job_submitter.job_timeout_min},
"actions": {
"depthcharge-retry": {
# Could take between 1 and 1.5 min in slower boots
@@ -107,13 +110,21 @@ def generate_metadata(args) -> dict[str, Any]:
},
}
if args.lava_tags:
values["tags"] = args.lava_tags.split(",")
if self.job_submitter.lava_tags:
values["tags"] = self.job_submitter.lava_tags.split(",")
return values
def attach_kernel_and_dtb(self, deploy_field):
if self.job_submitter.kernel_image_type:
deploy_field["kernel"]["type"] = self.job_submitter.kernel_image_type
if self.job_submitter.dtb_filename:
deploy_field["dtb"] = {
"url": f"{self.job_submitter.kernel_url_prefix}/"
f"{self.job_submitter.dtb_filename}.dtb"
}
def artifact_download_steps(args):
def artifact_download_steps(self):
"""
This function is responsible for setting up the SSH server in the DUT and for
exporting the first boot environment to a file.
@@ -123,21 +134,21 @@ def artifact_download_steps(args):
download_steps = [
"set -ex",
"curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
f"{args.job_rootfs_overlay_url} | tar -xz -C /",
f"mkdir -p {args.ci_project_dir}",
f"curl -L --retry 4 -f --retry-all-errors --retry-delay 60 {args.build_url} | "
f"tar --zstd -x -C {args.ci_project_dir}",
f"{self.job_submitter.job_rootfs_overlay_url} | tar -xz -C /",
f"mkdir -p {self.job_submitter.ci_project_dir}",
f"curl -L --retry 4 -f --retry-all-errors --retry-delay 60 {self.job_submitter.build_url} | "
f"tar --zstd -x -C {self.job_submitter.ci_project_dir}",
]
# If the JWT file is provided, we will use it to authenticate with the cloud
# storage provider and will hide it from the job output in Gitlab.
if args.jwt_file:
with open(args.jwt_file) as jwt_file:
if self.job_submitter.jwt_file:
with open(self.job_submitter.jwt_file) as jwt_file:
download_steps += [
"set +x # HIDE_START",
f'echo -n "{jwt_file.read()}" > "{args.jwt_file}"',
f'echo -n "{jwt_file.read()}" > "{self.job_submitter.jwt_file}"',
"set -x # HIDE_END",
f'echo "export CI_JOB_JWT_FILE={args.jwt_file}" >> /set-job-env-vars.sh',
f'echo "export CI_JOB_JWT_FILE={self.job_submitter.jwt_file}" >> /set-job-env-vars.sh',
]
else:
download_steps += [

View File

@@ -29,14 +29,13 @@ script after sourcing "dut-env-vars.sh" again for the second SSH test case.
from pathlib import Path
from typing import Any
from typing import TYPE_CHECKING, Any
from .lava_job_definition import (
NUMBER_OF_ATTEMPTS_LAVA_BOOT,
artifact_download_steps,
generate_metadata,
to_yaml_block,
)
from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT, to_yaml_block
if TYPE_CHECKING:
from ..lava_job_submitter import LAVAJobSubmitter
from .lava_job_definition import LAVAJobDefinition
# Very early SSH server setup. Uses /dut_ready file to flag it is done.
SSH_SERVER_COMMANDS = {
@@ -79,7 +78,7 @@ lava_ssh_test_case() {
]
def generate_dut_test(args):
def generate_dut_test(args: "LAVAJobSubmitter") -> dict[str, Any]:
# Commands executed on DUT.
# Trying to execute the minimal number of commands, because the console data is
# retrieved via UART, which is hang-prone in some devices.
@@ -109,7 +108,8 @@ def generate_dut_test(args):
}
def generate_docker_test(args):
def generate_docker_test(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
args = job_definition.job_submitter
# This is a growing list of commands that will be executed by the docker
# guest, which will be the SSH client.
docker_commands = []
@@ -148,7 +148,7 @@ def generate_docker_test(args):
(
"lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
"source /dut-env-vars.sh",
*artifact_download_steps(args),
*job_definition.artifact_download_steps(),
"EOF",
)
),
@@ -163,8 +163,9 @@ def generate_docker_test(args):
return init_stages_test
def generate_lava_yaml_payload(args) -> dict[str, Any]:
values = generate_metadata(args)
def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
values = job_definition.generate_metadata()
job_submitter = job_definition.job_submitter
# URLs to our kernel rootfs to boot from, both generated by the base
# container build
@@ -175,22 +176,19 @@ def generate_lava_yaml_payload(args) -> dict[str, Any]:
"timeouts": {"http-download": {"minutes": 2}},
"to": "tftp",
"os": "oe",
"kernel": {"url": f"{args.kernel_url_prefix}/{args.kernel_image_name}"},
"kernel": {"url": f"{job_submitter.kernel_url_prefix}/{job_submitter.kernel_image_name}"},
"nfsrootfs": {
"url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
"url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
"compression": "zstd",
},
}
if args.kernel_image_type:
deploy["kernel"]["type"] = args.kernel_image_type
if args.dtb_filename:
deploy["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"}
job_definition.attach_kernel_and_dtb(deploy)
# always boot over NFS
boot = {
"namespace": "dut",
"failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
"method": args.boot_method,
"method": job_submitter.boot_method,
"commands": "nfs",
"prompts": ["lava-shell:"],
**SSH_SERVER_COMMANDS,
@@ -201,8 +199,8 @@ def generate_lava_yaml_payload(args) -> dict[str, Any]:
values["actions"] = [
{"deploy": deploy},
{"boot": boot},
{"test": generate_dut_test(args)},
{"test": generate_docker_test(args)},
{"test": generate_dut_test(job_submitter)},
{"test": generate_docker_test(job_definition)},
]
return values

View File

@@ -1,10 +1,10 @@
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from lava.lava_job_submitter import LAVAJobSubmitter
from ..lava_job_submitter import LAVAJobSubmitter
from .lava_job_definition import LAVAJobDefinition
from .lava_job_definition import (NUMBER_OF_ATTEMPTS_LAVA_BOOT,
artifact_download_steps, generate_metadata)
from .lava_job_definition import NUMBER_OF_ATTEMPTS_LAVA_BOOT
# Use the same image that is being used for the hardware enablement and health-checks.
# It is pretty small (<100MB) and has all the tools we need to run LAVA, so it is a safe choice.
@@ -14,14 +14,8 @@ from .lava_job_definition import (NUMBER_OF_ATTEMPTS_LAVA_BOOT,
DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"
def attach_kernel_and_dtb(args, deploy_field):
if args.kernel_image_type:
deploy_field["kernel"]["type"] = args.kernel_image_type
if args.dtb_filename:
deploy_field["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"}
def fastboot_deploy_actions(args: "LAVAJobSubmitter", nfsrootfs) -> list[dict[str, Any]]:
def fastboot_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
args = job_definition.job_submitter
fastboot_deploy_nfs = {
"timeout": {"minutes": 10},
"to": "nfs",
@@ -63,12 +57,13 @@ def fastboot_deploy_actions(args: "LAVAJobSubmitter", nfsrootfs) -> list[dict[st
# URLs to our kernel rootfs to boot from, both generated by the base
# container build
attach_kernel_and_dtb(args, fastboot_deploy_prepare)
job_definition.attach_kernel_and_dtb(fastboot_deploy_prepare["images"])
return [{"deploy": d} for d in (fastboot_deploy_nfs, fastboot_deploy_prepare, fastboot_deploy)]
def tftp_deploy_actions(args: "LAVAJobSubmitter", nfsrootfs) -> list[dict[str, Any]]:
def tftp_deploy_actions(job_definition: "LAVAJobDefinition", nfsrootfs) -> list[dict[str, Any]]:
args = job_definition.job_submitter
tftp_deploy = {
"timeout": {"minutes": 5},
"to": "tftp",
@@ -78,7 +73,7 @@ def tftp_deploy_actions(args: "LAVAJobSubmitter", nfsrootfs) -> list[dict[str, A
},
"nfsrootfs": nfsrootfs,
}
attach_kernel_and_dtb(args, tftp_deploy)
job_definition.attach_kernel_and_dtb(tftp_deploy)
return [{"deploy": d} for d in [tftp_deploy]]
@@ -105,9 +100,10 @@ def init_stage1_steps(args: "LAVAJobSubmitter") -> list[str]:
return run_steps
def test_actions(args: "LAVAJobSubmitter") -> list[dict[str, Any]]:
def test_actions(job_definition: "LAVAJobDefinition") -> list[dict[str, Any]]:
# skeleton test definition: only declaring each job as a single 'test'
# since LAVA's test parsing is not useful to us
args = job_definition.job_submitter
run_steps = []
test = {
"timeout": {"minutes": args.job_timeout_min},
@@ -133,7 +129,7 @@ def test_actions(args: "LAVAJobSubmitter") -> list[dict[str, Any]]:
}
run_steps += init_stage1_steps(args)
run_steps += artifact_download_steps(args)
run_steps += job_definition.artifact_download_steps()
run_steps += [
f"mkdir -p {args.ci_project_dir}",
@@ -174,7 +170,7 @@ def fastboot_boot_action(args: "LAVAJobSubmitter") -> dict[str, Any]:
return fastboot_boot
def generate_lava_yaml_payload(args: "LAVAJobSubmitter") -> dict[str, Any]:
def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
"""
Generates a YAML payload for submitting a LAVA job, based on the provided arguments.
@@ -187,23 +183,24 @@ def generate_lava_yaml_payload(args: "LAVAJobSubmitter") -> dict[str, Any]:
a dictionary containing the values generated by the `generate_metadata` function and the
actions for the LAVA job submission.
"""
values = generate_metadata(args)
job_submitter = job_definition.job_submitter
values = job_definition.generate_metadata()
nfsrootfs = {
"url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
"url": f"{job_submitter.rootfs_url_prefix}/lava-rootfs.tar.zst",
"compression": "zstd",
}
if args.boot_method == "fastboot":
if job_submitter.boot_method == "fastboot":
values["actions"] = [
*fastboot_deploy_actions(args, nfsrootfs),
{"boot": fastboot_boot_action(args)},
*fastboot_deploy_actions(job_definition, nfsrootfs),
{"boot": fastboot_boot_action(job_submitter)},
]
else: # tftp
values["actions"] = [
*tftp_deploy_actions(args, nfsrootfs),
{"boot": tftp_boot_action(args)},
*tftp_deploy_actions(job_definition, nfsrootfs),
{"boot": tftp_boot_action(job_submitter)},
]
values["actions"].extend(test_actions(args))
values["actions"].extend(test_actions(job_definition))
return values
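
Consolidating the pattern visible across the SSH and UART hunks above:
the per-mode generators now take the LAVAJobDefinition wrapper instead
of the raw submitter and delegate shared pieces back to it. A sketch
under that assumption (deploy/boot details elided, not the real module
body):

    from typing import TYPE_CHECKING, Any

    if TYPE_CHECKING:
        from lava.utils.lava_job_definition import LAVAJobDefinition

    def generate_lava_yaml_payload(job_definition: "LAVAJobDefinition") -> dict[str, Any]:
        job_submitter = job_definition.job_submitter   # raw CI arguments still live here
        values = job_definition.generate_metadata()    # shared metadata and permissions
        deploy: dict[str, Any] = {"kernel": {}}        # placeholder; real fields come from job_submitter
        job_definition.attach_kernel_and_dtb(deploy)   # wrapper fills in kernel type and dtb URL
        values["actions"] = [{"deploy": deploy}]       # plus boot/test actions in the real modules
        return values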