Files
third_party_mesa3d/.gitlab-ci/common/intel-gpu-freq.sh
Cristian Ciocaltea 99b04cbb73 ci: Add CPU frequency adjustment capability
Update intel-gpu-freq.sh script to offer the possibility to adjust CPU
operating frequencies in addition to GPU.

Note this is currently limited to just setting the maximum scaling
frequency as percentage of the maximum frequency allowed by the
hardware.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Reviewed-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Acked-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16164>
2022-04-27 18:46:05 +03:00

759 lines
18 KiB
Bash
Executable File

#!/bin/sh
#
# This is an utility script to manage Intel GPU frequencies.
# It can be used for debugging performance problems or trying to obtain a stable
# frequency while benchmarking.
#
# Note the Intel i915 GPU driver allows to change the minimum, maximum and boost
# frequencies in steps of 50 MHz via:
#
# /sys/class/drm/card<n>/<freq_info>
#
# Where <n> is the DRM card index and <freq_info> one of the following:
#
# - gt_max_freq_mhz (enforced maximum freq)
# - gt_min_freq_mhz (enforced minimum freq)
# - gt_boost_freq_mhz (enforced boost freq)
#
# The hardware capabilities can be accessed via:
#
# - gt_RP0_freq_mhz (supported maximum freq)
# - gt_RPn_freq_mhz (supported minimum freq)
# - gt_RP1_freq_mhz (most efficient freq)
#
# The current frequency can be read from:
# - gt_act_freq_mhz (the actual GPU freq)
# - gt_cur_freq_mhz (the last requested freq)
#
# Also note that in addition to GPU management, the script offers the
# possibility to adjust CPU operating frequencies. However, this is currently
# limited to just setting the maximum scaling frequency as percentage of the
# maximum frequency allowed by the hardware.
#
# Copyright (C) 2022 Collabora Ltd.
# Author: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
#
# SPDX-License-Identifier: MIT
#
#
# Constants
#
# GPU
DRM_FREQ_SYSFS_PATTERN="/sys/class/drm/card%d/gt_%s_freq_mhz"
ENF_FREQ_INFO="max min boost"
CAP_FREQ_INFO="RP0 RPn RP1"
ACT_FREQ_INFO="act cur"
THROTT_DETECT_SLEEP_SEC=2
THROTT_DETECT_PID_FILE_PATH=/tmp/thrott-detect.pid
# CPU
CPU_SYSFS_PREFIX=/sys/devices/system/cpu
CPU_PSTATE_SYSFS_PATTERN="${CPU_SYSFS_PREFIX}/intel_pstate/%s"
CPU_FREQ_SYSFS_PATTERN="${CPU_SYSFS_PREFIX}/cpu%s/cpufreq/%s_freq"
CAP_CPU_FREQ_INFO="cpuinfo_max cpuinfo_min"
ENF_CPU_FREQ_INFO="scaling_max scaling_min"
ACT_CPU_FREQ_INFO="scaling_cur"
#
# Global variables.
#
unset INTEL_DRM_CARD_INDEX
unset GET_ACT_FREQ GET_ENF_FREQ GET_CAP_FREQ
unset SET_MIN_FREQ SET_MAX_FREQ
unset MONITOR_FREQ
unset CPU_SET_MAX_FREQ
unset DETECT_THROTT
unset DRY_RUN
#
# Simple printf based stderr logger.
#
log() {
local msg_type=$1
shift
printf "%s: %s: " "${msg_type}" "${0##*/}" >&2
printf "$@" >&2
printf "\n" >&2
}
#
# Helper to print sysfs path for the given card index and freq info.
#
# arg1: Frequency info sysfs name, one of *_FREQ_INFO constants above
# arg2: Video card index, defaults to INTEL_DRM_CARD_INDEX
#
print_freq_sysfs_path() {
printf ${DRM_FREQ_SYSFS_PATTERN} "${2:-${INTEL_DRM_CARD_INDEX}}" "$1"
}
#
# Helper to set INTEL_DRM_CARD_INDEX for the first identified Intel video card.
#
identify_intel_gpu() {
local i=0 vendor path
while [ ${i} -lt 16 ]; do
[ -c "/dev/dri/card$i" ] || {
i=$((i + 1))
continue
}
path=$(print_freq_sysfs_path "" ${i})
path=${path%/*}/device/vendor
[ -r "${path}" ] && read vendor < "${path}" && \
[ "${vendor}" = "0x8086" ] && INTEL_DRM_CARD_INDEX=$i && return 0
i=$((i + 1))
done
return 1
}
#
# Read the specified freq info from sysfs.
#
# arg1: Flag (y/n) to also enable printing the freq info.
# arg2...: Frequency info sysfs name(s), see *_FREQ_INFO constants above
# return: Global variable(s) FREQ_${arg} containing the requested information
#
read_freq_info() {
local var val info path print=0 ret=0
[ "$1" = "y" ] && print=1
shift
while [ $# -gt 0 ]; do
info=$1
shift
var=FREQ_${info}
path=$(print_freq_sysfs_path "${info}")
[ -r ${path} ] && read ${var} < ${path} || {
log ERROR "Failed to read freq info from: %s" "${path}"
ret=1
continue
}
[ -n "${var}" ] || {
log ERROR "Got empty freq info from: %s" "${path}"
ret=1
continue
}
[ ${print} -eq 1 ] && {
eval val=\$${var}
printf "%6s: %4s MHz\n" "${info}" "${val}"
}
done
return ${ret}
}
#
# Display requested info.
#
print_freq_info() {
local req_freq
[ -n "${GET_CAP_FREQ}" ] && {
printf "* Hardware capabilities\n"
read_freq_info y ${CAP_FREQ_INFO}
printf "\n"
}
[ -n "${GET_ENF_FREQ}" ] && {
printf "* Enforcements\n"
read_freq_info y ${ENF_FREQ_INFO}
printf "\n"
}
[ -n "${GET_ACT_FREQ}" ] && {
printf "* Actual\n"
read_freq_info y ${ACT_FREQ_INFO}
printf "\n"
}
}
#
# Helper to print frequency value as requested by user via '-s, --set' option.
# arg1: user requested freq value
#
compute_freq_set() {
local val
case "$1" in
+)
val=${FREQ_RP0}
;;
-)
val=${FREQ_RPn}
;;
*%)
val=$((${1%?} * ${FREQ_RP0} / 100))
# Adjust freq to comply with 50 MHz increments
val=$((val / 50 * 50))
;;
*[!0-9]*)
log ERROR "Cannot set freq to invalid value: %s" "$1"
return 1
;;
"")
log ERROR "Cannot set freq to unspecified value"
return 1
;;
*)
# Adjust freq to comply with 50 MHz increments
val=$(($1 / 50 * 50))
;;
esac
printf "%s" "${val}"
}
#
# Helper for set_freq().
#
set_freq_max() {
log INFO "Setting GPU max freq to %s MHz" "${SET_MAX_FREQ}"
read_freq_info n min || return $?
[ ${SET_MAX_FREQ} -gt ${FREQ_RP0} ] && {
log ERROR "Cannot set GPU max freq (%s) to be greater than hw max freq (%s)" \
"${SET_MAX_FREQ}" "${FREQ_RP0}"
return 1
}
[ ${SET_MAX_FREQ} -lt ${FREQ_RPn} ] && {
log ERROR "Cannot set GPU max freq (%s) to be less than hw min freq (%s)" \
"${SET_MIN_FREQ}" "${FREQ_RPn}"
return 1
}
[ ${SET_MAX_FREQ} -lt ${FREQ_min} ] && {
log ERROR "Cannot set GPU max freq (%s) to be less than min freq (%s)" \
"${SET_MAX_FREQ}" "${FREQ_min}"
return 1
}
[ -z "${DRY_RUN}" ] || return 0
printf "%s" ${SET_MAX_FREQ} | tee $(print_freq_sysfs_path max) \
$(print_freq_sysfs_path boost) > /dev/null
[ $? -eq 0 ] || {
log ERROR "Failed to set GPU max frequency"
return 1
}
}
#
# Helper for set_freq().
#
set_freq_min() {
log INFO "Setting GPU min freq to %s MHz" "${SET_MIN_FREQ}"
read_freq_info n max || return $?
[ ${SET_MIN_FREQ} -gt ${FREQ_max} ] && {
log ERROR "Cannot set GPU min freq (%s) to be greater than max freq (%s)" \
"${SET_MIN_FREQ}" "${FREQ_max}"
return 1
}
[ ${SET_MIN_FREQ} -lt ${FREQ_RPn} ] && {
log ERROR "Cannot set GPU min freq (%s) to be less than hw min freq (%s)" \
"${SET_MIN_FREQ}" "${FREQ_RPn}"
return 1
}
[ -z "${DRY_RUN}" ] || return 0
printf "%s" ${SET_MIN_FREQ} > $(print_freq_sysfs_path min)
[ $? -eq 0 ] || {
log ERROR "Failed to set GPU min frequency"
return 1
}
}
#
# Set min or max or both GPU frequencies to the user indicated values.
#
set_freq() {
# Get hw max & min frequencies
read_freq_info n RP0 RPn || return $?
[ -z "${SET_MAX_FREQ}" ] || {
SET_MAX_FREQ=$(compute_freq_set "${SET_MAX_FREQ}")
[ -z "${SET_MAX_FREQ}" ] && return 1
}
[ -z "${SET_MIN_FREQ}" ] || {
SET_MIN_FREQ=$(compute_freq_set "${SET_MIN_FREQ}")
[ -z "${SET_MIN_FREQ}" ] && return 1
}
#
# Ensure correct operation order, to avoid setting min freq
# to a value which is larger than max freq.
#
# E.g.:
# crt_min=crt_max=600; new_min=new_max=700
# > operation order: max=700; min=700
#
# crt_min=crt_max=600; new_min=new_max=500
# > operation order: min=500; max=500
#
if [ -n "${SET_MAX_FREQ}" ] && [ -n "${SET_MIN_FREQ}" ]; then
[ ${SET_MAX_FREQ} -lt ${SET_MIN_FREQ} ] && {
log ERROR "Cannot set GPU max freq to be less than min freq"
return 1
}
read_freq_info n min || return $?
if [ ${SET_MAX_FREQ} -lt ${FREQ_min} ]; then
set_freq_min || return $?
set_freq_max
else
set_freq_max || return $?
set_freq_min
fi
elif [ -n "${SET_MAX_FREQ}" ]; then
set_freq_max
elif [ -n "${SET_MIN_FREQ}" ]; then
set_freq_min
else
log "Unexpected call to set_freq()"
return 1
fi
}
#
# Helper for detect_throttling().
#
get_thrott_detect_pid() {
[ -e ${THROTT_DETECT_PID_FILE_PATH} ] || return 0
local pid
read pid < ${THROTT_DETECT_PID_FILE_PATH} || {
log ERROR "Failed to read pid from: %s" "${THROTT_DETECT_PID_FILE_PATH}"
return 1
}
local proc_path=/proc/${pid:-invalid}/cmdline
[ -r ${proc_path} ] && grep -qs "${0##*/}" ${proc_path} && {
printf "%s" "${pid}"
return 0
}
# Remove orphaned PID file
rm -rf ${THROTT_DETECT_PID_FILE_PATH}
return 1
}
#
# Control detection and reporting of GPU throttling events.
# arg1: start - run throttle detector in background
# stop - stop throttle detector process, if any
# status - verify if throttle detector is running
#
detect_throttling() {
local pid
pid=$(get_thrott_detect_pid)
case "$1" in
status)
printf "Throttling detector is "
[ -z "${pid}" ] && printf "not running\n" && return 0
printf "running (pid=%s)\n" ${pid}
;;
stop)
[ -z "${pid}" ] && return 0
log INFO "Stopping throttling detector (pid=%s)" "${pid}"
kill ${pid}; sleep 1; kill -0 ${pid} 2>/dev/null && kill -9 ${pid}
rm -rf ${THROTT_DETECT_PID_FILE_PATH}
;;
start)
[ -n "${pid}" ] && {
log WARN "Throttling detector is already running (pid=%s)" ${pid}
return 0
}
(
read_freq_info n RPn || exit $?
while true; do
sleep ${THROTT_DETECT_SLEEP_SEC}
read_freq_info n act min cur || exit $?
#
# The throttling seems to occur when act freq goes below min.
# However, it's necessary to exclude the idle states, where
# act freq normally reaches RPn and cur goes below min.
#
[ ${FREQ_act} -lt ${FREQ_min} ] && \
[ ${FREQ_act} -gt ${FREQ_RPn} ] && \
[ ${FREQ_cur} -ge ${FREQ_min} ] && \
printf "GPU throttling detected: act=%s min=%s cur=%s RPn=%s\n" \
${FREQ_act} ${FREQ_min} ${FREQ_cur} ${FREQ_RPn}
done
) &
pid=$!
log INFO "Started GPU throttling detector (pid=%s)" ${pid}
printf "%s\n" ${pid} > ${THROTT_DETECT_PID_FILE_PATH} || \
log WARN "Failed to write throttle detector PID file"
;;
esac
}
#
# Retrieve the list of online CPUs.
#
get_online_cpus() {
local path cpu_index
printf "0"
for path in $(grep 1 ${CPU_SYSFS_PREFIX}/cpu*/online); do
cpu_index=${path##*/cpu}
printf " %s" ${cpu_index%%/*}
done
}
#
# Helper to print sysfs path for the given CPU index and freq info.
#
# arg1: Frequency info sysfs name, one of *_CPU_FREQ_INFO constants above
# arg2: CPU index
#
print_cpu_freq_sysfs_path() {
printf ${CPU_FREQ_SYSFS_PATTERN} "$2" "$1"
}
#
# Read the specified CPU freq info from sysfs.
#
# arg1: CPU index
# arg2: Flag (y/n) to also enable printing the freq info.
# arg3...: Frequency info sysfs name(s), see *_CPU_FREQ_INFO constants above
# return: Global variable(s) CPU_FREQ_${arg} containing the requested information
#
read_cpu_freq_info() {
local var val info path cpu_index print=0 ret=0
cpu_index=$1
[ "$2" = "y" ] && print=1
shift 2
while [ $# -gt 0 ]; do
info=$1
shift
var=CPU_FREQ_${info}
path=$(print_cpu_freq_sysfs_path "${info}" ${cpu_index})
[ -r ${path} ] && read ${var} < ${path} || {
log ERROR "Failed to read CPU freq info from: %s" "${path}"
ret=1
continue
}
[ -n "${var}" ] || {
log ERROR "Got empty CPU freq info from: %s" "${path}"
ret=1
continue
}
[ ${print} -eq 1 ] && {
eval val=\$${var}
printf "%6s: %4s Hz\n" "${info}" "${val}"
}
done
return ${ret}
}
#
# Helper to print freq. value as requested by user via '--cpu-set-max' option.
# arg1: user requested freq value
#
compute_cpu_freq_set() {
local val
case "$1" in
+)
val=${CPU_FREQ_cpuinfo_max}
;;
-)
val=${CPU_FREQ_cpuinfo_min}
;;
*%)
val=$((${1%?} * ${CPU_FREQ_cpuinfo_max} / 100))
;;
*[!0-9]*)
log ERROR "Cannot set CPU freq to invalid value: %s" "$1"
return 1
;;
"")
log ERROR "Cannot set CPU freq to unspecified value"
return 1
;;
*)
log ERROR "Cannot set CPU freq to custom value; use +, -, or % instead"
return 1
;;
esac
printf "%s" "${val}"
}
#
# Adjust CPU max scaling frequency.
#
set_cpu_freq_max() {
local target_freq res=0
case "${CPU_SET_MAX_FREQ}" in
+)
target_freq=100
;;
-)
target_freq=1
;;
*%)
target_freq=${CPU_SET_MAX_FREQ%?}
;;
*)
log ERROR "Invalid CPU freq"
return 1
;;
esac
local pstate_info=$(printf "${CPU_PSTATE_SYSFS_PATTERN}" max_perf_pct)
[ -e "${pstate_info}" ] && {
log INFO "Setting intel_pstate max perf to %s" "${target_freq}%"
printf "%s" "${target_freq}" > "${pstate_info}"
[ $? -eq 0 ] || {
log ERROR "Failed to set intel_pstate max perf"
res=1
}
}
local cpu_index
for cpu_index in $(get_online_cpus); do
read_cpu_freq_info ${cpu_index} n ${CAP_CPU_FREQ_INFO} || { res=$?; continue; }
target_freq=$(compute_cpu_freq_set "${CPU_SET_MAX_FREQ}")
[ -z "${target_freq}" ] && { res=$?; continue; }
log INFO "Setting CPU%s max scaling freq to %s Hz" ${cpu_index} "${target_freq}"
[ -n "${DRY_RUN}" ] && continue
printf "%s" ${target_freq} > $(print_cpu_freq_sysfs_path scaling_max ${cpu_index})
[ $? -eq 0 ] || {
res=1
log ERROR "Failed to set CPU%s max scaling frequency" ${cpu_index}
}
done
return ${res}
}
#
# Show help message.
#
print_usage() {
cat <<EOF
Usage: ${0##*/} [OPTION]...
A script to manage Intel GPU frequencies. Can be used for debugging performance
problems or trying to obtain a stable frequency while benchmarking.
Note Intel GPUs only accept specific frequencies, usually multiples of 50 MHz.
Options:
-g, --get [act|enf|cap|all]
Get frequency information: active (default), enforced,
hardware capabilities or all of them.
-s, --set [{min|max}=]{FREQUENCY[%]|+|-}
Set min or max frequency to the given value (MHz).
Append '%' to interpret FREQUENCY as % of hw max.
Use '+' or '-' to set frequency to hardware max or min.
Omit min/max prefix to set both frequencies.
-r, --reset Reset frequencies to hardware defaults.
-m, --monitor [act|enf|cap|all]
Monitor the indicated frequencies via 'watch' utility.
See '-g, --get' option for more details.
-d|--detect-thrott [start|stop|status]
Start (default operation) the throttling detector
as a background process. Use 'stop' or 'status' to
terminate the detector process or verify its status.
--cpu-set-max [FREQUENCY%|+|-}
Set CPU max scaling frequency as % of hw max.
Use '+' or '-' to set frequency to hardware max or min.
-r, --reset Reset frequencies to hardware defaults.
--dry-run See what the script will do without applying any
frequency changes.
-h, --help Display this help text and exit.
EOF
}
#
# Parse user input for '-g, --get' option.
# Returns 0 if a value has been provided, otherwise 1.
#
parse_option_get() {
local ret=0
case "$1" in
act) GET_ACT_FREQ=1;;
enf) GET_ENF_FREQ=1;;
cap) GET_CAP_FREQ=1;;
all) GET_ACT_FREQ=1; GET_ENF_FREQ=1; GET_CAP_FREQ=1;;
-*|"")
# No value provided, using default.
GET_ACT_FREQ=1
ret=1
;;
*)
print_usage
exit 1
;;
esac
return ${ret}
}
#
# Validate user input for '-s, --set' option.
# arg1: input value to be validated
# arg2: optional flag indicating input is restricted to %
#
validate_option_set() {
case "$1" in
+|-|[0-9]%|[0-9][0-9]%)
return 0
;;
*[!0-9]*|"")
print_usage
exit 1
;;
esac
[ -z "$2" ] || { print_usage; exit 1; }
}
#
# Parse script arguments.
#
[ $# -eq 0 ] && { print_usage; exit 1; }
while [ $# -gt 0 ]; do
case "$1" in
-g|--get)
parse_option_get "$2" && shift
;;
-s|--set)
shift
case "$1" in
min=*)
SET_MIN_FREQ=${1#min=}
validate_option_set "${SET_MIN_FREQ}"
;;
max=*)
SET_MAX_FREQ=${1#max=}
validate_option_set "${SET_MAX_FREQ}"
;;
*)
SET_MIN_FREQ=$1
validate_option_set "${SET_MIN_FREQ}"
SET_MAX_FREQ=${SET_MIN_FREQ}
;;
esac
;;
-r|--reset)
RESET_FREQ=1
SET_MIN_FREQ="-"
SET_MAX_FREQ="+"
;;
-m|--monitor)
MONITOR_FREQ=act
parse_option_get "$2" && MONITOR_FREQ=$2 && shift
;;
-d|--detect-thrott)
DETECT_THROTT=start
case "$2" in
start|stop|status)
DETECT_THROTT=$2
shift
;;
esac
;;
--cpu-set-max)
shift
CPU_SET_MAX_FREQ=$1
validate_option_set "${CPU_SET_MAX_FREQ}" restricted
;;
--dry-run)
DRY_RUN=1
;;
-h|--help)
print_usage
exit 0
;;
*)
print_usage
exit 1
;;
esac
shift
done
#
# Main
#
RET=0
identify_intel_gpu || {
log INFO "No Intel GPU detected"
exit 0
}
[ -n "${SET_MIN_FREQ}${SET_MAX_FREQ}" ] && { set_freq || RET=$?; }
print_freq_info
[ -n "${DETECT_THROTT}" ] && detect_throttling ${DETECT_THROTT}
[ -n "${CPU_SET_MAX_FREQ}" ] && { set_cpu_freq_max || RET=$?; }
[ -n "${MONITOR_FREQ}" ] && {
log INFO "Entering frequency monitoring mode"
sleep 2
exec watch -d -n 1 "$0" -g "${MONITOR_FREQ}"
}
exit ${RET}