File: //usr/bin/google_set_multiqueue
#!/bin/bash
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# For a single-queue / no MSI-X virtionet device, sets the IRQ affinities to
# processor 0. For this virtionet configuration, distributing IRQs to all
# processors results in comparatively high cpu utilization and comparatively
# low network bandwidth.
#
# For a multi-queue / MSI-X virtionet device, sets the IRQ affinities to the
# per-IRQ affinity hint. The virtionet driver maps each virtionet TX (RX) queue
# MSI-X interrupt to a unique single CPU if the number of TX (RX) queues equals
# the number of online CPUs. The mapping of network MSI-X interrupt vector to
# CPUs is stored in the virtionet MSI-X interrupt vector affinity hint. This
# configuration allows network traffic to be spread across the CPUs, giving
# each CPU a dedicated TX and RX network queue, while ensuring that all packets
# from a single flow are delivered to the same CPU.
#
# For a gvnic device, set the IRQ affinities to the per-IRQ affinity hint.
# The google virtual ethernet driver maps each queue MSI-X interrupt to a
# unique single CPU, which is stored in the affinity_hint for each MSI-X
# vector. In older versions of the kernel, irqbalance is expected to copy the
# affinity_hint to smp_affinity; however, GCE instances disable irqbalance by
# default. This script copies over the affinity_hint to smp_affinity on boot to
# replicate the behavior of irqbalance.
# Succeeds (status 0) when ${1} is accepted as an integer operand by the
# shell's numeric -eq comparison, and fails otherwise. Any diagnostic the
# comparison would print is discarded, so the function is silent.
is_decimal_int() {
  test "${1}" -eq "${1}" &> /dev/null
}
# Asks ethtool to set the "combined" channel count of a network interface.
# Arguments:
#   $1 - interface name handed to ethtool -L
#   $2 - number of combined channels to request
# Outputs: nothing; ethtool's stdout and stderr are discarded.
# Returns: ethtool's exit status.
set_channels() {
  local iface="${1}"
  local channel_count="${2}"
  ethtool -L "${iface}" combined "${channel_count}" &> /dev/null
}
echo "Running $(basename "$0")."

# Glob pattern matching every device bound to the virtio_net driver. It is
# stored unexpanded and deliberately used unquoted so that it expands at the
# point of iteration.
VIRTIO_NET_DEVS=/sys/bus/virtio/drivers/virtio_net/virtio*

# Loop through all the virtionet devices and enable multi-queue
if [ -x "$(command -v ethtool)" ]; then
  for dev in $VIRTIO_NET_DEVS; do
    for eth_dev in "${dev}"/net/*; do
      # When nothing matches, the glob is left as a literal pattern; skip it
      # instead of handing a bogus interface name such as "*" to ethtool.
      [ -e "${eth_dev}" ] || continue
      eth_dev=$(basename "$eth_dev")

      # Query the channel parameters once; on failure the captured text is
      # ethtool's error message, on success it is the channel listing.
      if ! channel_info=$(ethtool -l "$eth_dev" 2>&1); then
        echo "ethtool says that $eth_dev does not support virtionet multiqueue: $channel_info."
        continue
      fi

      # The first "Combined" line is taken as the maximum; its value is the
      # second tab-separated field.
      num_max_channels=$(printf '%s\n' "$channel_info" \
        | grep -m 1 Combined | cut -f2)

      # Validate before any numeric comparison so that an empty or malformed
      # value is reported cleanly rather than triggering a shell error.
      if ! is_decimal_int "$num_max_channels"; then
        echo "Could not set channels for $eth_dev to $num_max_channels."
        continue
      fi

      # A maximum of one channel means there is nothing to raise.
      [ "${num_max_channels}" -eq 1 ] && continue

      if set_channels "$eth_dev" "$num_max_channels"; then
        echo "Set channels for $eth_dev to $num_max_channels."
      else
        echo "Could not set channels for $eth_dev to $num_max_channels."
      fi
    done
  done
else
  echo "ethtool not found: cannot configure virtionet multiqueue."
fi
# For every virtionet device, walk all IRQs under /proc/irq and pin the ones
# that belong to that device.
for dev in $VIRTIO_NET_DEVS; do
  # When no virtio_net device exists the glob stays a literal pattern; skip it.
  [ -e "${dev}" ] || continue
  dev=$(basename "$dev")
  for irq in /proc/irq/*; do
    smp_affinity="${irq}/smp_affinity_list"
    [ -f "${smp_affinity}" ] || continue
    # Classify this IRQ as virtionet intx, virtionet MSI-X, or non-virtionet
    # If the IRQ type is virtionet intx, a subdirectory with the same name as
    # the device will be present. If the IRQ type is virtionet MSI-X, then
    # a subdirectory of the form <device name>-<input|output>.N will exist.
    # In this case, N is the input (output) queue number, and is specified as
    # a decimal integer ranging from 0 to K - 1 where K is the number of
    # input (output) queues in the virtionet device.
    virtionet_intx_dir="${irq}/${dev}"
    virtionet_msix_dir_regex=".*/${dev}-(input|output)\.([0-9]+)$"
    if [ -d "${virtionet_intx_dir}" ]; then
      # All virtionet intx IRQs are delivered to CPU 0. smp_affinity_list
      # takes a CPU list (not a hex bitmask), so CPU 0 is written as "0";
      # the bitmask-style "01" would be read as CPU 1.
      echo "Setting ${smp_affinity} to 0 for device ${dev}."
      echo "0" > "${smp_affinity}"
      continue
    fi
    # Not virtionet intx, probe for MSI-X
    virtionet_msix_found=0
    for entry in "${irq}/${dev}"*; do
      if [[ "$entry" =~ ${virtionet_msix_dir_regex} ]]; then
        virtionet_msix_found=1
        queue_num=${BASH_REMATCH[2]}
      fi
    done
    affinity_hint="${irq}/affinity_hint"
    # Only MSI-X queue IRQs that also expose an affinity_hint file are pinned.
    if [[ "$virtionet_msix_found" -eq 0 || ! -f "${affinity_hint}" ]]; then
      continue
    fi
    # Pin the IRQ to the CPU whose index equals the queue number (written as
    # a CPU list entry), then read the file back to log what the kernel
    # actually applied.
    echo "Setting ${smp_affinity} to ${queue_num} for device ${dev}."
    echo "${queue_num}" > "${smp_affinity}"
    real_affinity=$(cat "${smp_affinity}")
    echo "${smp_affinity}: real affinity ${real_affinity}"
  done
done
# Copy affinity_hint into smp_affinity for gvnic queue interrupts. Only IRQ
# directories containing an entry named "*-ntfy-block.*" are touched; that
# name is unique to gve, so virtio queues are not affected.
for irq_path in /proc/irq/*; do
  # Not a gve interrupt: no "-ntfy-block." entry under this IRQ.
  ls "${irq_path}"/*-ntfy-block.* > /dev/null 2>&1 || continue
  # Nothing to copy when the hint file is absent.
  [ -f "${irq_path}/affinity_hint" ] || continue
  # The hint is left unquoted on purpose so the logged line is assembled
  # word-by-word by echo.
  echo Setting smp_affinity on "${irq_path}" to $(cat "${irq_path}/affinity_hint")
  cp "${irq_path}/affinity_hint" "${irq_path}/smp_affinity"
done
#######################################
# Builds the xps_cpus mask for one TX queue: every CPU whose index is
# queue, queue + stride, queue + 2 * stride, ... below the CPU count gets
# its bit set.
# Linux xps_cpus requires a hex number with commas every 32 bits. It ignores
# all bits above # cpus, so the mask is emitted as comma separated 32 bit hex
# values, most significant dword first.
# Arguments:
#   $1 - TX queue index (first CPU of the stripe)
#   $2 - stripe stride, i.e. the number of TX queues; must be positive
#   $3 - number of CPUs to cover (callers cap this at 63)
# Outputs: the mask string on stdout
# Returns: 1 without output if the stride is not positive, 0 otherwise
#######################################
xps_mask_string() {
  local queue_num=$1 num_queues=$2 num_cpus=$3
  local xps=0 cpu dword_index
  local -a xps_dwords=()

  # A non-positive stride would never advance the loop below.
  ((num_queues > 0)) || return 1

  for ((cpu = queue_num; cpu < num_cpus; cpu += num_queues)); do
    xps=$((xps | (1 << cpu)))
  done

  # Peel off 32 bits at a time, prepending so the highest dword ends up first.
  for ((dword_index = 0; dword_index <= (num_cpus - 1) / 32; dword_index++)); do
    xps_dwords=("$(printf '%08x' "$((xps & 0xffffffff))")" "${xps_dwords[@]}")
    xps=$((xps >> 32))
  done

  local IFS=,
  echo "${xps_dwords[*]}"
}

# Glob pattern for the xps_cpus file of every TX queue on interfaces whose
# name starts with "e"; kept unexpanded and used unquoted when iterated.
XPS=/sys/class/net/e*/queues/tx*/xps_cpus
num_cpus=$(nproc)
# NOTE(review): presumably capped at 63 because the mask is accumulated in a
# single shell integer -- confirm before raising.
[[ $num_cpus -gt 63 ]] && num_cpus=63

# Count the TX queues that actually exist; an unmatched glob is left as a
# literal pattern and must not be counted.
# NOTE(review): the count spans all matching interfaces, not one interface.
num_queues=0
for q in $XPS; do
  [[ -e "${q}" ]] || continue
  num_queues=$((num_queues + 1))
done

# If we have more CPUs than queues, then stripe CPUs across tx affinity
# as CPUNumber % queue_count.
queue_re=".*tx-([0-9]+).*$"
for q in $XPS; do
  [[ -e "${q}" ]] || continue
  # Skip paths without a queue number rather than reusing a stale value.
  [[ "${q}" =~ ${queue_re} ]] || continue
  queue_num=${BASH_REMATCH[1]}

  xps_string=$(xps_mask_string "${queue_num}" "${num_queues}" "${num_cpus}")
  echo "${xps_string}" > "${q}"
  # Report the value read back from the file, i.e. what the kernel accepted.
  printf "Queue %d XPS=%s for %s\n" "${queue_num}" "$(cat "${q}")" "${q}"
done | sort -n -k2