criu/scripts/ci/docker-test.sh
Adrian Reber 30acbabcdd ci: also exclude docker version 29
Docker version 28 broke container restore in combination with network
namespaces. The workaround in the CI script was excluding Docker version
28. Now that there is also Docker version 29, which is still broken,
this also excludes Docker version 29.

Signed-off-by: Adrian Reber <areber@redhat.com>
2025-12-14 17:28:58 +09:00

139 lines
3.6 KiB
Bash
Executable file

#!/bin/bash
set -x -e -o pipefail
# Workaround: Docker 28.x and 29.x has a known regression that breaks the checkpoint and
# restore (C/R) feature. Let's install previous, or next major version. See
# https://github.com/moby/moby/issues/50750 for details on the bug.
export DEBIAN_FRONTEND=noninteractive
apt remove -y docker-ce docker-ce-cli
../../contrib/apt-install -y ca-certificates curl
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
chmod a+r /etc/apt/keyrings/docker.asc
# shellcheck disable=SC1091
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" > /etc/apt/sources.list.d/docker.list
apt update -y
apt-cache madison docker-ce | awk '{ print $3 }'
verstr="$(apt-cache madison docker-ce | awk '{ print $3 }' | sort | grep -Ev ':(28|29)\.'| tail -n 1)"
../../contrib/apt-install -y "docker-ce=$verstr" "docker-ce-cli=$verstr"
# docker checkpoint and restore is an experimental feature
echo '{ "experimental": true }' > /etc/docker/daemon.json
service docker restart
CRIU_LOG='/criu.log'
mkdir -p /etc/criu
echo "log-file=$CRIU_LOG" > /etc/criu/runc.conf
# Test checkpoint/restore with action script
echo "action-script /usr/bin/true" | sudo tee /etc/criu/default.conf
export SKIP_CI_TEST=1
./run-ci-tests.sh
cd ../../
make install
docker info
criu --version
run_container () {
docker run \
--tmpfs /tmp \
--tmpfs /run \
--read-only \
--name cr \
--health-cmd='sleep 1' \
--health-interval=1s \
-d \
alpine \
/bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done'
}
wait_running () {
until [ "$(docker inspect -f '{{.State.Running}}' cr)" = "true" ]; do
sleep 1;
done;
}
wait_healthy () {
until [ "$(docker inspect -f '{{.State.Health.Status}}' cr)" = "healthy" ]; do
sleep 1;
done;
}
checkpoint_container () {
CHECKPOINT_NAME=$1
docker checkpoint create cr "$CHECKPOINT_NAME" &&
(docker exec cr true >> /dev/null 2>&1 && exit 1 || exit 0) &&
# wait for container to stop
docker wait cr
}
print_logs () {
cat "$(grep log 'log file:' | sed 's/log file:\s*//')" || true
docker logs cr || true
cat $CRIU_LOG || true
dmesg
docker ps
exit 1
}
declare -i max_restore_container_tries=3
restore_container () {
CHECKPOINT_NAME=$1
for i in $(seq $max_restore_container_tries); do
docker start --checkpoint "$CHECKPOINT_NAME" cr 2>&1 | tee log && break
# FIXME: There is a race condition in docker/containerd that causes
# docker to occasionally fail when starting a container from a
# checkpoint immediately after the checkpoint has been created.
# https://github.com/moby/moby/issues/42900
if grep -Eq '^Error response from daemon: failed to upload checkpoint to containerd: commit failed: content sha256:.*: already exists$' log; then
echo "Retry container restore: $i/$max_restore_container_tries"
sleep 1;
else
print_logs
fi
done
}
# Scenario: Create multiple containers and checkpoint and restore them once
for i in $(seq 10); do
run_container
wait_running
docker ps
checkpoint_container checkpoint
docker ps
restore_container checkpoint
docker ps
docker rm -f cr
done
# Scenario: Create container and checkpoint and restore it multiple times
run_container
wait_running
for i in $(seq 5); do
docker ps
checkpoint_container checkpoint"${i}"
docker ps
restore_container checkpoint"${i}"
# Wait for healthy state before creating another checkpoint
wait_healthy
done