commit b75c91c98c0427397aa4bc89c707f26d65209cde Author: Guilherme G. Piccoli Date: Fri Mar 31 15:34:26 2023 -0300 Initial version of SteamOS kdump Signed-off-by: Guilherme G. Piccoli diff --git a/PKGBUILD b/PKGBUILD new file mode 100644 index 0000000..19e08da --- /dev/null +++ b/PKGBUILD @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# Maintainer: Guilherme G. Piccoli + +pkgname=kdump-steamos +pkgver=0.1 +pkgrel=1 +pkgdesc="Kdump scripts to collect vmcore/dmesg in a small dracut-based initramfs" +depends=('dracut' 'kexec-tools' 'systemd' 'zstd') +arch=('x86_64') +license=('GPL2') +install=kdump-steamos.install + +source=('kdump_collect.sh' + 'kdump.etc' + 'kdump_load.sh' + 'kdump-steamos.install' + 'kdump-steamos.service' + 'makedumpfile' + 'module-setup.sh' + 'README.md' + 'submit_report.sh') + +sha256sums=('b008f0afa1ca0eccbb27d5293fc624c6845eb89d1b6a92141b096d9746afb672' + 'c18621fb705decfff724b7498d418002cdf9c30c2c1a00d5379a51fdb4c21a26' + 'feef3082832df97e5a21ee90a94874b7776fceaa4bb9847ae57344db8aab73ef' + '8f2fb837c980975dfd3bb2c7c2dd66b20975f97fdecd2646e06543a869be6136' + '6063ed2283743d8d84a89d9f3c950e5f50adf99bba5ce865a25282081ebc04c2' + '86ef2bd71551598f392fe278507449c1c872e0d42b27600cfeb5bcf9a75aa881' + 'eaff70fd08c2378894bc0c7c340fb41cef6bc488a839d81ea7d0f06f4998e14e' + 'e4da9aa28643aee08f126f0fd62e273924e511daefbc8c2957ba34715b718b95' + '98fd860864cfb59043532dd6b4dfea0e6cf2abbd77da5d9b3200da718126a480') + +package() { + install -D -m0644 kdump.etc "$pkgdir/etc/default/kdump" + + install -D -m0644 kdump-steamos.service "$pkgdir/usr/lib/systemd/system/kdump-steamos.service" + + install -D -m0755 kdump_collect.sh "$pkgdir/usr/lib/dracut/modules.d/55kdump/kdump_collect.sh" + install -D -m0755 module-setup.sh "$pkgdir/usr/lib/dracut/modules.d/55kdump/module-setup.sh" + install -D -m0644 README.md "$pkgdir/usr/lib/dracut/modules.d/55kdump/README" + + install -D -m0755 kdump_load.sh 
"$pkgdir/usr/lib/kdump/kdump_load.sh" + install -D -m0755 makedumpfile "$pkgdir/usr/lib/kdump/makedumpfile" + install -D -m0755 submit_report.sh "$pkgdir/usr/lib/kdump/submit_report.sh" +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..ac56556 --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# +# Code by Guilherme G. Piccoli +# +# +# ########################################################################### +# ############################ SteamOS Kdump ############################## +# ########################################################################### +# +# This is the first version of SteamOS Kdump infrastructure. The goal is to +# collect data whenever a kernel crash is detected. There is a lightweight +# collection, that only grabs dmesg, and a more complete setting to grab the +# whole (compressed) vmcore. The tunables are available at /etc/default/kdump. +# +# After installation and a reboot, things should be all set EXCEPT for GRUB +# config - please check the CAVEATS/INSTRUCTIONS section below. Notice the +# package is under active development, this one being a kind of "Proof Of Concept" +# still - improvements are expected in the near future. Thanks for testing!!! +# +# +# CAVEATS / INSTRUCTIONS +# ########################################################################### +# (a) For now, we don't automatically edit any GRUB config, so the minimum +# necessary action after installing this package is to add "crashkernel=160M" +# to your GRUB config so that subsequent boots pick up this setting and reserve +# the memory, or else kdump cannot work. The memory amount was empirically +# determined - 128M wasn't enough and 144M is unstable, so 160M seems good enough. +# +# (b) It requires (obviously) a RW rootfs - we've used tune2fs in order to make +# it read-write, since it's RO by default. 
Also, we assume the nvme partition +# scheme is default across all versions and didn't change with new updates +# for example - kdump relies on mounting partitions, etc. +# +# (c) Due to a post-transaction hook executed by libalpm (90-dracut-install.hook), +# unfortunately after installing the kdump-steamos package *all* initramfs images +# are recreated - this is not necessary, we're thinking about how to prevent that, +# but for now be prepared: the installation takes some (long) minutes due to that ={ +# +# (d) Unfortunately makedumpfile from Arch Linux is not available on official +# repos, only in AUR - and it doesn't build with zstd, which allows great and +# fast compression. So, we're hereby _packing the binary_ with all the scripts, +# which is a temporary workaround and should be resolved later - already started +# to "lobby" for package inclusion in the official channels and also we're trying +# to add zstd support: +# https://aur.archlinux.org/packages/makedumpfile/#comment-841333 +# +# +# TODOs (for now - we expect to have more after some testing by the colleagues) +# +# (1) We'd like to be able to automatically edit GRUB and recreate its config +# file - after some future discussion on the proper parameters, this is expected +# to be added to the package. +# +# (2) Hopefully we can fix/prevent the unnecessary re-creation of all initramfs +# images - this happens due to our package installing files on /usr/lib/dracut/modules.d +# which is a trigger for this initramfs recreation. +# +# (3) We have a "fragile" way of determining a mount point required for kdump; +# this is something to improve in order to make the kdump more reliable. +# +# (4) Add a more reliable reboot mechanism - we have seen issues with "reboot -f" +# in the past and relying on sysrq reboot as a quirk proved to be a safe option, +# so this is something to think about here. Should be easy to implement. 
+# +# (5) Maybe a good idea would be to allow creating the minimum image for any +# specified kernel, not only for the running one (which is what we do now). +# Low-priority idea, easy to implement. +# diff --git a/kdump-steamos.install b/kdump-steamos.install new file mode 100755 index 0000000..215d898 --- /dev/null +++ b/kdump-steamos.install @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# Maintainer: Guilherme G. Piccoli + +post_install() { + # Create the minimal kdump initramfs for the running kernel + /usr/lib/kdump/kdump_load.sh initrd +} + +pre_remove() { + # Delete all minimal initramfs images created for kdump + /usr/lib/kdump/kdump_load.sh clear +} diff --git a/kdump-steamos.service b/kdump-steamos.service new file mode 100644 index 0000000..6be5828 --- /dev/null +++ b/kdump-steamos.service @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# + +[Unit] +Description=SteamOS kdump loader boot-time service + +[Service] +Type=oneshot +StandardOutput=journal +ExecStartPre=/usr/lib/kdump/kdump_load.sh +ExecStart=/usr/lib/kdump/submit_report.sh +RemainAfterExit=yes + +[Install] +WantedBy=multi-user.target diff --git a/kdump.etc b/kdump.etc new file mode 100644 index 0000000..2da4794 --- /dev/null +++ b/kdump.etc @@ -0,0 +1,26 @@ +#!/bin/sh +# +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve +# +# Configuration settings for SteamOS kdump. After _any_ change in this +# file, it's required to re-create the kdump minimal initramfs by running: +# /usr/lib/kdump/kdump_load.sh initrd +# + +# Mount-related options - the DEVNODE must exist and be available during the kdump +# script execution. The KDUMP_FOLDER will be created if it doesn't exist. + +MOUNT_DEVNODE="nvme0n1p8" +KDUMP_FOLDER="/.steamos/offload/var/kdump" + +# Dump controlling settings - for now we don't have network/iscsi dumps, only +# local storage dumps. 
If FULL_COREDUMP is !=0, we collect a full compressed +# vmcore, which might require a lot of disk space. The *_CMD settings refer +# to tunings of makedumpfile - we rely on zstd compression and maximum page +# exclusion for the full vmcore, mimic'ing Debian kdump. + +FULL_COREDUMP=0 +MAKEDUMPFILE_COREDUMP_CMD="-z -d 31" +MAKEDUMPFILE_DMESG_CMD="--dump-dmesg" diff --git a/kdump_collect.sh b/kdump_collect.sh new file mode 100755 index 0000000..639156d --- /dev/null +++ b/kdump_collect.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# +# Kdump script that should effectively collect the core dump/dmesg from +# within a Dracut-generated initramfs on SteamOS kdump. +# The most fail-prone operations are guarded with conditionals to bail +# in case we indeed fail - worst thing here would be to have a bad condition +# and get stuck in this minimal initramfs with no output for the user. +# + +. /usr/lib/kdump/kdump.etc + +VMCORE="/proc/vmcore" +KDUMP_TIMESTAMP=$(date +"%Y%m%d%H%M") +KDUMP_FOLDER="/kdump_path/${KDUMP_FOLDER}/crash/${KDUMP_TIMESTAMP}" + +# Bail out in case we don't have a vmcore, i.e. either we're not kdumping +# or something is pretty wrong and we wouldn't be able to progress. +# +if [ ! -f "$VMCORE" ]; then + reboot -f +fi + +mkdir -p "/kdump_path" +if ! 
mount "/dev/${MOUNT_DEVNODE}" /kdump_path; then + reboot -f +fi + +mkdir -p "${KDUMP_FOLDER}" + +/usr/lib/kdump/makedumpfile ${MAKEDUMPFILE_DMESG_CMD} "$VMCORE" "${KDUMP_FOLDER}/dmesg.txt" +sync "${KDUMP_FOLDER}/dmesg.txt" + +if [ "${FULL_COREDUMP}" -ne 0 ]; then + /usr/lib/kdump/makedumpfile ${MAKEDUMPFILE_COREDUMP_CMD} "$VMCORE" "${KDUMP_FOLDER}/vmcore.compressed" + sync "${KDUMP_FOLDER}/vmcore.compressed" +fi + +umount "/dev/${MOUNT_DEVNODE}" +sync + +reboot -f diff --git a/kdump_load.sh b/kdump_load.sh new file mode 100755 index 0000000..84cc200 --- /dev/null +++ b/kdump_load.sh @@ -0,0 +1,47 @@ +#!/bin/sh +# +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# +# Script that loads the panic kdump (from within a systemd service) +# or if the proper parameters are passed, either creates the minimal +# kdump initramfs for the running kernel or removes all the previously created +# ones. Since it runs on boot time, avoid failing here to not risk a boot hang. +# + +if [ ! -f "/etc/default/kdump" ]; then + exit 0 +fi + +. /etc/default/kdump + +# Fragile way for finding the proper mount point for DEVNODE: +DEVN_MOUNTED=$(mount |grep "${MOUNT_DEVNODE}" | head -n1 | cut -f3 -d\ ) +KDUMP_FOLDER="${DEVN_MOUNTED}/${KDUMP_FOLDER}" + +if [ "$1" = "initrd" ]; then + mkdir -p "${KDUMP_FOLDER}" + rm -f "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" + + echo "Creating the kdump initramfs for kernel \"$(uname -r)\" ..." 
+ dracut --no-early-microcode --host-only -q -m\ + "bash systemd systemd-initrd systemd-sysusers modsign dbus-daemon kdump dbus udev-rules dracut-systemd base fs-lib shutdown"\ + --kver "$(uname -r)" "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" + + exit 0 +fi + +if [ "$1" = "clear" ]; then + rm -f "${KDUMP_FOLDER}"/kdump-initrd-* + exit 0 +fi + +# Adapted from Debian kdump; quotes are not escaped since the command line goes to kexec directly, not through eval +KDUMP_CMDLINE=$(sed -re 's/(^| )(crashkernel|hugepages|hugepagesz)=[^ ]*//g' /proc/cmdline) + +KDUMP_CMDLINE="${KDUMP_CMDLINE} panic=-1 oops=panic fsck.mode=force fsck.repair=yes nr_cpus=1 reset_devices" +VMLINUX="$(grep -o 'BOOT_IMAGE=[^ ]*' /proc/cmdline)" + +kexec -s -p "${VMLINUX#*BOOT_IMAGE=}" --initrd "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" --append="${KDUMP_CMDLINE}" || true +exit 0 diff --git a/makedumpfile b/makedumpfile new file mode 100755 index 0000000..50ddd39 Binary files /dev/null and b/makedumpfile differ diff --git a/module-setup.sh b/module-setup.sh new file mode 100755 index 0000000..0e0aa05 --- /dev/null +++ b/module-setup.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# +# SteamOS kdump module construction/inclusion script for +# Dracut-based initramfs. +# + +# Only include kdump if it is explicitly asked in the argument list +check() { + return 255 +} + +installkernel() { + hostonly='' instmods ext4 +} + +install() { + # First clear all unnecessary firmwares/drivers added by drm in order to + # reduce the size of this minimal initramfs being created. This should + # be already done via command-line arguments, but let's play safe and delete + # from here as well just in case; ${initdir:?} aborts instead of hitting the host's /usr/lib if initdir is empty. 
+ rm -rf "${initdir:?}/usr/lib/firmware/amdgpu/" + rm -rf "${initdir:?}"/usr/lib/modules/*/kernel/drivers/gpu/drm/amd/* + + # Install necessary binaries + inst date + inst sync + + mkdir -p "$initdir/usr/lib/kdump" + cp -LR --preserve=all /usr/lib/kdump/* "$initdir/usr/lib/kdump/" + cp -LR --preserve=all /etc/default/kdump "$initdir/usr/lib/kdump/kdump.etc" + + inst_hook pre-mount 01 "$moddir/kdump_collect.sh" +} diff --git a/submit_report.sh b/submit_report.sh new file mode 100755 index 0000000..a308012 --- /dev/null +++ b/submit_report.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2021 Valve. +# +# This is a dummy script (for now) that aims to submit error/crash +# reports to Valve servers in order to do a post-mortem analysis. +# This is part of SteamOS kdump - it is invoked by systemd on boot time +# and for now it just gracefully exits. +# + +exit 0