From a1f89d341f57119156312cd019171d39878b216b Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 29 Dec 2021 16:26:41 -0300 Subject: [PATCH] Add pstore-RAM support and fix comment indentation Check README.md and /etc/default/kdump for instructions on pstore usage - it should be simple, since it is configured automatically. Note that we expect all units to have the same e820 memory map, and hence to have the RAM buffer available. This point should be better clarified by the team working with firmware. Also, the package now enables the kdump systemd service automatically, in a post-install hook. Signed-off-by: Guilherme G. Piccoli --- PKGBUILD | 16 ++++++++-------- README.md | 26 ++++++++++++++++--------- kdump-steamos.install | 8 ++++++-- kdump.etc | 23 +++++++++++++++------- kdump_collect.sh | 4 ++-- kdump_load.sh | 41 +++++++++++++++++++++++++++++++++------- module-setup.sh | 12 ++++++------ submit_report.sh | 44 +++++++++++++++++++++++++++++++++++++++++-- 8 files changed, 131 insertions(+), 43 deletions(-) diff --git a/PKGBUILD b/PKGBUILD index 19e08da..37755ea 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -4,7 +4,7 @@ # Maintainer: Guilherme G. 
Piccoli pkgname=kdump-steamos -pkgver=0.1 +pkgver=0.2 pkgrel=1 pkgdesc="Kdump scripts to collect vmcore/dmesg in a small dracut-based initramfs" depends=('dracut' 'kexec-tools' 'systemd' 'zstd') @@ -22,15 +22,15 @@ source=('kdump_collect.sh' 'README.md' 'submit_report.sh') -sha256sums=('b008f0afa1ca0eccbb27d5293fc624c6845eb89d1b6a92141b096d9746afb672' - 'c18621fb705decfff724b7498d418002cdf9c30c2c1a00d5379a51fdb4c21a26' - 'feef3082832df97e5a21ee90a94874b7776fceaa4bb9847ae57344db8aab73ef' - '8f2fb837c980975dfd3bb2c7c2dd66b20975f97fdecd2646e06543a869be6136' +sha256sums=('38a3636c95cb97b33a71cfb2b95ccbf7a9a565e86b2128299ea7844d1135fe07' + '38751d1fa1607fc99607423a0051a2b3322db5579906401b40c11c10edd6bbc6' + '888024a0b121102688d0384cf00dca06d55d3c2fc6b18a3de0da1fc8b5c10066' + '06b38bd9f09da5fb22a765b6f1945fc349cc5f9d13cd32c9218b9b60b40a9010' '6063ed2283743d8d84a89d9f3c950e5f50adf99bba5ce865a25282081ebc04c2' '86ef2bd71551598f392fe278507449c1c872e0d42b27600cfeb5bcf9a75aa881' - 'eaff70fd08c2378894bc0c7c340fb41cef6bc488a839d81ea7d0f06f4998e14e' - 'e4da9aa28643aee08f126f0fd62e273924e511daefbc8c2957ba34715b718b95' - '98fd860864cfb59043532dd6b4dfea0e6cf2abbd77da5d9b3200da718126a480') + 'c3ceaf77021e49c3ec884e3959f49b0cbf5e8e89ad3f17d485d895d9e91725f4' + '01432491df80dfd37c6f261c17f55c574e8898003642334a4d61f8d93aef08c3' + '956efe1589d8d6533a231d8bdec6ac5cd4c1d1494b1f44b8494fe1d75f6a1e4e') package() { install -D -m0644 kdump.etc "$pkgdir/etc/default/kdump" diff --git a/README.md b/README.md index 2daf52f..d59df1f 100644 --- a/README.md +++ b/README.md @@ -15,10 +15,13 @@ # collection, that only grabs dmesg, and a more complete setting to grab the # whole (compressed) vmcore. The tunnings are available at /etc/default/kdump. # +# Also, the infrastructure is able to configure and save pstore-RAM logs. +# # After installation and a reboot, things should be all set EXCEPT for GRUB # config - please check the CAVEATS/INSTRUCTIONS section below. 
Notice the -# package is under active development, this one being a kind of "Proof Of Concept" -# still - improvements are expected in the near future. Thanks for testing!!! +# package is under active development, this version should still be considered +# a kind of "Proof Of Concept" - improvements are expected in the near future. +# Thanks for testing!!! # # # CAVEATS / INSTRUCTIONS @@ -28,6 +31,8 @@ # to your GRUB config in order subsequent boots pick this setting and do reserve # the memory, or else kdump cannot work. The memory amount was empirically # determined - 128M wasn't enough and 144M is unstable, so 160M seems good enough. +# If you prefer to rely on pstore-RAM, no GRUB setting should be required; this +# is currently the default (see /etc/default/kdump). # # (b) It requires (obviously) a RW rootfs - we've used tune2fs in order to make # it read-write, since it's RO by default. Also, we assume the nvme partition @@ -39,13 +44,11 @@ # are recreated - this is not necessary, we're thinking on how to prevent that, # but for now be prepared: the installation take some (long) minutes due to that ={ # -# (d) Unfortunately makedumpfile form Arch Linux is not available on official -# repos, only in AUR - and it doesn't build with zstd, which allows great and -# fast compression. So, we're hereby _packing the binary_ with all the scripts, +# (d) Unfortunately makedumpfile from Arch Linux is not available on official +# repos, only in AUR. 
So, we're hereby _packing the binary_ with all the scripts, # which is a temporary workaround and should be resolved later - already started -# to "lobby" for package inclusion in the official channels and also we're trying -# to to add zstd support: -# https://aur.archlinux.org/packages/makedumpfile/#comment-841333 +# to "lobby" for package inclusion in the official channels: +# https://aur.archlinux.org/packages/makedumpfile/#comment-843853 # # # TODOs (for now - we expect to have more after some testing by the colleagues) @@ -55,7 +58,7 @@ # to be added to the package. # # (2) Hopefully we can fix/prevent the unnecessary re-creation of all initramfs -# images - this happens due to our package installing files on /usr/lib/dracut/modules.d +# images - it happens due to our pkg installing files on /usr/lib/dracut/modules.d # which is a trigger for this initramfs recreation. # # (3) We have a "fragile" way of determining a mount point required for kdump; @@ -69,4 +72,9 @@ # specified kernel, not only for the running one (which is what we do now). # Low-priority idea, easy to implement. # +# (6) Pstore ramoops backend has some limitations that we're discussing with +# the kernel community - right now we can only collect ONE dmesg and its +# size is truncated on "record_size" bytes, not allowing a file split like +# efi-pstore; hopefully we can improve that. +# ``` diff --git a/kdump-steamos.install b/kdump-steamos.install index 215d898..835ea20 100755 --- a/kdump-steamos.install +++ b/kdump-steamos.install @@ -4,11 +4,15 @@ # Maintainer: Guilherme G. 
Piccoli post_install() { - # Create the minimal kdump initramfs for the running kernel + # Create the minimal kdump initramfs for the running kernel /usr/lib/kdump/kdump_load.sh initrd + + systemctl enable kdump-steamos.service } pre_remove() { - # Delete all minimal initramfs images created for kdump + systemctl disable kdump-steamos.service + + # Delete all minimal initramfs images created for kdump /usr/lib/kdump/kdump_load.sh clear } diff --git a/kdump.etc b/kdump.etc index 2da4794..79baf60 100644 --- a/kdump.etc +++ b/kdump.etc @@ -9,18 +9,27 @@ # /usr/lib/kdump/kdump_load.sh initrd # -# Mount-related options - the DEVNODE must exist and be available during the kdump -# script execution. The KDUMP_FOLDER will be create if doesn't exist. +# Mount-related options - the DEVNODE must exist and be available during the +# kdump script execution. The KDUMP_FOLDER will be create if doesn't exist. +# The KDUMP_MNT is just a temporary file that will carry the mounted folder +# path across boot-time scripts. MOUNT_DEVNODE="nvme0n1p8" KDUMP_FOLDER="/.steamos/offload/var/kdump" +KDUMP_MNT="/tmp/kdump.mnt" -# Dump controlling settings - for now we don't have network/iscsi dumps, only -# local storage dumps. If FULL_COREDUMP is !=0, we collect a full compressed -# vmcore, which might require a lot of disk space. The *_CMD settings refer -# to tunnings on makedumpfile - we rely on zstd compression and maximum page -# exclusion for the full vmcore, mimic'ing Debian kdump. +# Dump controlling settings - for now we don't have network/iscsi dumps, only +# local storage dumps. If FULL_COREDUMP is !=0, we collect a full compressed +# vmcore, which might require a lot of disk space. The *_CMD settings refer +# to tunnings on makedumpfile - we rely on zstd compression and maximum page +# exclusion for the full vmcore, mimic'ing Debian kdump. 
FULL_COREDUMP=0 MAKEDUMPFILE_COREDUMP_CMD="-z -d 31" MAKEDUMPFILE_DMESG_CMD="--dump-dmesg" + +# Pstore-RAM setting - if enabled, Kdump won't be loaded, instead the Pstore +# RAM backend will be configured. In order to have success, this operation +# relies in having an available RAM buffer on /proc/iomem with at least 5MiB +# in size. +USE_PSTORE_RAM=1 diff --git a/kdump_collect.sh b/kdump_collect.sh index 639156d..083548c 100755 --- a/kdump_collect.sh +++ b/kdump_collect.sh @@ -17,8 +17,8 @@ VMCORE="/proc/vmcore" KDUMP_TIMESTAMP=$(date +"%Y%m%d%H%M") KDUMP_FOLDER="/kdump_path/${KDUMP_FOLDER}/crash/${KDUMP_TIMESTAMP}" -# Bail out in case we don't have a vmcore, i.e. either we're not kdumping -# or something is pretty wrong and we wouldn't be able to progress. +# Bail out in case we don't have a vmcore, i.e. either we're not kdumping +# or something is pretty wrong and we wouldn't be able to progress. # if [ ! -f $VMCORE ]; then reboot -f diff --git a/kdump_load.sh b/kdump_load.sh index 84cc200..924eda1 100755 --- a/kdump_load.sh +++ b/kdump_load.sh @@ -4,10 +4,11 @@ # # Copyright (c) 2021 Valve. # -# Script that loads the panic kdump (from within a systemd service) -# or if the proper parameters are passed, either creates the minimal -# kdump initramfs for the running kernel or removes all the previously created. -# ones. Since it runs on boot time, avoid failing here to not risk a boot hang. +# Script that loads the panic kdump (from within a systemd service) and/or +# configures the Pstore-RAM mechanism. If the proper parameters are passed +# also, either it creates the minimal kdump initramfs for the running kernel +# or removes all the previously created ones. Since it runs on boot time, +# avoid failing here to not risk a boot hang. # if [ ! -f "/etc/default/kdump" ]; then @@ -16,10 +17,13 @@ fi . 
/etc/default/kdump -# Fragile way for finding the proper mount point for DEVNODE: +# Fragile way for finding the proper mount point for DEVNODE: DEVN_MOUNTED=$(mount |grep "${MOUNT_DEVNODE}" | head -n1 | cut -f3 -d\ ) KDUMP_FOLDER="${DEVN_MOUNTED}/${KDUMP_FOLDER}" +echo "${KDUMP_FOLDER}" > ${KDUMP_MNT} +sync ${KDUMP_MNT} + if [ "$1" == "initrd" ]; then mkdir -p "${KDUMP_FOLDER}" rm -f "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" @@ -37,11 +41,34 @@ if [ "$1" == "clear" ]; then exit 0 fi -# Stolen from Debian kdump +# Pstore-RAM load; if it is configured via /etc/default/kdump and fails +# to configure pstore, we still try to load the kdump. We try to reserve +# here a 5MiB memory region. +# Notice that we assume ramoops is a module here - if built-in, we should +# properly load it through command-line parameters. +if [ ${USE_PSTORE_RAM} -eq 1 ]; then + MEM_REQUIRED=5242880 # 5MiB + RECORD_SIZE=0x200000 # 2MiB + RANGE=$(grep "RAM buffer" /proc/iomem | head -n1 | cut -f1 -d\ ) + + MEM_END=$(echo $RANGE | cut -f2 -d\-) + MEM_START=$(echo $RANGE | cut -f1 -d\-) + MEM_SIZE=$(( 16#${MEM_END} - 16#${MEM_START} )) + + if [ ${MEM_SIZE} -ge ${MEM_REQUIRED} ]; then + if modprobe ramoops mem_address=0x${MEM_START} mem_size=${MEM_REQUIRED} record_size=${RECORD_SIZE}; then + exit 0 + fi + fi + # Fallbacks to kdump load - if we fail when configuring pstore, better try kdump; + # who knows and we may be lucky enough to have some crashkernel reserved memory... + # TODO (maybe): could invert the order and try kdump first, if it fails, try pstore! 
+fi + +# Stolen from Debian kdump KDUMP_CMDLINE=$(sed -re 's/(^| )(crashkernel|hugepages|hugepagesz)=[^ ]*//g;s/"/\\\\"/' /proc/cmdline) KDUMP_CMDLINE="${KDUMP_CMDLINE} panic=-1 oops=panic fsck.mode=force fsck.repair=yes nr_cpus=1 reset_devices" VMLINUX="$(grep -o 'BOOT_IMAGE=[^ ]*' /proc/cmdline)" kexec -s -p "${VMLINUX#*BOOT_IMAGE=}" --initrd "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" --append="${KDUMP_CMDLINE}" || true -exit 0 diff --git a/module-setup.sh b/module-setup.sh index 0e0aa05..a41121f 100755 --- a/module-setup.sh +++ b/module-setup.sh @@ -8,7 +8,7 @@ # Dracut-based initramfs. # -# Only include kdump if it is explicitly asked in the argument list +# Only include kdump if it is explicitly asked in the argument list check() { return 255 } @@ -18,14 +18,14 @@ installkernel() { } install() { - # First clear all unnecessary firmwares/drivers added by drm in order to - # reduce the size of this minimal initramfs being created. This should - # be already done via command-line arguments, but let's play safe and delete - # from here as well just in case. + # First clear all unnecessary firmwares/drivers added by drm in order to + # reduce the size of this minimal initramfs being created. This should + # be already done via command-line arguments, but let's play safe and delete + # from here as well just in case. rm -rf $initdir/usr/lib/firmware/amdgpu/ rm -rf $initdir/usr/lib/modules/*/kernel/drivers/gpu/drm/amd/* - # Install necessary binaries + # Install necessary binaries inst date inst sync diff --git a/submit_report.sh b/submit_report.sh index a308012..b927669 100755 --- a/submit_report.sh +++ b/submit_report.sh @@ -4,10 +4,50 @@ # # Copyright (c) 2021 Valve. # -# This is a dummy script (for now) that aims to submit error/crash +# This is currently a dummy script for kdump, but collects and clears +# pstore saved logs for now. It aims, in the future, to submit error/crash # reports to Valve servers in order to do a post-mortem analysis. 
# This is part of SteamOS kdump - it is invoked by systemd on boot time -# and for now it just graciously exits. +# and should always exit graciously to avoid breaking the boot services. # +if [ ! -f "/etc/default/kdump" ]; then + exit 0 +fi + +. /etc/default/kdump + +# Yeah, we assume pstore is mounted by default, in this location; +# if not, we get a 0 and don't loop. +PSTORE_CNT=$(find /sys/fs/pstore/* 2>/dev/null | grep -c ramoops) +if [ ${PSTORE_CNT} -eq 0 ]; then + exit 0 +fi + +# We do some validation to be sure KDUMP_MNT pointed path is valid... +KDUMP_CRASH_FOLDER="$(cat ${KDUMP_MNT})" +rm -f ${KDUMP_MNT} + +if [ ! -d "${KDUMP_CRASH_FOLDER}" ]; then + exit 0 +fi + +# If valid, then dump the pstore logs in the crash subfolder. +KDUMP_CRASH_FOLDER="${KDUMP_CRASH_FOLDER}/crash" +mkdir -p ${KDUMP_CRASH_FOLDER} + +PSTORE_TSTAMP=$(date +"%Y%m%d%H%M") +LOOP_CNT=0 +while [ ${PSTORE_CNT} -gt 0 ]; +do + PSTORE_FILE="$(find /sys/fs/pstore/* | grep ramoops | sort | head -n1)" + SAVED_FILE="${KDUMP_CRASH_FOLDER}/dmesg-pstore.${PSTORE_TSTAMP}-${LOOP_CNT}" + + cat ${PSTORE_FILE} > ${SAVED_FILE} + sync ${SAVED_FILE} + rm -f ${PSTORE_FILE} + + PSTORE_CNT=$((${PSTORE_CNT} - 1)) + LOOP_CNT=$((${LOOP_CNT} + 1)) +done exit 0