diff --git a/PKGBUILD b/PKGBUILD index 19e08da..37755ea 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -4,7 +4,7 @@ # Maintainer: Guilherme G. Piccoli pkgname=kdump-steamos -pkgver=0.1 +pkgver=0.2 pkgrel=1 pkgdesc="Kdump scripts to collect vmcore/dmesg in a small dracut-based initramfs" depends=('dracut' 'kexec-tools' 'systemd' 'zstd') @@ -22,15 +22,15 @@ source=('kdump_collect.sh' 'README.md' 'submit_report.sh') -sha256sums=('b008f0afa1ca0eccbb27d5293fc624c6845eb89d1b6a92141b096d9746afb672' - 'c18621fb705decfff724b7498d418002cdf9c30c2c1a00d5379a51fdb4c21a26' - 'feef3082832df97e5a21ee90a94874b7776fceaa4bb9847ae57344db8aab73ef' - '8f2fb837c980975dfd3bb2c7c2dd66b20975f97fdecd2646e06543a869be6136' +sha256sums=('38a3636c95cb97b33a71cfb2b95ccbf7a9a565e86b2128299ea7844d1135fe07' + '38751d1fa1607fc99607423a0051a2b3322db5579906401b40c11c10edd6bbc6' + '888024a0b121102688d0384cf00dca06d55d3c2fc6b18a3de0da1fc8b5c10066' + '06b38bd9f09da5fb22a765b6f1945fc349cc5f9d13cd32c9218b9b60b40a9010' '6063ed2283743d8d84a89d9f3c950e5f50adf99bba5ce865a25282081ebc04c2' '86ef2bd71551598f392fe278507449c1c872e0d42b27600cfeb5bcf9a75aa881' - 'eaff70fd08c2378894bc0c7c340fb41cef6bc488a839d81ea7d0f06f4998e14e' - 'e4da9aa28643aee08f126f0fd62e273924e511daefbc8c2957ba34715b718b95' - '98fd860864cfb59043532dd6b4dfea0e6cf2abbd77da5d9b3200da718126a480') + 'c3ceaf77021e49c3ec884e3959f49b0cbf5e8e89ad3f17d485d895d9e91725f4' + '01432491df80dfd37c6f261c17f55c574e8898003642334a4d61f8d93aef08c3' + '956efe1589d8d6533a231d8bdec6ac5cd4c1d1494b1f44b8494fe1d75f6a1e4e') package() { install -D -m0644 kdump.etc "$pkgdir/etc/default/kdump" diff --git a/README.md b/README.md index 2daf52f..d59df1f 100644 --- a/README.md +++ b/README.md @@ -15,10 +15,13 @@ # collection, that only grabs dmesg, and a more complete setting to grab the # whole (compressed) vmcore. The tunnings are available at /etc/default/kdump. # +# Also, the infrastructure is able to configure and save pstore-RAM logs. 
+# # After installation and a reboot, things should be all set EXCEPT for GRUB # config - please check the CAVEATS/INSTRUCTIONS section below. Notice the -# package is under active development, this one being a kind of "Proof Of Concept" -# still - improvements are expected in the near future. Thanks for testing!!! +# package is under active development, this version should still be considered +# a kind of "Proof Of Concept" - improvements are expected in the near future. +# Thanks for testing!!! # # # CAVEATS / INSTRUCTIONS @@ -28,6 +31,8 @@ # to your GRUB config in order subsequent boots pick this setting and do reserve # the memory, or else kdump cannot work. The memory amount was empirically # determined - 128M wasn't enough and 144M is unstable, so 160M seems good enough. +# If you prefer to rely on pstore-RAM, no GRUB setting should be required; this +# is currently the default (see /etc/default/kdump). # # (b) It requires (obviously) a RW rootfs - we've used tune2fs in order to make # it read-write, since it's RO by default. Also, we assume the nvme partition @@ -39,13 +44,11 @@ # are recreated - this is not necessary, we're thinking on how to prevent that, # but for now be prepared: the installation take some (long) minutes due to that ={ # -# (d) Unfortunately makedumpfile form Arch Linux is not available on official -# repos, only in AUR - and it doesn't build with zstd, which allows great and -# fast compression. So, we're hereby _packing the binary_ with all the scripts, +# (d) Unfortunately makedumpfile from Arch Linux is not available on official +# repos, only in AUR. 
So, we're hereby _packing the binary_ with all the scripts, # which is a temporary workaround and should be resolved later - already started -# to "lobby" for package inclusion in the official channels and also we're trying -# to to add zstd support: -# https://aur.archlinux.org/packages/makedumpfile/#comment-841333 +# to "lobby" for package inclusion in the official channels: +# https://aur.archlinux.org/packages/makedumpfile/#comment-843853 # # # TODOs (for now - we expect to have more after some testing by the colleagues) @@ -55,7 +58,7 @@ # to be added to the package. # # (2) Hopefully we can fix/prevent the unnecessary re-creation of all initramfs -# images - this happens due to our package installing files on /usr/lib/dracut/modules.d +# images - it happens due to our pkg installing files in /usr/lib/dracut/modules.d # which is a trigger for this initramfs recreation. # # (3) We have a "fragile" way of determining a mount point required for kdump; @@ -69,4 +72,9 @@ # specified kernel, not only for the running one (which is what we do now). # Low-priority idea, easy to implement. # +# (6) The pstore ramoops backend has some limitations that we're discussing with +# the kernel community - right now we can only collect ONE dmesg and its +# size is truncated to "record_size" bytes, not allowing a file split like +# efi-pstore; hopefully we can improve that. +# ``` diff --git a/kdump-steamos.install b/kdump-steamos.install index 215d898..835ea20 100755 --- a/kdump-steamos.install +++ b/kdump-steamos.install @@ -4,11 +4,15 @@ # Maintainer: Guilherme G. 
Piccoli post_install() { - # Create the minimal kdump initramfs for the running kernel + # Create the minimal kdump initramfs for the running kernel /usr/lib/kdump/kdump_load.sh initrd + + systemctl enable kdump-steamos.service } pre_remove() { - # Delete all minimal initramfs images created for kdump + systemctl disable kdump-steamos.service + + # Delete all minimal initramfs images created for kdump /usr/lib/kdump/kdump_load.sh clear } diff --git a/kdump.etc b/kdump.etc index 2da4794..79baf60 100644 --- a/kdump.etc +++ b/kdump.etc @@ -9,18 +9,27 @@ # /usr/lib/kdump/kdump_load.sh initrd # -# Mount-related options - the DEVNODE must exist and be available during the kdump -# script execution. The KDUMP_FOLDER will be create if doesn't exist. +# Mount-related options - the DEVNODE must exist and be available during the +# kdump script execution. The KDUMP_FOLDER will be created if it doesn't exist. +# The KDUMP_MNT is just a temporary file that will carry the mounted folder +# path across boot-time scripts. MOUNT_DEVNODE="nvme0n1p8" KDUMP_FOLDER="/.steamos/offload/var/kdump" +KDUMP_MNT="/tmp/kdump.mnt" -# Dump controlling settings - for now we don't have network/iscsi dumps, only -# local storage dumps. If FULL_COREDUMP is !=0, we collect a full compressed -# vmcore, which might require a lot of disk space. The *_CMD settings refer -# to tunnings on makedumpfile - we rely on zstd compression and maximum page -# exclusion for the full vmcore, mimic'ing Debian kdump. +# Dump controlling settings - for now we don't have network/iscsi dumps, only +# local storage dumps. If FULL_COREDUMP is !=0, we collect a full compressed +# vmcore, which might require a lot of disk space. The *_CMD settings refer +# to tunings on makedumpfile - we rely on zstd compression and maximum page +# exclusion for the full vmcore, mimicking Debian kdump. 
FULL_COREDUMP=0 MAKEDUMPFILE_COREDUMP_CMD="-z -d 31" MAKEDUMPFILE_DMESG_CMD="--dump-dmesg" + +# Pstore-RAM setting - if enabled, Kdump won't be loaded; instead the Pstore +# RAM backend will be configured. In order to succeed, this operation +# relies on having an available RAM buffer in /proc/iomem of at least 5MiB +# in size. +USE_PSTORE_RAM=1 diff --git a/kdump_collect.sh b/kdump_collect.sh index 639156d..083548c 100755 --- a/kdump_collect.sh +++ b/kdump_collect.sh @@ -17,8 +17,8 @@ VMCORE="/proc/vmcore" KDUMP_TIMESTAMP=$(date +"%Y%m%d%H%M") KDUMP_FOLDER="/kdump_path/${KDUMP_FOLDER}/crash/${KDUMP_TIMESTAMP}" -# Bail out in case we don't have a vmcore, i.e. either we're not kdumping -# or something is pretty wrong and we wouldn't be able to progress. +# Bail out in case we don't have a vmcore, i.e. either we're not kdumping +# or something is pretty wrong and we wouldn't be able to progress. # if [ ! -f $VMCORE ]; then reboot -f diff --git a/kdump_load.sh b/kdump_load.sh index 84cc200..924eda1 100755 --- a/kdump_load.sh +++ b/kdump_load.sh @@ -4,10 +4,11 @@ # # Copyright (c) 2021 Valve. # -# Script that loads the panic kdump (from within a systemd service) -# or if the proper parameters are passed, either creates the minimal -# kdump initramfs for the running kernel or removes all the previously created. -# ones. Since it runs on boot time, avoid failing here to not risk a boot hang. +# Script that loads the panic kdump (from within a systemd service) and/or +# configures the Pstore-RAM mechanism. If the proper parameters are also +# passed, it either creates the minimal kdump initramfs for the running kernel +# or removes all the previously created ones. Since it runs at boot time, +# avoid failing here so as not to risk a boot hang. # if [ ! -f "/etc/default/kdump" ]; then @@ -16,10 +17,13 @@ fi . 
/etc/default/kdump -# Fragile way for finding the proper mount point for DEVNODE: +# Fragile way for finding the proper mount point for DEVNODE: DEVN_MOUNTED=$(mount |grep "${MOUNT_DEVNODE}" | head -n1 | cut -f3 -d\ ) KDUMP_FOLDER="${DEVN_MOUNTED}/${KDUMP_FOLDER}" +echo "${KDUMP_FOLDER}" > ${KDUMP_MNT} +sync ${KDUMP_MNT} + if [ "$1" == "initrd" ]; then mkdir -p "${KDUMP_FOLDER}" rm -f "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" @@ -37,11 +41,34 @@ if [ "$1" == "clear" ]; then exit 0 fi -# Stolen from Debian kdump +# Pstore-RAM load; if it is configured via /etc/default/kdump and fails +# to configure pstore, we still try to load the kdump. We try to reserve +# here a 5MiB memory region. +# Notice that we assume ramoops is a module here - if built-in, we should +# properly load it through command-line parameters. +if [ ${USE_PSTORE_RAM} -eq 1 ]; then + MEM_REQUIRED=5242880 # 5MiB + RECORD_SIZE=0x200000 # 2MiB + RANGE=$(grep "RAM buffer" /proc/iomem | head -n1 | cut -f1 -d\ ) + + MEM_END=$(echo $RANGE | cut -f2 -d\-) + MEM_START=$(echo $RANGE | cut -f1 -d\-) + MEM_SIZE=$(( 16#${MEM_END} - 16#${MEM_START} )) + + if [ ${MEM_SIZE} -ge ${MEM_REQUIRED} ]; then + if modprobe ramoops mem_address=0x${MEM_START} mem_size=${MEM_REQUIRED} record_size=${RECORD_SIZE}; then + exit 0 + fi + fi + # Fallbacks to kdump load - if we fail when configuring pstore, better try kdump; + # who knows and we may be lucky enough to have some crashkernel reserved memory... + # TODO (maybe): could invert the order and try kdump first, if it fails, try pstore! 
+fi + +# Stolen from Debian kdump KDUMP_CMDLINE=$(sed -re 's/(^| )(crashkernel|hugepages|hugepagesz)=[^ ]*//g;s/"/\\\\"/' /proc/cmdline) KDUMP_CMDLINE="${KDUMP_CMDLINE} panic=-1 oops=panic fsck.mode=force fsck.repair=yes nr_cpus=1 reset_devices" VMLINUX="$(grep -o 'BOOT_IMAGE=[^ ]*' /proc/cmdline)" kexec -s -p "${VMLINUX#*BOOT_IMAGE=}" --initrd "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" --append="${KDUMP_CMDLINE}" || true -exit 0 diff --git a/module-setup.sh b/module-setup.sh index 0e0aa05..a41121f 100755 --- a/module-setup.sh +++ b/module-setup.sh @@ -8,7 +8,7 @@ # Dracut-based initramfs. # -# Only include kdump if it is explicitly asked in the argument list +# Only include kdump if it is explicitly asked in the argument list check() { return 255 } @@ -18,14 +18,14 @@ installkernel() { } install() { - # First clear all unnecessary firmwares/drivers added by drm in order to - # reduce the size of this minimal initramfs being created. This should - # be already done via command-line arguments, but let's play safe and delete - # from here as well just in case. + # First clear all unnecessary firmwares/drivers added by drm in order to + # reduce the size of this minimal initramfs being created. This should + # be already done via command-line arguments, but let's play safe and delete + # from here as well just in case. rm -rf $initdir/usr/lib/firmware/amdgpu/ rm -rf $initdir/usr/lib/modules/*/kernel/drivers/gpu/drm/amd/* - # Install necessary binaries + # Install necessary binaries inst date inst sync diff --git a/submit_report.sh b/submit_report.sh index a308012..b927669 100755 --- a/submit_report.sh +++ b/submit_report.sh @@ -4,10 +4,50 @@ # # Copyright (c) 2021 Valve. # -# This is a dummy script (for now) that aims to submit error/crash +# This is currently a dummy script for kdump, but collects and clears +# pstore saved logs for now. It aims, in the future, to submit error/crash # reports to Valve servers in order to do a post-mortem analysis. 
# This is part of SteamOS kdump - it is invoked by systemd on boot time -# and for now it just graciously exits. +# and should always exit graciously to avoid breaking the boot services. # +if [ ! -f "/etc/default/kdump" ]; then + exit 0 +fi + +. /etc/default/kdump + +# Yeah, we assume pstore is mounted by default, in this location; +# if not, we get a 0 and don't loop. +PSTORE_CNT=$(find /sys/fs/pstore/* 2>/dev/null | grep -c ramoops) +if [ ${PSTORE_CNT} -eq 0 ]; then + exit 0 +fi + +# We do some validation to be sure KDUMP_MNT pointed path is valid... +KDUMP_CRASH_FOLDER="$(cat ${KDUMP_MNT})" +rm -f ${KDUMP_MNT} + +if [ ! -d "${KDUMP_CRASH_FOLDER}" ]; then + exit 0 +fi + +# If valid, then dump the pstore logs in the crash subfolder. +KDUMP_CRASH_FOLDER="${KDUMP_CRASH_FOLDER}/crash" +mkdir -p ${KDUMP_CRASH_FOLDER} + +PSTORE_TSTAMP=$(date +"%Y%m%d%H%M") +LOOP_CNT=0 +while [ ${PSTORE_CNT} -gt 0 ]; +do + PSTORE_FILE="$(find /sys/fs/pstore/* | grep ramoops | sort | head -n1)" + SAVED_FILE="${KDUMP_CRASH_FOLDER}/dmesg-pstore.${PSTORE_TSTAMP}-${LOOP_CNT}" + + cat ${PSTORE_FILE} > ${SAVED_FILE} + sync ${SAVED_FILE} + rm -f ${PSTORE_FILE} + + PSTORE_CNT=$((${PSTORE_CNT} - 1)) + LOOP_CNT=$((${LOOP_CNT} + 1)) +done exit 0