diff --git a/PKGBUILD b/PKGBUILD index 37755ea..4a51e30 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -4,10 +4,10 @@ # Maintainer: Guilherme G. Piccoli pkgname=kdump-steamos -pkgver=0.2 +pkgver=0.3 pkgrel=1 pkgdesc="Kdump scripts to collect vmcore/dmesg in a small dracut-based initramfs" -depends=('dracut' 'kexec-tools' 'systemd' 'zstd') +depends=('dmidecode' 'dracut' 'kexec-tools' 'makedumpfile' 'systemd' 'zstd') arch=('x86_64') license=('GPL2') install=kdump-steamos.install @@ -17,20 +17,20 @@ source=('kdump_collect.sh' 'kdump_load.sh' 'kdump-steamos.install' 'kdump-steamos.service' - 'makedumpfile' 'module-setup.sh' 'README.md' - 'submit_report.sh') + 'submit_report.sh' + 'submitter_load.sh') -sha256sums=('38a3636c95cb97b33a71cfb2b95ccbf7a9a565e86b2128299ea7844d1135fe07' - '38751d1fa1607fc99607423a0051a2b3322db5579906401b40c11c10edd6bbc6' - '888024a0b121102688d0384cf00dca06d55d3c2fc6b18a3de0da1fc8b5c10066' +sha256sums=('2514f79a496f76af847e262eadd55a5c2f8d95375cc513efa8cadd4cd98fe1d2' + 'd0ac5e7e38fa1d3355eacdf70188483456f53d3e2b18cd161dea3df87b0b8f9c' + '8a556a9ebbda88dfd29b9620a0f2e7dbea19cd5fc019eb5dc4ebf7c80e4bf238' '06b38bd9f09da5fb22a765b6f1945fc349cc5f9d13cd32c9218b9b60b40a9010' - '6063ed2283743d8d84a89d9f3c950e5f50adf99bba5ce865a25282081ebc04c2' - '86ef2bd71551598f392fe278507449c1c872e0d42b27600cfeb5bcf9a75aa881' - 'c3ceaf77021e49c3ec884e3959f49b0cbf5e8e89ad3f17d485d895d9e91725f4' - '01432491df80dfd37c6f261c17f55c574e8898003642334a4d61f8d93aef08c3' - '956efe1589d8d6533a231d8bdec6ac5cd4c1d1494b1f44b8494fe1d75f6a1e4e') + '12a9124b907f208471ba7aaac0f3261cbbd34a168cce3260fa9e7793994beebd' + '26bc2b64af0d468f050c0e0dd9e2053176d56886edad9146bc495797bf2c5810' + 'b87fb8e4c4602f8ddc3b0bf6d6175d0ee7b9e0942f4dca8f1b958ed3ad445470' + 'd8b432dc1602e330e61c91a8f9e6761273ff2ca8129f457828ff0d20ac6d5b25' + 'cbb207ecc0f6bacefbeed41f0d4910daac6500ac2345366e1f95f09a7653c65a') package() { install -D -m0644 kdump.etc "$pkgdir/etc/default/kdump" @@ -42,6 +42,6 @@ package() { install 
-D -m0644 README.md "$pkgdir/usr/lib/dracut/modules.d/55kdump/README" install -D -m0755 kdump_load.sh "$pkgdir/usr/lib/kdump/kdump_load.sh" - install -D -m0755 makedumpfile "$pkgdir/usr/lib/kdump/makedumpfile" install -D -m0755 submit_report.sh "$pkgdir/usr/lib/kdump/submit_report.sh" + install -D -m0755 submitter_load.sh "$pkgdir/usr/lib/kdump/submitter_load.sh" } diff --git a/README.md b/README.md index d59df1f..c92f9de 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,8 @@ # collection, that only grabs dmesg, and a more complete setting to grab the # whole (compressed) vmcore. The tunnings are available at /etc/default/kdump. # -# Also, the infrastructure is able to configure and save pstore-RAM logs. +# Also, the infrastructure is able to configure and save pstore-RAM logs; +# this is the default option. # # After installation and a reboot, things should be all set EXCEPT for GRUB # config - please check the CAVEATS/INSTRUCTIONS section below. Notice the @@ -27,17 +28,17 @@ # CAVEATS / INSTRUCTIONS # ########################################################################### # (a) For now, we don't automatically edit any GRUB config, so the minimum -# necessary action after installing this package is to add "crashkernel=160M" +# necessary action after installing this package is to add "crashkernel=192M" # to your GRUB config in order subsequent boots pick this setting and do reserve # the memory, or else kdump cannot work. The memory amount was empirically -# determined - 128M wasn't enough and 144M is unstable, so 160M seems good enough. +# determined - 144M wasn't enough and 160M is unstable, so 192M seems good enough. # If you prefer to rely on pstore-RAM, no GRUB setting should be required; this # is currently the default (see /etc/default/kdump). # # (b) It requires (obviously) a RW rootfs - we've used tune2fs in order to make # it read-write, since it's RO by default. 
Also, we assume the nvme partition # scheme is default across all versions and didn't change with new updates -# for example - kdump relies in mounting partitions, etc. +# for example - both kdump and pstore rely on mounting partitions, etc. # # (c) Due to a post-transaction hook executed by libalpm (90-dracut-install.hook), # unfortunately after installing the kdump-steamos package *all* initramfs images @@ -45,9 +46,9 @@ # but for now be prepared: the installation take some (long) minutes due to that ={ # # (d) Unfortunately makedumpfile from Arch Linux is not available on official -# repos, only in AUR. So, we're hereby _packing the binary_ with all the scripts, -# which is a temporary workaround and should be resolved later - already started -# to "lobby" for package inclusion in the official channels: +# repos, only in AUR. But it is available on Holo, so we make use of that. +# Also, a discussion was started to get it included in the official repos: +# https://lists.archlinux.org/pipermail/aur-general/2022-January/036767.html # https://aur.archlinux.org/packages/makedumpfile/#comment-843853 # # @@ -68,13 +69,16 @@ # in the past and relying in sysrq reboot as a quirk managed to be a safe option, # so this is something to think about here. Should be easy to implement. # -# (5) Maybe a good idea would be to allow creating the minimum image for any -# specified kernel, not only for the running one (which is what we do now). -# Low-priority idea, easy to implement. +# (5) The log submission mechanism is incomplete - we save the logs as tar.zst +# files, but they are not submitted to any remote server, etc. # # (6) Pstore ramoops backend has some limitations that we're discussing with # the kernel community - right now we can only collect ONE dmesg and its # size is truncated on "record_size" bytes, not allowing a file split like # efi-pstore; hopefully we can improve that.
# +# (7) Maybe a good idea would be to allow creating the minimum image for any +# specified kernel, not only for the running one (which is what we do now). +# Low-priority idea, easy to implement. +# ``` diff --git a/kdump-steamos.service b/kdump-steamos.service index 6be5828..a91f164 100644 --- a/kdump-steamos.service +++ b/kdump-steamos.service @@ -10,7 +10,7 @@ Description=SteamOS kdump loader boot-time service Type=oneshot StandardOutput=journal ExecStartPre=/usr/lib/kdump/kdump_load.sh -ExecStart=/usr/lib/kdump/submit_report.sh +ExecStart=/usr/lib/kdump/submitter_load.sh RemainAfterExit=yes [Install] diff --git a/kdump.etc b/kdump.etc index 79baf60..8b92cc1 100644 --- a/kdump.etc +++ b/kdump.etc @@ -33,3 +33,8 @@ MAKEDUMPFILE_DMESG_CMD="--dump-dmesg" # relies in having an available RAM buffer on /proc/iomem with at least 5MiB # in size. USE_PSTORE_RAM=1 + +# This is a log submission setting, based on Steam config files, and +# *should not* be changed, or else the log sending mechanism will be +# impaired. +LOGINVDF="/home/doorstop/.local/share/Steam/config/loginusers.vdf" diff --git a/kdump_collect.sh b/kdump_collect.sh index 083548c..9f03016 100755 --- a/kdump_collect.sh +++ b/kdump_collect.sh @@ -14,7 +14,7 @@ . /usr/lib/kdump/kdump.etc VMCORE="/proc/vmcore" -KDUMP_TIMESTAMP=$(date +"%Y%m%d%H%M") +KDUMP_TIMESTAMP=$(date -u +"%Y%m%d%H%M") KDUMP_FOLDER="/kdump_path/${KDUMP_FOLDER}/crash/${KDUMP_TIMESTAMP}" # Bail out in case we don't have a vmcore, i.e. 
either we're not kdumping @@ -31,11 +31,11 @@ fi mkdir -p "${KDUMP_FOLDER}" -/usr/lib/kdump/makedumpfile ${MAKEDUMPFILE_DMESG_CMD} $VMCORE "${KDUMP_FOLDER}/dmesg.txt" +/usr/bin/makedumpfile ${MAKEDUMPFILE_DMESG_CMD} $VMCORE "${KDUMP_FOLDER}/dmesg.txt" sync "${KDUMP_FOLDER}/dmesg.txt" -if [ ${FULL_COREDUMP} -ne 0 ]; then - /usr/lib/kdump/makedumpfile ${MAKEDUMPFILE_COREDUMP_CMD} $VMCORE "${KDUMP_FOLDER}/vmcore.compressed" +if [ "${FULL_COREDUMP}" -ne 0 ]; then + /usr/bin/makedumpfile ${MAKEDUMPFILE_COREDUMP_CMD} $VMCORE "${KDUMP_FOLDER}/vmcore.compressed" sync "${KDUMP_FOLDER}/vmcore.compressed" fi diff --git a/kdump_load.sh b/kdump_load.sh index 924eda1..498d367 100755 --- a/kdump_load.sh +++ b/kdump_load.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # SPDX-License-Identifier: LGPL-2.1+ # @@ -21,22 +21,22 @@ fi DEVN_MOUNTED=$(mount |grep "${MOUNT_DEVNODE}" | head -n1 | cut -f3 -d\ ) KDUMP_FOLDER="${DEVN_MOUNTED}/${KDUMP_FOLDER}" -echo "${KDUMP_FOLDER}" > ${KDUMP_MNT} -sync ${KDUMP_MNT} +echo "${KDUMP_FOLDER}" > "${KDUMP_MNT}" +sync "${KDUMP_MNT}" -if [ "$1" == "initrd" ]; then +if [ "$1" = "initrd" ]; then mkdir -p "${KDUMP_FOLDER}" rm -f "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" echo "Creating the kdump initramfs for kernel \"$(uname -r)\" ..." dracut --no-early-microcode --host-only -q -m\ "bash systemd systemd-initrd systemd-sysusers modsign dbus-daemon kdump dbus udev-rules dracut-systemd base fs-lib shutdown"\ - --kver $(uname -r) "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" + --kver "$(uname -r)" "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" exit 0 fi -if [ "$1" == "clear" ]; then +if [ "$1" = "clear" ]; then rm -f ${KDUMP_FOLDER}/kdump-initrd-* exit 0 fi @@ -46,29 +46,37 @@ fi # here a 5MiB memory region. # Notice that we assume ramoops is a module here - if built-in, we should # properly load it through command-line parameters. 
-if [ ${USE_PSTORE_RAM} -eq 1 ]; then +if [ "${USE_PSTORE_RAM}" -eq 1 ]; then MEM_REQUIRED=5242880 # 5MiB RECORD_SIZE=0x200000 # 2MiB RANGE=$(grep "RAM buffer" /proc/iomem | head -n1 | cut -f1 -d\ ) - MEM_END=$(echo $RANGE | cut -f2 -d\-) - MEM_START=$(echo $RANGE | cut -f1 -d\-) + MEM_END=$(echo "$RANGE" | cut -f2 -d\-) + MEM_START=$(echo "$RANGE" | cut -f1 -d\-) MEM_SIZE=$(( 16#${MEM_END} - 16#${MEM_START} )) if [ ${MEM_SIZE} -ge ${MEM_REQUIRED} ]; then if modprobe ramoops mem_address=0x${MEM_START} mem_size=${MEM_REQUIRED} record_size=${RECORD_SIZE}; then exit 0 fi + logger "pstore-RAM load was attempted and failed...will try kdump" fi # Fallbacks to kdump load - if we fail when configuring pstore, better try kdump; # who knows and we may be lucky enough to have some crashkernel reserved memory... # TODO (maybe): could invert the order and try kdump first, if it fails, try pstore! fi +# TODO: insert code here to validate that crashkernel is configured and +# memory is reserved; if not, set it on grub.cfg and recreate the EFI grub +# config file, warning users that in the current boot kdump is not set. + # Stolen from Debian kdump KDUMP_CMDLINE=$(sed -re 's/(^| )(crashkernel|hugepages|hugepagesz)=[^ ]*//g;s/"/\\\\"/' /proc/cmdline) KDUMP_CMDLINE="${KDUMP_CMDLINE} panic=-1 oops=panic fsck.mode=force fsck.repair=yes nr_cpus=1 reset_devices" VMLINUX="$(grep -o 'BOOT_IMAGE=[^ ]*' /proc/cmdline)" -kexec -s -p "${VMLINUX#*BOOT_IMAGE=}" --initrd "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" --append="${KDUMP_CMDLINE}" || true +if ! 
kexec -s -p "${VMLINUX#*BOOT_IMAGE=}" --initrd "${KDUMP_FOLDER}/kdump-initrd-$(uname -r).img" --append="${KDUMP_CMDLINE}"; then + logger "kdump load was attempted and failed" + exit 0 +fi diff --git a/makedumpfile b/makedumpfile deleted file mode 100755 index 50ddd39..0000000 Binary files a/makedumpfile and /dev/null differ diff --git a/module-setup.sh b/module-setup.sh index a41121f..e18d335 100755 --- a/module-setup.sh +++ b/module-setup.sh @@ -28,6 +28,7 @@ install() { # Install necessary binaries inst date inst sync + inst makedumpfile mkdir -p $initdir/usr/lib/kdump cp -LR --preserve=all /usr/lib/kdump/* $initdir/usr/lib/kdump/ diff --git a/submit_report.sh b/submit_report.sh index b927669..7c4ba95 100755 --- a/submit_report.sh +++ b/submit_report.sh @@ -4,50 +4,138 @@ # # Copyright (c) 2021 Valve. # -# This is currently a dummy script for kdump, but collects and clears -# pstore saved logs for now. It aims, in the future, to submit error/crash -# reports to Valve servers in order to do a post-mortem analysis. -# This is part of SteamOS kdump - it is invoked by systemd on boot time -# and should always exit graciously to avoid breaking the boot services. +# This is the SteamOS kdump/pstore log collector and submitter; this script +# prepares the pstore/kdump collected data and submit it to the services that +# handle support at Valve. It considers pstore as a first alternative, if no +# logs found (or if pstore is not mounted for some reason), tries to check +# if kdump logs are present. # +# We do some validation to be sure KDUMP_MNT pointed path is valid... +# That and having a valid /etc/default/kdump are essential conditions. if [ ! -f "/etc/default/kdump" ]; then + logger "/etc/default/kdump not present - aborting..." || true exit 0 fi . /etc/default/kdump -# Yeah, we assume pstore is mounted by default, in this location; +KDUMP_MAIN_FOLDER="$(cat "${KDUMP_MNT}")" +rm -f "${KDUMP_MNT}" + +if [ ! 
-d "${KDUMP_MAIN_FOLDER}" ]; then + logger "invalid folder: ${KDUMP_MAIN_FOLDER} - aborting..." || true + exit 0 +fi + +LOGS_FOUND=0 +KDUMP_LOGS_FOLDER="${KDUMP_MAIN_FOLDER}/logs" + +# Use UTC timezone to match kdump collection +CURRENT_TSTAMP=$(date -u +"%Y%m%d%H%M") + +# We assume pstore is mounted by default, in this location; # if not, we get a 0 and don't loop. PSTORE_CNT=$(find /sys/fs/pstore/* 2>/dev/null | grep -c ramoops) -if [ ${PSTORE_CNT} -eq 0 ]; then - exit 0 +if [ "${PSTORE_CNT}" -ne 0 ]; then + + # Dump the pstore logs in the <...>/kdump/logs/pstore subfolder. + PSTORE_FOLDER="${KDUMP_LOGS_FOLDER}/pstore" + mkdir -p "${PSTORE_FOLDER}" + + LOOP_CNT=0 + while [ "${PSTORE_CNT}" -gt 0 ]; do + PSTORE_FILE="$(find /sys/fs/pstore/* | grep ramoops | sort | head -n1)" + SAVED_FILE="${PSTORE_FOLDER}/dmesg-pstore.${CURRENT_TSTAMP}-${LOOP_CNT}" + + cat "${PSTORE_FILE}" > "${SAVED_FILE}" + sync "${SAVED_FILE}" + rm -f "${PSTORE_FILE}" + + PSTORE_CNT=$((PSTORE_CNT - 1)) + LOOP_CNT=$((LOOP_CNT + 1)) + done + LOGS_FOUND=${LOOP_CNT} + +# Enter the else block in case we don't have pstore logs - maybe we +# have kdump logs then. +else + KDUMP_CRASH_FOLDER="${KDUMP_MAIN_FOLDER}/crash" + # Count only the top-level per-crash folders (-maxdepth 0 on each glob match). + KDUMP_CNT=$(find "${KDUMP_CRASH_FOLDER}"/* -maxdepth 0 -type d 2>/dev/null | wc -l) + if [ "${KDUMP_CNT}" -ne 0 ]; then + # Dump the kdump logs in the <...>/kdump/logs/kdump subfolder. + KD_FOLDER="${KDUMP_LOGS_FOLDER}/kdump" + mkdir -p "${KD_FOLDER}" + + LOOP_CNT=0 + while [ "${KDUMP_CNT}" -gt 0 ]; do + CRASH_CURRENT=$(find "${KDUMP_CRASH_FOLDER}"/* -maxdepth 0 -type d 2>/dev/null | head -n1) + CRASH_TSTAMP=$(basename "${CRASH_CURRENT}") + + if [ -s "${CRASH_CURRENT}/dmesg.txt" ]; then + SAVED_FILE="${KD_FOLDER}/dmesg-kdump.${CRASH_TSTAMP}" + mv "${CRASH_CURRENT}/dmesg.txt" "${SAVED_FILE}" + sync "${SAVED_FILE}" + + fi + + # We don't care about submitting a vmcore, but let's save it if such file exists.
+ if [ -s "${CRASH_CURRENT}/vmcore.compressed" ]; then + SAVED_FILE="${KDUMP_CRASH_FOLDER}/vmcore.${CRASH_TSTAMP}" + mv "${CRASH_CURRENT}/vmcore.compressed" "${SAVED_FILE}" + sync "${SAVED_FILE}" + + fi + + rm -rf "${CRASH_CURRENT}" + KDUMP_CNT=$((KDUMP_CNT - 1)) + LOOP_CNT=$((LOOP_CNT + 1)) + + done + LOGS_FOUND=$((LOGS_FOUND + LOOP_CNT)) + fi + fi -# We do some validation to be sure KDUMP_MNT pointed path is valid... -KDUMP_CRASH_FOLDER="$(cat ${KDUMP_MNT})" -rm -f ${KDUMP_MNT} +# If we have pstore and/or kdump logs, let's process them in order to submit... +if [ "${LOGS_FOUND}" -ne 0 ]; then -if [ ! -d "${KDUMP_CRASH_FOLDER}" ]; then - exit 0 + PNAME="$(dmidecode -s system-product-name)" + if [ "${PNAME}" = "Jupiter" ]; then + SN="$(dmidecode -s system-serial-number)" + else + SN=0 + fi + + STEAM_ACCOUNT=0 + if [ -s "${LOGINVDF}" ]; then + # The following awk command was borrowed from: + # https://unix.stackexchange.com/a/663959 + NUMREG=$(grep -c AccountName "${LOGINVDF}") + IDX=1 + while [ "${IDX}" -le "${NUMREG}" ]; do + MR=$(awk -v n="${IDX}" -v RS='}' 'NR==n{gsub(/.*\{\n|\n$/,""); print}' "${LOGINVDF}" | grep "MostRecent" | cut -f4 -d\") + + # Compare as a string: MR is empty when the record has no "MostRecent" + # key, and an integer test on "" errors out instead of skipping it. + if [ "${MR}" = "1" ]; then + STEAM_ACCOUNT=$(awk -v n="${IDX}" -v RS='}' 'NR==n{gsub(/.*\{\n|\n$/,""); print}' "${LOGINVDF}" | grep "AccountName" | cut -f4 -d\") + break + fi + + IDX=$((IDX + 1)) + done + fi + + LOG_FNAME="steamos-${SN}-${STEAM_ACCOUNT}.${CURRENT_TSTAMP}.tar" + LOG_FNAME="${KDUMP_MAIN_FOLDER}/${LOG_FNAME}" + tar cf "${LOG_FNAME}" "${KDUMP_LOGS_FOLDER}" 1>/dev/null 2>&1 + # Only drop the raw logs once the compressed archive is safely written. + if zstd < "${LOG_FNAME}" > "${LOG_FNAME}.zst" && sync "${LOG_FNAME}.zst"; then + rm -rf "${KDUMP_LOGS_FOLDER}" "${LOG_FNAME}" + fi + + # TODO: implement a log submission mechanism, in order to send the zstd file + # to Valve servers through an API. fi - -# If valid, then dump the pstore logs in the crash subfolder.
-KDUMP_CRASH_FOLDER="${KDUMP_CRASH_FOLDER}/crash" -mkdir -p ${KDUMP_CRASH_FOLDER} - -PSTORE_TSTAMP=$(date +"%Y%m%d%H%M") -LOOP_CNT=0 -while [ ${PSTORE_CNT} -gt 0 ]; -do - PSTORE_FILE="$(find /sys/fs/pstore/* | grep ramoops | sort | head -n1)" - SAVED_FILE="${KDUMP_CRASH_FOLDER}/dmesg-pstore.${PSTORE_TSTAMP}-${LOOP_CNT}" - - cat ${PSTORE_FILE} > ${SAVED_FILE} - sync ${SAVED_FILE} - rm -f ${PSTORE_FILE} - - PSTORE_CNT=$((${PSTORE_CNT} - 1)) - LOOP_CNT=$((${LOOP_CNT} + 1)) -done -exit 0 diff --git a/submitter_load.sh b/submitter_load.sh new file mode 100644 index 0000000..a033328 --- /dev/null +++ b/submitter_load.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# +# SPDX-License-Identifier: LGPL-2.1+ +# +# Copyright (c) 2022 Valve. +# +# This is the systemd loader for the SteamOS kdump log submitter; +# it's invoked by systemd, basically it just loads a detached +# process and exits successfully, in order to prevent boot hangs. + +/usr/lib/kdump/submit_report.sh & disown +exit 0