#!/bin/sh
# Auto-restart binaries with changed libraries
# Copyright 2010-2014 Gentoo Foundation; Distributed under the GPL v2
# Author: Christian Ruppert
#
# Walks /proc/<pid>/maps looking for mappings flagged "(deleted)" and either
# reports the owning processes (-r, default), prints a one-line summary for
# monitoring (-c), or tries to restart the matching init scripts (-f).
#
# NOTE: although the shebang is /bin/sh, this script relies on `local`,
# on GNU cut (-d '' and --output-delimiter) and on gawk's three-argument
# match().

VERBOSE=0
REPORT=1
CHECK=0
FIX=0

# Print all ancestor PIDs of $1 (one per line), nearest parent first,
# stopping before PID 1. Prints nothing if the status file is unreadable.
get_ppids() {
  local pid=${1:-1}

  [ "$pid" -eq 1 ] && return

  local ppid
  ppid="$(awk '$1 == "PPid:" { print $2 }' "/proc/$pid/status" 2>/dev/null || echo 0)"
  # A readable status file without a PPid line would leave this empty and
  # break the numeric tests below; treat it like the error case.
  ppid=${ppid:-0}

  [ "$ppid" -eq 1 ] && return
  # "Error" case
  [ "$ppid" -eq 0 ] && return

  echo "$ppid"
  get_ppids "$ppid"
}

# Restart /etc/init.d/$1 if such an init script exists.
#   status 0            -> service is running via its init script: restart
#   Debian              -> restart regardless of the reported status
#   status 32           -> service crashed: restart
#   status 3            -> not started via the init script: only print a hint
# Returns 0 when there is no init script for the given name.
init_restart() {
  local service=$1
  local ret=0

  [ ! -f "/etc/init.d/$service" ] && return 0

  # Capture the status right away; any later command (including `local` or
  # another test) would overwrite $?.
  "/etc/init.d/$service" --quiet status 1>/dev/null 2>&1 || ret=$?

  # Only restart if it is really running through the init script
  if [ "$ret" -eq 0 ]; then
    echo "Restarting ${service}..."
    "/etc/init.d/$service" restart
  elif [ -f /etc/debian_version ]; then
    echo "Restarting ${service}..."
    "/etc/init.d/$service" restart
  elif [ "$ret" -eq 32 ]; then
    # service crashed
    echo "Restarting crashed service: ${service} ..."
    "/etc/init.d/$service" restart
  elif [ "$ret" -eq 3 ]; then
    echo "It seems service \"${service}\" has been started \"by hand\""
    echo "would suggest to run \"killall ${service}; /etc/init.d/${service} start\""
  fi
}

# Print the detailed report for one process.
# Arguments: pid, ancestor pids, argv[0], remaining argv, deleted mappings,
#            pid handed to pstree (skipped when it is not greater than 1).
_report_process() {
  local r_pid=$1
  local r_ppids=$2
  local r_cmd=$3
  local r_cmdline=$4
  local r_deleted=$5
  local r_tree_pid=$6

  echo "Found a process which is still using deleted/updated files/libs:"
  echo "PID: ${r_pid}"
  [ -n "${r_ppids}" ] && echo "PPIDs: ${r_ppids}"
  echo "Command: ${r_cmd}"
  echo "Cmdline: ${r_cmdline}"
  echo "${r_deleted}"
  echo
  [ "$r_tree_pid" -gt 1 ] && pstree -naup "$r_tree_pid"
  echo
}

# Scan every process for deleted mappings.
#   $1 - mode: "report" (default), "check" or "fix"
#   $2 - optional grep pattern; only processes with a deleted mapping
#        matching it are considered
# Returns 1 if at least one relevant (non-ignored) process was found,
# 0 otherwise. In "check" mode a single summary line is printed.
deleted_files() {
  local mode=${1:-report}
  local only_if="${2}"
  local check_out=""
  local exit_code=0
  local prev_exit_code=0
  local proc_pid pid cmd cmdline deleted ppid ppid_command ppids ppid_first
  local command_base command_dir skip del rsyncd_conf mysqld_conf

  # deleted files test, lsof alternative
  for proc_pid in /proc/[0-9]*; do
    pid=$(basename -- "${proc_pid}")

    # Skip init
    [ "$pid" -eq 1 ] && continue

    # Skip already fixed or gone procs
    [ ! -d "${proc_pid}" ] && continue

    # argv[0]; empty for kernel threads
    cmd=$(cut -d '' -f 1 "$proc_pid/cmdline" 2>/dev/null)
    [ -z "${cmd}" ] && continue

    # Remaining arguments, space separated
    cmdline="$(cut -d '' --output-delimiter=" " -f 2- "$proc_pid/cmdline" 2>/dev/null)"

    # NOTE: "uniq" wouldn't work here
    deleted="$(awk '$7 == "(deleted)" { print $6 }' "$proc_pid/maps" 2>/dev/null \
      | grep -E -v '^/SYSV[[:xdigit:]]+' \
      | sort -u)"
    [ -z "${deleted}" ] && continue

    # Skip Threads: a child whose argv[0] equals its parent's is assumed to
    # be a worker of the same program; the parent is handled instead.
    ppid="$(awk '$1 == "PPid:" { print $2 }' "$proc_pid/status" 2>/dev/null)"
    if [ -n "$ppid" ] && [ "$ppid" -gt 1 ]; then
      ppid_command=$(cut -d '' -f 1 "/proc/$ppid/cmdline" 2>/dev/null)
      [ "${cmd}" = "${ppid_command}" ] && continue
    fi

    ppids="$(get_ppids "$pid" | sort -r | xargs)"
    ppid_first="$(echo "$ppids" | awk 'BEGIN { FS = " "; } { print $1 }')"
    [ -z "$ppid_first" ] && ppid_first=1

    command_base="$(basename -- "${cmd}" | awk '{ print $1 }')"
    command_dir="$(dirname -- "${cmd}")"

    # Ignore /dev/zero, /var/run/, /run/, /SYS and /proc/, /[aio], /var/tmp/mysql
    [ -z "$(printf '%s\n' "$deleted" | awk '{ for (i = 1; i <= NF; i++) { if (!match($i, /^\/(dev\/zero|((var\/)?run|SYS|proc)\/|\[aio\]|var\/tmp\/mysql)/ )) print $i; } }')" ] && continue

    if [ -n "${only_if}" ]; then
      skip=1
      # Word splitting is intended: one mapping path per word.
      for del in $deleted; do
        # Feed the bare path to grep so anchored patterns (^..., ...$) work,
        # and quote the pattern so it is never split into file arguments.
        if printf '%s\n' "${del}" | grep -q -- "${only_if}"; then
          skip=0
          break
        fi
      done
      [ "$skip" -eq 1 ] && continue
    fi

    # Remember the result so far: an ignored process below must not erase a
    # finding from an earlier process.
    prev_exit_code=$exit_code
    # Anything beyond should actually return 1
    exit_code=1

    if [ "${mode}" = "report" ] || [ "${mode}" = "check" ]; then
      case $cmd in
        # Ignore those
        SCREEN|sshd:*|su|sudo|/bin/login|ssh-agent|irssi )
          if [ "$VERBOSE" -eq 1 ]; then
            _report_process "$pid" "$ppids" "$cmd" "$cmdline" "$deleted" "$ppid_first"
          fi
          exit_code=$prev_exit_code
          continue
          ;;
        * )
          if [ "${mode}" = "report" ]; then
            _report_process "$pid" "$ppids" "$cmd" "$cmdline" "$deleted" "$ppid_first"
          fi
          check_out="${check_out:+${check_out}, }${command_base}(${pid})"
          continue
          ;;
      esac
    elif [ "${mode}" = "fix" ]; then
      # Try based on the whole cmdline (cmdline+1) first.
      # This special case is useful when an interpreter is used, like perl
      # or python. It has to run before the dispatch below because every arm
      # there ends in `continue`.
      # NOTE: We may need to tweak that more later to use /proc/<pid>/cmdline[1] only
      if [ -n "${cmdline}" ]; then
        case $cmdline in
          /usr/local/sbin/apache-segfault-watch )
            kill "$pid"
            /usr/local/sbin/apache-segfault-watch
            continue
            ;;
        esac
      fi

      case $command_dir in
        /usr/sbin|/sbin|/usr/bin|/usr/local/bin|/usr/local/sbin )
          case $command_base in
            getty|agetty )
              # NOTE(review): this pattern only matches "agetty"; plain
              # "getty" processes are not signalled - confirm intent.
              pkill '^[a]getty$'
              continue
              ;;
            cron )
              init_restart cronie
              continue
              ;;
            rsync )
              # Evil rsync hack: derive the init script name from --config=
              rsyncd_conf="$(awk 'BEGIN { FS = "\0"; } { for (i = 1; i < NF; i++) { if (match($i, /^--config=(.*\/)?([^\/]+)\.conf$/, m)) print m[2]; } }' "${proc_pid}/cmdline")"
              if [ -n "${rsyncd_conf}" ] && [ -f "/etc/init.d/${rsyncd_conf}" ]; then
                init_restart "$rsyncd_conf"
              else
                init_restart rsyncd
              fi
              continue
              ;;
            apache2 )
              # Workaround the restart issues with the apache2 init script.
              # Gentoo ships /etc/gentoo-release; the underscore spelling is
              # still accepted for backward compatibility.
              if [ -f /etc/gentoo-release ] || [ -f /etc/gentoo_release ]; then
                /etc/init.d/apache2 stop
                sleep 5
                /etc/init.d/apache2 zap start
              else
                init_restart apache2
              fi
              continue
              ;;
            mysqld )
              # Derive the instance name from --defaults-file=
              mysqld_conf="$(awk 'BEGIN { FS = "\0"; } { for (i = 1; i < NF; i++) { if (match($i, /^--defaults-file=(.*\/)?(my-?)?([^\/]+)\.cnf$/, m)) print m[3]; } }' "${proc_pid}/cmdline")"
              if [ -n "${mysqld_conf}" ] && [ -f "/etc/init.d/mysql-${mysqld_conf}" ]; then
                init_restart "mysql-${mysqld_conf}"
              else
                init_restart mysql
              fi
              continue
              ;;
            flow-capture )
              init_restart flowcapture
              continue
              ;;
            * )
              init_restart "$command_base"
              continue
              ;;
          esac
          ;;
        * )
          case $cmd in
            tlsmgr|qmgr|/usr/lib/postfix/master|/usr/libexec/postfix/master )
              init_restart postfix
              continue
              ;;
            /usr/lib*/postgresql-9.0/bin/postgres )
              init_restart postgresql-9.0
              continue
              ;;
            /usr/lib*/postgresql-8.4/bin/postgres )
              init_restart postgresql-8.4
              continue
              ;;
            # Workaround for "broken" proc titles with mod_perl/bugzilla
            /var/www/bugs.gen )
              # Workaround the restart issues with the apache2 init script
              /etc/init.d/apache2 stop
              sleep 5
              /etc/init.d/apache2 zap start
              # init_restart apache2
              continue
              ;;
            /var/www/bugs.gentoo.org/htdocs/jobqueue.pl )
              init_restart bugzilla-queue
              continue
              ;;
            * )
              init_restart "$command_base"
              continue
              ;;
          esac
          ;;
      esac
    fi
  done

  if [ "${mode}" = "check" ]; then
    if [ -n "${check_out}" ]; then
      echo "The following processes are using updated/deleted files: ${check_out}"
      exit_code=1
    else
      echo "No updated/deleted files are used by running processes"
      exit_code=0
    fi
  fi

  return $exit_code
}

# -r, -c and -f are mutually exclusive; the last one given wins.
while getopts d:vrcfh name; do
  case $name in
    d )
      DELETED=$OPTARG
      ;;
    v )
      VERBOSE=1
      ;;
    r )
      REPORT=1
      CHECK=0
      FIX=0
      ;;
    c )
      CHECK=1
      REPORT=0
      FIX=0
      ;;
    f )
      FIX=1
      CHECK=0
      REPORT=0
      ;;
    \?|h )
      printf "Usage: %s: [-v] [-r] [-c] [-f] [-d DELETED]\n" "$0"
      printf "\t-d DELETED - find/restart services still using the specified deleted file\n"
      printf "\t-v - verbose during report, will also show otherwise ignored procs\n"
      printf "\t-r - report only (default)\n"
      printf "\t-c - check only, output format and exit values are meant to be used for Nagios or Icinga\n"
      printf "\t-f - try to fix (restart) services\n"
      exit 2
      ;;
  esac
done
shift $((OPTIND - 1))

if [ "$REPORT" -eq 1 ]; then
  deleted_files "report" "${DELETED}"
elif [ "$FIX" -eq 1 ]; then
  deleted_files "fix" "${DELETED}"
  # Report whatever is still left after the restart attempts; its status
  # becomes the script's exit status.
  deleted_files "report" "${DELETED}"
elif [ "$CHECK" -eq 1 ]; then
  deleted_files "check" "${DELETED}"
fi
exit $?