How to suspend jobs with an administrative command
Known to work with Condor version: 7.0
By running a command, you can suspend jobs running under Condor across the entire pool or portions of it. The following shell script is an example of how to do this. The necessary configuration changes are documented at the top of the script.
#!/bin/sh
# Author: Dan Bradley <dan@hep.wisc.edu>
# Date: 2007-12-21
#
# Example condor configuration required to make this script work:
# (You need to restart condor to enable runtime config modification.)
#
# SuspendedByAdmin = False
# SETTABLE_ATTRS_ADMINISTRATOR = SuspendedByAdmin
# ENABLE_RUNTIME_CONFIG = True
#
# START = ($(START)) && SuspendedByAdmin =!= True
# WANT_SUSPEND = ($(WANT_SUSPEND)) || SuspendedByAdmin =?= True
# SUSPEND = ($(SUSPEND)) || SuspendedByAdmin =?= True
# CONTINUE = ($(CONTINUE)) && SuspendedByAdmin =!= True
# PrintUsage
# Write the command-line help text to stdout, then terminate the script
# with exit status 2. Takes no arguments; $0 is interpolated into the
# first line of the help text.
PrintUsage() {
  cat <<EOF
USAGE: $0 OPTIONS

Suspend/unsuspend jobs on GLOW. This depends on the condor
configuration doing the right thing when SuspendedByAdmin
is remotely modified by this script.

OPTIONS:
 --site=X (GLOW site name)
 --constraint=X (arbitrary ClassAd constraint)
 --unsuspend (remove suspension state set previously)
 --dry-run (don't do anything; just show what would have been done)
 --status (show suspension state)
EOF
  exit 2
}
# ParseOptions ARGS...
# Parse the command line into the following global variables:
#   SITE        value of --site (empty if not given)
#   CONSTRAINT  value of --constraint (empty if not given)
#   SUSPEND     "True" by default, "False" when --unsuspend is given
#   DRY_RUN     empty by default, "echo dry-run:" when --dry-run is given
#   STATUS      empty by default, "1" when --status is given
# On -h/--help, an unknown option, or a stray positional argument, the usage
# text is printed via PrintUsage (which exits the script).
#
# Requires a getopt that understands -o/-l (long options), e.g. the
# util-linux implementation.
ParseOptions() {
  SITE=
  SUSPEND=True
  DRY_RUN=
  CONSTRAINT=
  STATUS=

  OPTS=$(getopt -o "h" -l "help,site:,constraint:,unsuspend,dry-run,status" -- "$@") || PrintUsage

  # getopt emits a shell-quoted, normalized argument list; eval re-splits it
  # into this function's positional parameters.
  eval set -- "$OPTS"

  while [ $# -gt 0 ]; do
    case "$1" in
      -h|--help) PrintUsage ;;
      --site) shift; SITE=$1 ;;
      --constraint) shift; CONSTRAINT=$1 ;;
      --unsuspend) SUSPEND=False ;;
      --dry-run) DRY_RUN="echo dry-run:" ;;
      --status) STATUS=1 ;;
      --) shift; break ;;
      *) echo "Unexpected option $1"; PrintUsage ;;
    esac
    shift
  done

  # Anything left after "--" is a positional argument. This script takes
  # none, and silently ignoring one is dangerous: "--site=X unsuspend"
  # (dashes forgotten) would otherwise SUSPEND the site.
  if [ $# -gt 0 ]; then
    echo "Unexpected argument $1"
    PrintUsage
  fi
}
ParseOptions "$@"
# AddSiteToConstraint
# If SITE is non-empty, restrict CONSTRAINT to that site by AND-ing it with
#   Site =?= "<SITE>"
# Reads the globals SITE and CONSTRAINT; rewrites CONSTRAINT in place.
# Does nothing when SITE is empty.
AddSiteToConstraint() {
  [ -n "$SITE" ] || return 0
  if [ -n "$CONSTRAINT" ]; then
    # Parenthesize the user-supplied expression: && binds tighter than ||,
    # so without the parentheses a constraint such as 'A || B' would become
    # 'A || (B && Site =?= ...)' and match machines outside the site.
    CONSTRAINT="($CONSTRAINT) && "
  fi
  CONSTRAINT="${CONSTRAINT}Site =?= \"${SITE}\""
}
AddSiteToConstraint
# --status mode: query condor_status with the current constraint, printing
# the Name and SuspendedByAdmin attributes of each matching ad, then exit
# successfully without modifying anything.
# NOTE(review): "NewLine" looks like a placeholder attribute used only to get
# the trailing "\n" format emitted; confirm against condor_status behavior.
if [ -n "$STATUS" ]; then
  condor_status -constraint "$CONSTRAINT" \
    -f "%s " Name \
    -f "SuspendedByAdmin=%s" SuspendedByAdmin \
    -f "\n" NewLine
  exit 0
fi
# Refuse to go any further without a constraint: past this point the script
# changes machine state, so an empty CONSTRAINT is treated as a usage error
# (exit status 2).
[ -n "$CONSTRAINT" ] || {
  echo "You must specify --constraint or --site."
  exit 2
}
# Choose the verb used in progress messages from SUSPEND ("True" means
# suspending, anything else means unsuspending) and announce what is about
# to be done.
case "$SUSPEND" in
  True) action=Suspending ;;
  *) action=Unsuspending ;;
esac
echo "$action jobs on machines matching constraint $CONSTRAINT"
# SetSuspendStateOnHosts
# Read host names from stdin, one per line, and for each non-empty one:
#   1. set SuspendedByAdmin=$SUSPEND in that host's startd runtime config
#      (condor_config_val -rset), then
#   2. run condor_reconfig on the host.
# Reads the globals action (progress-message verb), SUSPEND (True/False) and
# DRY_RUN (empty, or a command prefix such as "echo dry-run:").
# A host whose condor_config_val call fails is reported on stderr and is not
# reconfigured; processing continues with the remaining hosts.
# Returns 0 if every host succeeded, 1 if any command failed.
SetSuspendStateOnHosts() {
  failed=0
  while read -r host; do
    [ -n "$host" ] || continue
    echo "$action $host"
    # $DRY_RUN is deliberately unquoted: it is either empty (run the command)
    # or "echo dry-run:" which must word-split into a command prefix.
    if ! $DRY_RUN condor_config_val -startd -name "$host" -rset "SuspendedByAdmin=$SUSPEND"; then
      echo "Failed to set SuspendedByAdmin=$SUSPEND on $host; skipping reconfig" >&2
      failed=1
      continue
    fi
    if ! $DRY_RUN condor_reconfig "$host"; then
      echo "condor_reconfig failed on $host" >&2
      failed=1
    fi
  done
  return "$failed"
}

# One entry per distinct machine matching the constraint; as the last stage of
# the pipeline, the function's return status becomes the pipeline's status.
condor_status -constraint "$CONSTRAINT" -f "%s\n" Machine | sort -u | SetSuspendStateOnHosts
