Page History

Turn Off History

To add backfill slots to an execute node that uses a single partitionable slot, use the following config.

This config adds a set of static slots that run only Folding@Home jobs, obtained via workfetch, and that run those jobs only when the partitionable slot is not using the resources.

pslot_backfill_folding_at_home.config 
# Adds static backfill slots that run Folding@Home to an HTCondor execute
# node that uses partitionable slots.
#
# Size of each backfill slot.  These two values are reused further down for
# the backfill slot type, the Folding@Home client arguments and the
# RequestCpus / RequestMemory of the fetched job.
#
BackfillCpusPerSlot   = 1
BackfillMemoryPerSlot = 1000

# Local account that the Folding@Home jobs run as.  HTCondor does not create
# this account; you must create it yourself.
#
FAH_Owner      = backfill

# Folding@Home locations; change these to match your install path and work
# directory.  Every backfill slot works in its own subdirectory of FAH_HOME,
# named after the slot id of the slot: /opt/fah/slot2, /opt/fah/slot3, etc.
#
FAH_HOME       = /opt/fah
FAH_Executable = /usr/bin/FAHClient

# Arguments to the Folding@Home client for each slot
#
# Checkpoint every N minutes, default is 15
FAH_CheckpointArg = 15
# The cpu and memory values offered to the client mirror the backfill slot
# size.  FAH_MemArg is stored as an unevaluated expression (per-slot memory
# multiplied by 1024*1024) and is only reduced to a number by $INT() below.
FAH_CpusArg       = $(BackfillCpusPerSlot)
FAH_MemArg        = $(BackfillMemoryPerSlot)*1024*1024

# Every numeric argument is passed through $INT() so the client always
# receives a plain integer, even when one of the knobs above is written as an
# expression.  --cpus and --client-threads intentionally use the same value.
FAH_Arguments = --config $(FAH_HOME)/config.xml \
                --data-directory . \
                --checkpoint $INT(FAH_CheckpointArg) \
                --client-threads $INT(FAH_CpusArg) \
                --memory $INT(FAH_MemArg) \
                --cpus $INT(FAH_CpusArg)

# Fetch-work hook: the script HTCondor runs to obtain FAH jobs.  Get a copy of
# this script from the HTCondor website and install it at the path below.
#
# What the script does:
#   1. extracts the slot id from stdin
#   2. exports it into the environment as _CONDOR_BACKFILL_SLOTID=<SlotId>
#   3. runs
#        condor_config_val -macro '$(FAH_JOB)'
#      which generates the job classad for that slot
#
FAH_HOOK_FETCH_WORK = $(FAH_HOME)/fetch_work

#
# ----- configuration of the backfill slots and workfetch ------
#

#
# Template for the Folding@Home job ClassAd that the FAH_HOOK_FETCH_WORK script
# returns for a backfill slot.  $(BACKFILL_SLOTID) is expected to be supplied
# through the _CONDOR_BACKFILL_SLOTID environment variable set by that script,
# so each slot gets its own working directory under FAH_HOME.  The job runs
# FAH_Executable as FAH_Owner, requests the per-slot cpus and memory, and
# requires an Unclaimed slot that advertises BackfillSlot.
# Everything between "@=end" and "@end" is the value of FAH_JOB.
#
FAH_JOB @=end
  Cmd = "$(FAH_Executable)"
  Iwd = "$(FAH_HOME)/slot$(BACKFILL_SLOTID)"
  Owner = "$(FAH_Owner)"
  User = "$(FAH_Owner)@$(UID_DOMAIN)"
  JobUniverse = 5
  Arguments = "$(FAH_Arguments)"
  Out = "fah.out"
  Err = "fah.err"
  NiceUser = true
  Requirements = BackfillSlot && (State == "Unclaimed")
  MaxJobRetirementTime = 0
  JobLeaseDuration = 604800
  RequestCpus = $(BackfillCpusPerSlot)
  RequestMemory = $(BackfillMemoryPerSlot)
@end


# Tell the startd it has twice the cpus and memory that were actually
# detected, so that both the p-slot and the backfill slots can be carved out.
# Also advertise extra information in the slot ads: the BackfillSlot
# attribute, plus every slot's Cpus and Memory (this is how the amount of
# Cpus and Memory left over in the p-slot becomes visible as slot1_Cpus and
# slot1_Memory).
#
NUM_CPUS          = 2 * $(DETECTED_CPUS)
MEMORY            = 2 * $(DETECTED_MEMORY)
STARTD_ATTRS      = $(STARTD_ATTRS) BackfillSlot
STARTD_SLOT_ATTRS = $(STARTD_SLOT_ATTRS) Cpus Memory

# Shorten the startd polling interval so that backfill jobs are killed
# quickly once production jobs arrive.
#
POLLING_INTERVAL = 2

# Partitionable slot (p-slot) for the production jobs.  It is given all of the
# detected cpus and memory, all gpus, and half of the disk and swap.
#
# NOTE: no slot_type_1_start expression is defined in this file.  Workfetch
# jobs are kept off the p-slot by the empty job hook keyword at the end of
# this section; if you already have a p-slot configuration, that is the
# setting to carry over.
#
SLOT_TYPE_1_PARTITIONABLE = true
SLOT_TYPE_1               = cpus=$(DETECTED_CPUS) memory=$(DETECTED_MEMORY) gpus=100% disk=50% swap=50%
NUM_SLOTS_TYPE_1          = 1

# An empty hook keyword disables workfetch for the p-slot
SLOT_TYPE_1_STARTD_JOB_HOOK_KEYWORD =

#
# Static slots for the backfill jobs.
#
# The number of static slots is however many slots of BackfillCpusPerSlot cpus
# and BackfillMemoryPerSlot memory fit into the detected cpus and memory,
# whichever runs out first.
#
# START accepts only jobs owned by FAH_Owner, and only while the p-slot
# (slot1) still has enough free resources.  PREEMPT evicts the backfill slots
# one at a time as the unused resources in the p-slot disappear: the slot
# with a given SlotID counts as exhausted once the free Cpus or Memory of
# slot1 cover fewer than (SlotID - 1) backfill slots.
#
# These slots get a custom name of "backfillX@foo" and a custom attribute of
# BackfillSlot=True.  Vacate is disabled on them so that jobs are killed
# immediately upon preemption (the resources should be freed up as soon as
# possible for the production jobs).
#
NumBackfillSlots             = min( { $(DETECTED_CPUS) / $(BackfillCpusPerSlot), $(DETECTED_MEMORY) / $(BackfillMemoryPerSlot) } )
Backfill_Cpus_Exhausted      = (slot1_Cpus / $(BackfillCpusPerSlot) < (SlotID - 1))
Backfill_Memory_Exhausted    = (slot1_Memory / $(BackfillMemoryPerSlot) < (SlotID - 1))
Backfill_Resources_Exhausted = ( $(Backfill_Cpus_Exhausted) || $(Backfill_Memory_Exhausted) )

SLOT_TYPE_2               = cpus=$(BackfillCpusPerSlot) memory=$(BackfillMemoryPerSlot), gpus=0
SLOT_TYPE_2_PARTITIONABLE = false
NUM_SLOTS_TYPE_2          = $INT(NumBackfillSlots)
SLOT_TYPE_2_NAME_PREFIX   = backfill
SLOT_TYPE_2_BackfillSlot  = True
SLOT_TYPE_2_START         = (Owner == "$(FAH_Owner)") && ($(Backfill_Resources_Exhausted) == False)
SLOT_TYPE_2_PREEMPT       = $(Backfill_Resources_Exhausted)
SLOT_TYPE_2_WANT_VACATE   = False

# Enable workfetch for this slot type through the FAH hook keyword, with a
# fetch-work delay of 120.  Rank is left empty for these slots.
SLOT_TYPE_2_STARTD_JOB_HOOK_KEYWORD = FAH
SLOT_TYPE_2_FetchWorkDelay = 120
SLOT_TYPE_2_Rank =