Page History
- 2020-Apr-13 10:47 johnkn
- 2020-Apr-13 10:42 johnkn
- 2020-Apr-02 16:44 johnkn
- 2020-Apr-02 16:43 johnkn
- 2020-Apr-02 16:41 johnkn
- 2020-Apr-02 16:25 johnkn
- 2020-Apr-02 16:25 tlmiller
- 2020-Apr-02 16:07 johnkn
- 2020-Apr-02 15:27 johnkn
- 2020-Apr-02 13:26 johnkn
- 2020-Apr-02 13:20 johnkn
- 2020-Apr-02 13:13 johnkn
This configuration adds a set of static backfill slots that run only Folding@Home jobs, obtained via work fetch, and that run those jobs only while the partitionable slot is not using the resources.
pslot_backfill_folding_at_home.config
# Configuration to add static backfill slots running Folding@Home to an HTCondor
# execute node that uses partitionable slots.
#
# Overview of what this file sets up:
#   1. Tunables for the backfill slots and for the Folding@Home (FAH) client.
#   2. FAH_JOB, a job ClassAd template handed back by the fetch-work hook.
#   3. A doubled resource advertisement (num_cpus / memory) so that the
#      production p-slot (slot type 1) and the static backfill slots
#      (slot type 2) can both be carved from the same machine.
#   4. START / PREEMPT expressions that only let a backfill slot run while the
#      p-slot still has enough unused Cpus and Memory.
#
# Each backfill slot will have the following resources
#
BackfillCpusPerSlot = 1
BackfillMemoryPerSlot = 1000

# The local user to run FAH jobs as; you must create this user
#
FAH_Owner = backfill

# Folding@Home settings; change to match your install path and work directory.
# Each slot will use a subdirectory under this path named from the slot id of the slot,
# e.g. /opt/fah/slot2, /opt/fah/slot3, etc.
#
FAH_HOME = /opt/fah
FAH_Executable = /usr/bin/FAHClient

# Arguments to the Folding@Home client for each slot
#
# Checkpoint every N minutes, default is 15
FAH_CheckpointArg = 15
FAH_CpusArg = $(BackfillCpusPerSlot)
# NOTE(review): presumably converts the per-slot memory from MB to bytes -- confirm
# against the FAHClient --memory option.
FAH_MemArg = $(BackfillMemoryPerSlot)*1024*1024
FAH_Arguments = --config $(FAH_HOME)/config.xml \
   --data-directory . \
   --checkpoint $INT(FAH_CheckpointArg) \
   --client-threads $INT(FAH_CpusArg) \
   --memory $INT(FAH_MemArg) \
   --cpus $(FAH_CpusArg)

# The script that HTCondor will use to get FAH jobs; take this script from the
# HTCondor website.
#
# This script extracts the slot id from stdin and
# puts it into the environment as _CONDOR_BACKFILL_SLOTID=<SlotId>,
# then runs
#    condor_config_val -macro '$(FAH_JOB)'
# to generate a job ClassAd for that slot.
#
FAH_HOOK_FETCH_WORK = $(FAH_HOME)/fetch_work

#
# ----- configuration of the backfill slots and workfetch ------
#

#
# Template for the Folding@Home job that the FAH_HOOK_FETCH_WORK script will return.
# $(BACKFILL_SLOTID) is expected to be supplied by the fetch script through the
# _CONDOR_BACKFILL_SLOTID environment variable described above, so each slot gets
# its own working directory under $(FAH_HOME).
# NOTE(review): JobLeaseDuration 604800 = 7*24*3600, i.e. one week if the unit is
# seconds -- confirm.
#
FAH_JOB @=end
   Cmd = "$(FAH_Executable)"
   Iwd = "$(FAH_HOME)/slot$(BACKFILL_SLOTID)"
   Owner = "$(FAH_Owner)"
   User = "$(FAH_Owner)@$(UID_DOMAIN)"
   JobUniverse = 5
   Arguments = "$(FAH_Arguments)"
   Out = "fah.out"
   Err = "fah.err"
   NiceUser = true
   Requirements = BackfillSlot && (State == "Unclaimed")
   MaxJobRetirementTime = 0
   JobLeaseDuration = 604800
   RequestCpus = $(BackfillCpusPerSlot)
   RequestMemory = $(BackfillMemoryPerSlot)
@end

# First tell the startd we have double the amount of cpus and memory that we
# actually have, and advertise some additional information into the slots
# (such as the amount of Cpus and Memory left over in the pslot).
#
num_cpus = 2 * $(DETECTED_CPUS)
memory = 2 * $(DETECTED_MEMORY)
startd_attrs = $(startd_attrs) BackfillSlot
startd_slot_attrs = $(startd_slot_attrs) Cpus Memory

# Decrease the startd polling interval so backfill jobs are killed quickly when
# production jobs arrive.
#
polling_interval = 2

# Set up a pslot for the production jobs, adding a START requirement to
# prohibit accepting backfill jobs. If you already have a p-slot configuration
# you only need to add the slot_type_1_start expression below. It prevents
# workfetch jobs from starting on the p-slot.
# NOTE(review): no slot_type_1_start expression appears in this file; only the
# hook keyword is cleared for slot type 1 -- confirm whether one is missing.
#
slot_type_1_partitionable = true
slot_type_1 = cpus=$(DETECTED_CPUS) memory=$(DETECTED_MEMORY) gpus=100% disk=50% swap=50%
num_slots_type_1 = 1
# Disable workfetch for the p-slot
slot_type_1_STARTD_JOB_HOOK_KEYWORD =

#
# Set up a set of static slots for backfill jobs. The number of static slots
# is determined by how many CPUs/Memory are required per slot. Set the START expression
# to only accept backfill jobs, and only if the pslot has enough free resources.
# Preempt backfill slots one at a time as unused resources in the pslot disappear.
# Give these slots a custom name of "backfillX@foo", and a custom attribute of
# BackfillSlot=True. Disable vacate on these slots, so that jobs are immediately killed
# upon preemption (we want the resources freed up asap for the production jobs).
#
# The *_Exhausted expressions compare the p-slot's leftover resources (slot1_Cpus,
# slot1_Memory), measured in backfill-slot-sized units, against (SlotID - 1), so
# higher-numbered backfill slots are the first to be treated as exhausted.
#
NumBackfillSlots = min( { $(DETECTED_CPUS) / $(BackfillCpusPerSlot), $(DETECTED_MEMORY) / $(BackfillMemoryPerSlot) } )
Backfill_Cpus_Exhausted = (slot1_Cpus / $(BackfillCpusPerSlot) < (SlotID - 1))
Backfill_Memory_Exhausted = (slot1_Memory / $(BackfillMemoryPerSlot) < (SlotID- 1))
Backfill_Resources_Exhausted = ( $(Backfill_Cpus_Exhausted) || $(Backfill_Memory_Exhausted) )
slot_type_2_partitionable = false
slot_type_2_name_prefix = backfill
slot_type_2 = cpus=$(BackfillCpusPerSlot) memory=$(BackfillMemoryPerSlot), gpus=0
num_slots_type_2 = $INT(NumBackfillSlots)
slot_type_2_BackfillSlot = True
slot_type_2_start = (Owner == "$(FAH_Owner)") && ($(Backfill_Resources_Exhausted) == False)
slot_type_2_preempt = $(Backfill_Resources_Exhausted)
slot_type_2_want_vacate = False
# enable work fetch for this slot type
slot_type_2_STARTD_JOB_HOOK_KEYWORD = FAH
slot_type_2_FetchWorkDelay = 120
slot_type_2_Rank =