COLLECTOR_NAME = collector COLLECTOR_HOST = $(CONDOR_HOST):9999 -# No idea where these numbers came from. +# These numbers came from CMS scale test set-up. Not clear if they're right. COLLECTOR_MAX_FILE_DESCRIPTORS = 80000 SHARED_PORT_MAX_FILE_DESCRIPTORS = 80000 @@ -90,7 +90,7 @@ # No UDP over the WAN, please. SCHEDD_SEND_VACATE_VIA_TCP = True -# I have no idea waht this means. +# I have no idea what this means. STARTD_SENDS_ALIVES = True # We probably don't need any of these for the EC2-only submitter. @@ -102,3 +102,68 @@ MAX_DEFAULT_LOG = 100 Mb EC2_GAHP_DEBUG = D_PERF_TRACE D_SUB_SECOND {endverbatim} + +We repeated the following section 256 times. + +=--- $(LOCAL_CONFIG_DIR)/99-collector-tree ---= +{verbatim} +# We picked port 10000 as our base port completely arbitrarily. +COLLECTOR10000 = $(COLLECTOR) +# We didn't set CONDOR_VIEW_HOST in the base configuration because it caused random delays in the root collector. Not sure why. +COLLECTOR10000_ENVIRONMENT = "_CONDOR_COLLECTOR_LOG=$(LOG)/Collector10000Log _CONDOR_USE_SHARED_PORT=FALSE _CONDOR_CONDOR_VIEW_HOST=127.0.0.1:9999 " +COLLECTOR10000_ARGS = -f -p 10000 +DAEMON_LIST = $(DAEMON_LIST), COLLECTOR10000 +# Useless, but shuts up the master. +COLLECTOR10000_LOG = $(LOG)/10000Log +{endverbatim} + +{section: CCB} + +We dedicated a host to CCB, worried that it would be too much to expect the collector tree to both collect and broker. (That did not appear to be the case.) + +=--- $(LOCAL_CONFIG_DIR)/02-ccb ---= + +{verbatim} +# We're the CCB host. We run a full collector tree, because we're lazy. +DAEMON_LIST = MASTER, COLLECTOR +CONDOR_HOST = $(FULL_HOSTNAME) + +# Shared-nothing. +UID_DOMAIN = $(FULL_HOSTNAME) +FILESYSTEM_DOMAIN = $(FULL_HOSTNAME) + +# Security. 
+SEC_DEFAULT_AUTHENTICATION = REQUIRED +SEC_DEFAULT_AUTHENTICATION_METHODS = FS, PASSWORD +SEC_ENABLE_MATCH_PASSWORD_AUTHENTICATION = TRUE +SEC_PASSWORD_FILE = $(LOCAL_DIR)/password_file +ALLOW_WRITE = condor_pool@*/* $(FULL_HOSTNAME) $(IP_ADDRESS) 127.0.0.1 + +# Primary collector +COLLECTOR_NAME = collector +COLLECTOR_HOST = $(CONDOR_HOST):9999 + +# These numbers came from CMS scale test set-up. Probably not necessary for a CCB-only machine. +COLLECTOR_MAX_FILE_DESCRIPTORS = 80000 +SHARED_PORT_MAX_FILE_DESCRIPTORS = 80000 + +# Scaling tweaks. Not known if necessary. +COLLECTOR_QUERY_WORKERS = 16 + +# Collector tree. +CONDOR_VIEW_HOST = 127.0.0.1:9999 +{endverbatim} + +We used the full collector-tree set-up for the CCB machine. This certainly didn't help, but probably didn't hurt, and saved me the effort of writing a new config file. + +=--- $(LOCAL_CONFIG_DIR)/99-collector-tree ---= +{verbatim} +# We picked port 10000 as our base port completely arbitrarily. +COLLECTOR10000 = $(COLLECTOR) +# We didn't set CONDOR_VIEW_HOST in the base configuration because it caused random delays in the root collector. Not sure why. +COLLECTOR10000_ENVIRONMENT = "_CONDOR_COLLECTOR_LOG=$(LOG)/Collector10000Log _CONDOR_USE_SHARED_PORT=FALSE _CONDOR_CONDOR_VIEW_HOST=127.0.0.1:9999 " +COLLECTOR10000_ARGS = -f -p 10000 +DAEMON_LIST = $(DAEMON_LIST), COLLECTOR10000 +# Useless, but shuts up the master. +COLLECTOR10000_LOG = $(LOG)/10000Log +{endverbatim}