Public Types | |
typedef std::vector < EL::Algorithm * > ::const_iterator | algsIter |
typedef OutputStream * | outputMIter |
typedef const OutputStream * | outputIter |
Public Member Functions | |
void | testInvariant () const |
Job () | |
Job (const Job &that) | |
~Job () | |
Job & | operator= (const Job &that) |
const SH::SampleHandler & | sampleHandler () const |
void | sampleHandler (const SH::SampleHandler &val_sampleHandler) |
algsIter | algsBegin () const |
algsIter | algsEnd () const |
void | algsAdd (Algorithm *alg_swallow) |
void | algsAddClone (const Algorithm &alg) |
add a clone of the given algorithm | |
bool | algsHas (const std::string &name) const |
outputMIter | outputBegin () |
outputIter | outputBegin () const |
outputMIter | outputEnd () |
outputIter | outputEnd () const |
void | outputAdd (const OutputStream &val_output) |
bool | outputHas (const std::string &name) const |
void | useXAOD () |
SH::MetaObject * | options () |
const SH::MetaObject * | options () const |
Static Public Attributes | |
static const std::string | optRemoveSubmitDir = "nc_EventLoop_RemoveSubmitDir" |
static const std::string | optMaxEvents = "nc_EventLoop_MaxEvents" |
static const std::string | optSkipEvents = "nc_EventLoop_SkipEvents" |
static const std::string | optFilesPerWorker = "nc_EventLoop_FilesPerWorker" |
static const std::string | optEventsPerWorker = "nc_EventLoop_EventsPerWorker" |
static const std::string | optSubmitFlags = "nc_EventLoop_SubmitFlags" |
static const std::string | optCondorConf = "nc_EventLoop_CondorConf" |
static const std::string | optCacheSize = "nc_cache_size" |
static const std::string | optCacheLearnEntries = "nc_EventLoop_CacheLearnEntries" |
static const std::string | optD3PDPerfStats = "nc_EventLoop_D3PDPerfStats" |
static const std::string | optD3PDReadStats = "nc_EventLoop_D3PDReadStats" |
static const std::string | optXAODPerfStats = "nc_EventLoop_XAODPerfStats" |
static const std::string | optXAODReadStats = "nc_EventLoop_XAODReadStats" |
static const std::string | optD3PDCacheMinEvent = "nc_EventLoop_D3PDCacheMinEvent" |
static const std::string | optD3PDCacheMinEventFraction = "nc_EventLoop_D3PDCacheMinEventFraction" |
static const std::string | optD3PDCacheMinByte = "nc_EventLoop_D3PDCacheMinByte" |
static const std::string | optD3PDCacheMinByteFraction = "nc_EventLoop_D3PDCacheMinByteFraction" |
static const std::string | optPerfTree = "nc_EventLoop_PerfTree" |
static const std::string | optXaodAccessMode = "nc_EventLoop_XaodAccessMode" |
static const std::string | optXaodAccessMode_branch = "branch" |
static const std::string | optXaodAccessMode_class = "class" |
static const std::string | optXaodAccessMode_athena = "athena" |
static const std::string | optXAODSummaryReport = "nc_xaod_summary_report" |
the option to turn on/off the xAOD summary reporting at the end of the job | |
static const std::string | optPrintPerFileStats = "nc_print_per_file_stats" |
static const std::string | optDisableMetrics = "nc_disable_metrics" |
description: the option to turn off collection of performance data | |
static const std::string | optResetShell = "nc_reset_shell" |
static const std::string | optLocalNoUnsetup = "nc_local_no_unsetup" |
the option not to unsetup the environment in LocalDriver | |
static const std::string | optBackgroundProcess = "nc_background_process" |
the option to do processing in a background process in PROOF | |
static const std::string | optGridDestSE = "nc_destSE" |
static const std::string | optGridSite = "nc_site" |
static const std::string | optGridCloud = "nc_cloud" |
static const std::string | optGridExcludedSite = "nc_excludedSite" |
static const std::string | optGridNGBPerJob = "nc_nGBPerJob" |
static const std::string | optGridMemory = "nc_memory" |
static const std::string | optGridMaxCpuCount = "nc_maxCpuCount" |
static const std::string | optGridNFiles = "nc_nFiles" |
static const std::string | optGridNFilesPerJob = "nc_nFilesPerJob" |
static const std::string | optGridNJobs = "nc_nJobs" |
static const std::string | optGridMaxFileSize = "nc_maxFileSize" |
static const std::string | optGridMaxNFilesPerJob = "nc_maxNFilesPerJob" |
static const std::string | optGridUseChirpServer = "nc_useChirpServer" |
static const std::string | optGridExpress = "nc_express" |
static const std::string | optGridNoSubmit = "nc_noSubmit" |
static const std::string | optGridMergeOutput = "nc_mergeOutput" |
static const std::string | optTmpDir = "nc_tmpDir" |
static const std::string | optRootVer = "nc_rootVer" |
static const std::string | optCmtConfig = "nc_cmtConfig" |
static const std::string | optGridDisableAutoRetry = "nc_disableAutoRetry" |
static const std::string | optOfficial = "nc_official" |
static const std::string | optVoms = "nc_voms" |
static const std::string | optBatchSharedFileSystem = "nc_sharedFileSystem" |
static const std::string | optRetries = SH::MetaNames::openRetries() |
static const std::string | optRetriesWait = SH::MetaNames::openRetriesWait() |
Friends | |
void | swap (Job &a, Job &b) |
typedef std::vector<EL::Algorithm*>::const_iterator EL::Job::algsIter |
description: the list of algorithms used guarantee: no-fail / strong failures: out of memory II invariant: alg != 0
typedef OutputStream* EL::Job::outputMIter |
description: the list of output datasets used guarantee: no-fail / strong failures(3): out of memory II requires(3,soft): !outputHas (val_output.label()) note: while it is not a requirement it is highly recommended that you add the outputs from Algorithm::doSetupJob. That way they get automatically enabled/disabled when you enable/disable the algorithms. Plus your code will be slightly less spread out.
EL::Job::Job | ( | ) |
effects: standard default constructor guarantee: strong failures: low level errors I
EL::Job::Job | ( | const Job & | that | ) |
effects: standard copy constructor guarantee: strong failures: out of memory II
EL::Job::~Job | ( | ) |
effects: standard destructor guarantee: no-fail
void EL::Job::algsAddClone | ( | const Algorithm & | alg | ) |
add a clone of the given algorithm
This is to be used when the user wants to reuse an algorithm for multiple Job objects or for some reason needs to delete it themselves.
bool EL::Job::algsHas | ( | const std::string & | name | ) | const |
returns: whether we have the algorithm with the given name guarantee: no-fail
Job & EL::Job::operator= | ( | const Job & | that | ) |
effects: standard assignment operator returns: *this guarantee: strong failures: out of memory II
SH::MetaObject * EL::Job::options | ( | ) |
description: the list of options to the job guarantee: no-fail postcondition: result != 0
bool EL::Job::outputHas | ( | const std::string & | name | ) | const |
returns: whether we have an output with the given name guarantee: no-fail
const SH::SampleHandler & EL::Job::sampleHandler | ( | ) | const |
description: the sample handler used guarantee: no-fail / strong failures: out of memory II
void EL::Job::testInvariant | ( | ) | const |
effects: test the invariant of this object guarantee: no-fail
void EL::Job::useXAOD | ( | ) |
effects: register this job to use XAODs guarantee: strong failures: out of memory II failures: TEventSvc not available
const std::string EL::Job::optBatchSharedFileSystem = "nc_sharedFileSystem" [static] |
description: batch-specific options rationale: these options are for configuring batch drivers
const std::string EL::Job::optCacheLearnEntries = "nc_EventLoop_CacheLearnEntries" [static] |
description: this option allows configuring the number of tree entries used for learning cache behavior. rationale: there is a trade-off here: if you set this too low you will fail to cache more rarely used variables; if you set it too high it will take too long until the cache kicks in. warning: right now this is performed on a per-file basis, which at some point will be raised to a per-job basis, which is the limit of how far caching can go in this approach. if you need multi-job support, either use D3PDReader or contact me on how to do this.
const std::string EL::Job::optCacheSize = "nc_cache_size" [static] |
description: this option allows configuring the TTreeCache size for this job. if it is smaller than or equal to 0, the cache is turned off. rationale: if you read data from across the network, using the cache mechanism will greatly increase your performance. for local disks you may gain or lose performance.
warning: this has to be synchronized with SampleHandler::MetaFields. I can't just copy it here, because the order of initialization is undefined
const std::string EL::Job::optCondorConf = "nc_EventLoop_CondorConf" [static] |
description: the name of the option for supplying extra parameters for condor systems
const std::string EL::Job::optD3PDCacheMinEvent = "nc_EventLoop_D3PDCacheMinEvent" [static] |
description: these options configure the D3PDReader TTreeCache settings. if you use more than one option, the result is undefined. warning: this only works if you use D3PDReader rationale: the idea is that in your first job you create the D3PDReadStats object, which you then pass to subsequent jobs.
const std::string EL::Job::optD3PDPerfStats = "nc_EventLoop_D3PDPerfStats" [static] |
description: the name of the option for turning on D3PDPerfStats. To use D3PDPerfStats set this to a non-zero value. warning: this only works if you read data through D3PDReader rationale: this can be used for fine-tuning TTreeCache or generally optimizing i/o performance
const std::string EL::Job::optD3PDReadStats = "nc_EventLoop_D3PDReadStats" [static] |
description: the name of the D3PDPerfStats object produced as I gather it, as well as the name of the option passed into the job. rationale: I use the same name in two places to allow reading the stats object from one output and passing it to the next
const std::string EL::Job::optEventsPerWorker = "nc_EventLoop_EventsPerWorker" [static] |
description: the name of the option for selecting the number of events per batch job. (only BatchDriver and derived drivers). warning: this option will be ignored unless you have called SH::scanNEvents first. rationale: this allows you to make your jobs all approximately equal in size and gives you much finer control of the lengths of your jobs. if you did run SH::scanNEvents, but didn't specify a number of events per worker this will produce the same number of jobs as you would have otherwise, but spread the workload more evenly.
const std::string EL::Job::optFilesPerWorker = "nc_EventLoop_FilesPerWorker" [static] |
description: the name of the option for selecting the number of files per batch job. (only BatchDriver and derived drivers). rationale: this is typically used if you are either processing fairly small input files or are producing a skim with a very high reduction factor. it will reduce the inefficiency associated with starting a lot of jobs or reading a lot of output files
const std::string EL::Job::optGridDestSE = "nc_destSE" [static] |
description: grid-specific options rationale: these are named so as to correspond to prun equivalents, bar the optGrid prefix.
const std::string EL::Job::optMaxEvents = "nc_EventLoop_MaxEvents" [static] |
description: the name of the option used for setting the maximum number of events to process per sample rationale: this is used for test runs where you don't want to process all events, but just want a quick look
const std::string EL::Job::optPerfTree = "nc_EventLoop_PerfTree" [static] |
description: the option to turn on the performance tree in PROOF. if this is set to 1, it will write out the tree rationale: this can help in tuning your PROOF cluster or tuning what you do in PROOF, but it may have some overhead, which is why we don't do it by default
const std::string EL::Job::optPrintPerFileStats = "nc_print_per_file_stats" [static] |
description: the option to turn on printing of i/o statistics at the end of each file rationale: while this is not as complete as whole sample statistics, it can be helpful in some circumstances when debugging the performance warning: this is not supported for all drivers
const std::string EL::Job::optRemoveSubmitDir = "nc_EventLoop_RemoveSubmitDir" [static] |
description: the name of the option for overwriting the submission directory. if you set this to a non-zero value it will remove any existing submit-directory before trying to create a new one. rationale: normally you don't want to silently remove an existing submission directory, since it may contain valuable data, but in some cases, such as debugging, it may be annoying to delete it manually.
const std::string EL::Job::optResetShell = "nc_reset_shell" [static] |
description: the option to reset the shell on the worker nodes rationale: this is currently only used by the LSFDriver where it is enabled by default to reset it on lxbatch.
const std::string EL::Job::optRetries = SH::MetaNames::openRetries() [static] |
these options are defined in SH::MetaNames
const std::string EL::Job::optSkipEvents = "nc_EventLoop_SkipEvents" [static] |
description: the name of the option used for skipping a certain number of events in the beginning rationale: this is used for test runs where you are only interested in a particular set of events
const std::string EL::Job::optSubmitFlags = "nc_EventLoop_SubmitFlags" [static] |
description: the name of the option for supplying extra submit parameters to batch systems rationale: the primary purpose of this is to allow selecting the right queue on your batch system, but it can be used for all kinds of submission options that EventLoop doesn't support directly.
const std::string EL::Job::optXaodAccessMode = "nc_EventLoop_XaodAccessMode" [static] |
description: the option to select the access mode for xAODs. this can be "branch" for branch access, or "class" for class access. if this option isn't specified EventLoop will pick whatever is currently recommended.
const std::string EL::Job::optXAODPerfStats = "nc_EventLoop_XAODPerfStats" [static] |
description: the name of the option for turning on XAODPerfStats. To use XAODPerfStats set this to a non-zero value. warning: this only works if you read data through XAODReader rationale: this can be used for fine-tuning TTreeCache or generally optimizing i/o performance
const std::string EL::Job::optXAODReadStats = "nc_EventLoop_XAODReadStats" [static] |
description: the name of the XAODPerfStats object produced as I gather it, as well as the name of the option passed into the job. rationale: I use the same name in two places to allow reading the stats object from one output and passing it to the next