nordugrid-arc-nagios-plugins-3.1.1/0000755000175000002070000000000015002373741020133 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/arcnagios/0000755000175000002070000000000015002373741022101 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/0000755000175000002070000000000015002373741022470 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/jobplugins/0000755000175000002070000000000015002373741024644 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/jobplugins/__init__.py0000644000175000002070000000063115002373741026755 0ustar mockbuildmock00000000000000"""Probe plugins for job submission and monitoring.""" from arcnagios.ce.jobplugins.scripted import ScriptedJobPlugin from arcnagios.ce.jobplugins.staging import StagingJobPlugin _jobplugins_by_name = { 'scripted': ScriptedJobPlugin, 'staging': StagingJobPlugin, } def register_jobplugin(cls): _jobplugins_by_name[cls.name] = cls def load_jobplugin(name): return _jobplugins_by_name[name] nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/jobplugins/scripted.py0000644000175000002070000000671515002373741027044 0ustar mockbuildmock00000000000000"""Plugin for adding checks to run on the worker node.""" import os import re from arcnagios.ce.jobplugin import JobPlugin from arcnagios.nagutils import OK, CRITICAL, UNKNOWN _PRESENCE_SCRIPT = """\ missing= for prog in %(required_programs)s; do type -p $prog >/dev/null 2>&1 || missing="$missing $prog" done if test -n "$missing"; then echo "__missing$missing" >%(output_file)s else %(script_line)s err=$? [ $err -eq 0 ] || echo "__exit $err" >>%(output_file)s fi """ _PLAIN_SCRIPT = """\ %(script_line)s err=$? [ $err -eq 0 ] || echo "__exit $err" >>%(output_file)s """ class ScriptedJobPlugin(JobPlugin): _missing_re = re.compile(r'__missing\s+(.*)') _exit_re = re.compile(r'__exit\s+(\d+)') _status_re = re.compile(r'__status\s+(\d+)\s+(.*)') _log_re = re.compile(r'__log\s+(\d+)\s+(.*)') def write_script(self, fh): script_line = self.getconf('script_line') fh.write('# Scripted test %s\n'%self.service_description) output_file = self.getconf('output_file') env = { 'script_line': script_line, 'output_file': output_file } if self.hasconf('required_programs'): env['required_programs'] = self.getconf('required_programs') fh.write(_PRESENCE_SCRIPT % env) else: fh.write(_PLAIN_SCRIPT % env) fh.write('\n') def staged_inputs(self): return self.getconf_strlist('staged_inputs', default = []) def staged_outputs(self): return [(self.getconf('output_file'), None, [])] def check(self, report, jobdir, stored_urls): output_file = os.path.join(jobdir, self.getconf('output_file')) if self.hasconf('output_pattern'): pattern_re = re.compile(self.getconf('output_pattern')) else: pattern_re = None try: with open(output_file, encoding='utf-8') as fh: for line in fh: if pattern_re: mo = re.match(pattern_re, line) if mo: msg = self.getconf('status_ok', vars=mo.groupdict()) report.update_status(OK, msg) fh.close() return mo = re.match(self._missing_re, line) if mo: msg = 'Missing program(s) %s'%mo.group(1) report.update_status(CRITICAL, msg) break mo = re.match(self._exit_re, line) if mo: code = int(mo.group(1)) if code: msg = 'Script exited with code %d.'%code report.update_status(CRITICAL, msg) continue mo = re.match(self._status_re, line) if mo: report.update_status(int(mo.group(1)), mo.group(2)) continue mo = re.match(self._log_re, line) if mo: report.log.log(int(mo.group(1)), mo.group(2)) continue except IOError: 
            report.update_status(UNKNOWN,
                    'Did not receive output file %s.' % output_file)
            return
        if pattern_re:
            report.update_status(CRITICAL,
                    self.getconf('status_critical',
                                 default = 'Pattern not found.'))
nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/jobplugins/staging.py0000644000175000002070000000624015002373741026654 0ustar  mockbuildmock00000000000000"""Plugin for testing that data staging works."""

import os
import tempfile

from arcnagios import arcutils, nagutils
from arcnagios.ce.jobplugin import JobPlugin
from arcnagios.utils import log_process_error

def _split_url_urloptions(urlspec):
    if ';' in urlspec:
        xs = urlspec.split(';')
        return (xs[0], xs[1:])
    else:
        return (urlspec, [])

class StagingJobPlugin(JobPlugin):

    def staged_outputs(self):
        urls = self.getconf_strlist('staged_outputs', default = [])
        urls = list(map(_split_url_urloptions, urls))
        return [('%s-out-%d' % (self.test_name, i), url, urloptions)
                for i, (url, urloptions) in enumerate(urls)]

    def staged_inputs(self):
        urls = self.getconf_strlist('staged_inputs', default = [])
        urls = list(map(_split_url_urloptions, urls))
        return [('%s-in-%d' % (self.test_name, i), url, urloptions)
                for i, (url, urloptions) in enumerate(urls)]

    def upload_test_file(self, url):
        try:
            fd, path = tempfile.mkstemp(prefix = 'tmp-check_arcce_submit-',
                                        text = True)
            with os.fdopen(fd, 'w') as fh:
                fh.write('This file was uploaded for use by ARC-CE '
                         'staging probes.\n')
        except OSError as exn:
            self.log.warning('Failed to create a temporary file '
                             'for upload, using /dev/null: %s' % exn)
            path = '/dev/null'
        self.log.info('Uploading %s to %s.', path, url)
        r = self.arcclient.arccp(path, url)
        if path != '/dev/null':
            os.unlink(path)
        if r.is_error():
            self.log.warning('Failed to create %s: %s', url, r.error)

    def upload_missing_test_files(self):
        urls = self.getconf_strlist('upload_if_missing', default = [])
        for url in urls:
            try:
                if self.arcclient.arcls(url).get() == []:
                    self.upload_test_file(url)
            except arcutils.CalledProcessError as exn:
                log_process_error(self.log, exn, prefix = 'arcls')
                self.upload_test_file(url)

    def write_script(self, fh):
        self.upload_missing_test_files()
        fh.write('# Test "%s" handled by the "staging" job-plugin.\n'
                 % self.test_name)
        for filename, _, _ in self.staged_outputs():
            fh.write('hostname >%s\n' % filename)
        fh.write('\n')

    def check(self, report, jobdir, stored_urls):
        for url in stored_urls:
            try:
                filenames = self.arcclient.arcls(url).get()
                if filenames == []:
                    report.log.error('Could not list %s.', url)
                    report.update_status(nagutils.CRITICAL)
                elif len(filenames) > 1:
                    report.log.warning('Got multiple entries from %s:', url)
                    for filename in filenames:
                        report.log.warning(' %s', filename)
            except arcutils.CalledProcessError as exn:
                log_process_error(report.log, exn)
                report.update_status(nagutils.CRITICAL)
nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/__init__.py0000644000175000002070000000000015002373741024567 0ustar  mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/check_arcce_submit.py0000644000175000002070000003364715002373741026644 0ustar  mockbuildmock00000000000000"""check_arcce_submit - Submits test jobs to ARC."""

import asyncio
import os
import time

from arcnagios import arcutils, nagutils
from arcnagios.utils import log_process_error
from arcnagios.ce.jobutils import JobNagiosPlugin, JobDescription, JobInfo

class Check_arcce_submit(JobNagiosPlugin):

    def __init__(self):
        JobNagiosPlugin.__init__(self)
        argp = self.argparser.add_argument_group('Options for Job Submission')
        argp.add_argument('-H',
            dest = 'host', required = True,
            help = 'The host name of the CE to test. This will be used '
                   'to connect to the CE unless --ce is given. '
                   'This option is required.')
        argp.add_argument('-S', dest = 'submissioninterface', required = False,
            help = 'The submission interface, passed on to arcsub.')
        argp.add_argument('-I', dest = 'infointerface', required = False,
            help = 'The information interface, passed on to arcsub.')
        argp.add_argument('-p', dest = 'port', type = int,
            help = 'An optional port number at which to connect.')
        argp.add_argument('--prev-status', dest = 'prev_status',
            type = int, default = 0, metavar = '{0..3}',
            help = 'The previous Nagios status for this metric.')
        argp.add_argument('--termination-service',
            dest = 'termination_service', default = '',
            help = 'The name (NAGIOS "description") of the passive '
                   'service to which to submit the results.')
        argp.add_argument('--progress-service', metavar = 'SVC',
            help = 'Publish state timeout alerts to SVC.')
        argp.add_argument('--submission-service',
            dest = 'submission_service', default = '',
            help = 'Report submission-related alerts to this service '
                   'instead of raising the alert on the active service.')
        argp.add_argument('--submission-service-threshold',
            default = 2, type = int,
            help = 'Minimum severity before the submission result is '
                   'submitted to the passive service specified by '
                   '--submission-service. This is the numeric status, '
                   '0 for OK, 1 for WARNING, 2 for CRITICAL (default), '
                   'and 3 for UNKNOWN.')
        argp.add_argument('--job-submit-timeout', dest = 'job_submit_timeout',
            type = int, default = 600,
            help = 'Timeout for job submission.')
        argp.add_argument('--job-discard-timeout',
            dest = 'job_discard_timeout', type = int, default = 6*3600,
            help = 'Timeout before discarding a job.')
        argp.add_argument('--ce', dest = 'ce',
            help = 'URL for connecting to the CE, using the same format '
                   'as the -C option of arcsub(1).')
        argp.add_argument('--queue', dest = 'queue',
            help = 'Target queue name. If unspecified, let ARC choose it.')
        argp.add_argument('--job-tag', dest = 'job_tag',
            help = 'A short string suitable in directory names to '
                   'distinguish different submission services for the '
                   'same hostname.')
        argp.add_argument('--job-description', dest = 'job_description',
            help = 'Use this job description instead of generating one. '
                   'In this case --stage-input options are ignored and '
                   'URLs passed to --stage-output will be deleted when '
                   'the job finishes.')
        argp.add_argument('--keep-failed-jobdata',
            dest = 'keep_failed_jobdata',
            action = 'store_true', default = False,
            help = 'Keep the job descriptions and output directories for '
                   'failed jobs. These will not be removed automatically.')
        argp.add_argument('--test', dest = 'tests', action = 'append',
            default = [], metavar = 'TESTNAME',
            help = 'Add an additional test described in the configuration '
                   'file under the section "arcce.TESTNAME"')
        argp.add_argument('--runtime-environment',
            dest = 'runtime_environments', action = 'append',
            default = [], metavar = 'RTE',
            help = 'Request the given runtime environment.')
        argp.add_argument('--wall-time-limit', dest = 'wall_time_limit',
            type = int, default = 600,
            help = 'Soft limit of execution wall-time.')
        argp.add_argument('--memory-limit', dest = 'memory_limit',
            type = int, default = 536870912,
            help = 'The max. amount of memory used by the job in bytes. '
                   'Default: 536870912 (512 MiB)')
        argp.add_argument('--enable-gmlog', dest = 'enable_gmlog',
            action = 'store_true', default = False,
            help = 'Request debug information from the CE. 
This will be ' 'stored in a subdirectory log of the output directory.') argp.add_argument('--arcsub-loglevel', type = str, default = None, help = 'The log level to pass to arcsub (-d).') self._staged_inputs = [] self._staged_outputs = [] def parse_args(self, args): JobNagiosPlugin.parse_args(self, args) def _report_submission(self, status, msg): if status < self.opts.submission_service_threshold \ or not self.opts.submission_service: report = self.nagios_report else: self.nagios_report.update_status(nagutils.OK, 'Reporting to passive service.') report = self.nagios_report_for(self.opts.host, self.opts.submission_service) report.update_status(status, msg) def check(self): # calls asyncio.run """Submit a job to a CE.""" self.require_voms_proxy() workdir = self.workdir_for(self.opts.host, self.opts.job_tag) jobid_file = os.path.join(workdir, self.JOBID_FILENAME) jobinfo = self.load_active_job(self.opts.host, self.opts.job_tag) if not jobinfo is None: t_sub = jobinfo.submission_time job_state = jobinfo.job_state if not job_state.is_final(): s_sub = time.strftime('%FT%T', time.localtime(t_sub)) self.log.info('Last job was submitted %s.', s_sub) t_dis = t_sub + self.opts.job_discard_timeout if int(time.time()) >= t_dis: self.log.warning('Discarding last job due to timeout.') asyncio.run(self.discard_job(jobinfo)) self._report_submission(nagutils.WARNING, 'Re-submitting due to timeout of %s from %s' % (jobinfo.job_id, s_sub)) else: s_dis = time.strftime('%FT%T', time.localtime(t_dis)) self.log.info('Job will be discarded %s.', s_dis) status = self.opts.prev_status or 0 self.log.info('Keeping previous status %d.', status) self._report_submission(status, 'Job not finished.') return else: self.log.debug('Job in terminal state %s.\n', job_state) self._report_submission(nagutils.OK, 'Waiting for monitoring service to fetch the job.') return # Prepare the working directory for a new job. job_output_dir = os.path.join(workdir, self.JOB_OUTPUT_DIRNAME) if not os.path.exists(job_output_dir): try: os.makedirs(job_output_dir) except OSError as exn: msg = 'Failed to create working directory: %s' % exn self.nagios_report.update_status(nagutils.UNKNOWN, msg) return self.log.debug('Submitting new job.') job_script_file = os.path.join(workdir, self.JOB_SCRIPT_FILENAME) # Create job script. with open(job_script_file, 'w', encoding='utf-8') as fh: fh.write('#! /bin/sh\n\n' 'status=0\n' 'echo "Job started `date -Is`."\n') runtime_environments = set(self.opts.runtime_environments) fh.write('\n') for test_name in self.opts.tests: test = self.load_jobtest(test_name, hostname = self.opts.host) test.write_script(fh) def adjust_staged(spec): if isinstance(spec, tuple): filename, spec, urloptions = spec else: if ';' in spec: xs = spec.split(';') spec, urloptions = xs[0], xs[1:] else: urloptions = [] filename = os.path.basename(spec) if spec is None or ':/' in spec: url = spec elif os.path.isabs(spec): url = 'file:' + spec else: url = 'file:' + os.path.join(workdir, spec) return filename, url, urloptions for stagespec in test.staged_inputs(): self._staged_inputs.append(adjust_staged(stagespec)) for stagespec in test.staged_outputs(): self._staged_outputs.append(adjust_staged(stagespec)) runtime_environments.update(test.runtime_environments()) fh.write('echo "Present files before termination:"\n' 'ls -l\n' 'echo "Job finished `date -Is`, status = $status."\n' 'exit $status\n') # Create job description file. 
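        # The job description written below is rendered from a Jinja2
        # template (see write_job_description in jobutils.py), which
        # receives the JobDescription instance as the variable "jd".  A
        # minimal illustrative sketch of such a template -- not the
        # shipped default.xrsl.j2, whose actual content is authoritative --
        # could look like:
        #
        #   &(executable = "{{ jd.script_name }}")
        #    (jobname = "{{ jd.job_name }}")
        #    (stdout = "{{ jd.output }}")
        #    (stderr = "{{ jd.error }}")
        #    {% if jd.queue_name %}(queue = "{{ jd.queue_name }}"){% endif %}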
if self.opts.job_description: jobdesc_file = self.opts.job_description else: jobdesc_file = os.path.join(workdir, self.JOB_DESCRIPTION_FILENAME) jobdesc = JobDescription( script_path = job_script_file, application_name = 'ARCCE-probe', logdir = self.opts.enable_gmlog and 'log', job_name = self.opts.termination_service, output = 'stdout.txt', error = 'stderr.txt', staged_inputs = self._staged_inputs, staged_outputs = self._staged_outputs, runtime_environments = runtime_environments, wall_time_limit = self.opts.wall_time_limit, memory_limit = self.opts.memory_limit, queue_name = self.opts.queue) self.write_job_description(jobdesc_file, jobdesc) # Submit the job. if self.opts.ce: connection_url = self.opts.ce elif self.config.has_option('arcce.connection_urls', self.opts.host): connection_url = self.config.get('arcce.connection_urls', self.opts.host) elif self.config.has_option('arcce.connection_urls', 'default'): connection_url = \ self.config.get('arcce.connection_urls', 'default') \ % {'ce_host': self.opts.host} else: if self.opts.port: connection_url = self.opts.host + ':' + str(self.opts.port) else: connection_url = self.opts.host arcsub_result = \ self.arcclient.arcsub([jobdesc_file], cluster = connection_url, jobids_to_file = jobid_file, timeout = self.opts.job_submit_timeout, loglevel = self.opts.arcsub_loglevel, submissioninterface = self.opts.submissioninterface, infointerface = self.opts.infointerface) try: with open(jobid_file, encoding='utf-8') as fh: job_id = fh.readline().strip() except FileNotFoundError: job_id = None if arcsub_result.is_error(): self.cleanup_job_files(self.opts.host, self.opts.job_tag, archive = self.opts.keep_failed_jobdata) log_process_error(self.log, arcsub_result.error, prefix = 'arcsub', synopsis = 'failed to submit job') if not job_id: self._report_submission(nagutils.CRITICAL, 'Job submission failed.') return self.log.error('Received a JID despite the error, proceeding.') self._report_submission(nagutils.WARNING, 'Job seems to be submitted but: %s' % arcsub_result.error) elif not job_id: self.cleanup_job_files(self.opts.host, self.opts.job_tag) self.log.error('The job ID was not found in %s.', jobid_file) for line in arcsub_result.get().strip().split('\n'): self.log.error('arcsub: %s', line) self._report_submission(nagutils.CRITICAL, 'Failed to submit job.') return else: self._report_submission(nagutils.OK, 'Job submitted.') t_now = time.time() jobinfo = JobInfo( submission_time = t_now, host = self.opts.host, job_tag = self.opts.job_tag, termination_service = self.opts.termination_service, progress_service = self.opts.progress_service, job_id = job_id, job_state = arcutils.J_NOT_SEEN, job_state_time = int(time.time()), check_time = t_now, stored_urls = [url for _, url, _ in self._staged_outputs if url], tests = self.opts.tests, reputation_choices = self._reputation_tracker.choices()) self.save_active_job(jobinfo, self.opts.host, self.opts.job_tag) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/jobplugin.py0000644000175000002070000001045415002373741025037 0ustar mockbuildmock00000000000000"""Abstract base class and loader for probe plugins for jobs.""" import re from arcnagios import substitution from arcnagios.nagutils import ServiceUnknown from arcnagios.utils import ident, unspecified def boolean(s): s_lc = s.lower() if s_lc in ['0', 'false', 'no', 'off']: return False if s_lc in ['1', 'true', 'yes', 'on']: return True raise ValueError('invalid literal for boolean: %r' % s) _interp_re = re.compile(r'%\(([a-zA-Z0-9_]+)\)') class JobPlugin: """A 
base-class for tests to run within a job script.  Implementations
    provide commands to run, and how to extract the result.  Optionally it
    may specify staging and cleanup."""

    def __init__(
            self, name, config, config_section, reputation_tracker, log,
            arcclient, env = None):
        self.name = name
        self.config = config
        self.config_section = config_section
        self.reputation_tracker = reputation_tracker
        self.test_name = config_section[6:] # Strip "arcce."
        self.log = log
        self.arcclient = arcclient
        self.environment = env or {}

    def _import_interpolations(self, var):
        if not var in self.environment \
                and self.config.has_option(self.config_section, var):
            raw_value = self.config.get(self.config_section, var, raw = True)
            for mo in re.finditer(_interp_re, raw_value):
                v = mo.group(1)
                if not v in self.environment \
                        and self.config.has_section('variable.' + v):
                    substitution.import_variable(
                        self.config, v, self.reputation_tracker,
                        self.environment)

    def _update_vars(self, kwargs):
        if 'vars' in kwargs:
            kwargs['vars'].update(self.environment)
        else:
            kwargs['vars'] = self.environment

    def hasconf(self, var):
        return self.config.has_option(self.config_section, var)

    def getconf(self, var, default = unspecified, typ = ident, **kwargs):
        if default is not unspecified and not self.hasconf(var):
            return default
        self._import_interpolations(var)
        self._update_vars(kwargs)
        try:
            return typ(self.config.get(self.config_section, var, **kwargs))
        except ValueError as exn:
            raise ServiceUnknown(
                'Bad value for configuration parameter %s in section %s: %s'
                % (var, self.config_section, exn)) from exn

    def getconf_int(self, var, default = unspecified, **kwargs):
        return self.getconf(var, default = default, typ = int, **kwargs)

    def getconf_bool(self, var, default = unspecified, **kwargs):
        return self.getconf(var, default = default, typ = boolean, **kwargs)

    def getconf_float(self, var, default = unspecified, **kwargs):
        return self.getconf(var, default = default, typ = float, **kwargs)

    def getconf_strlist(self, var, default = unspecified, sep = None,
                        **kwargs):
        if default is not unspecified and not self.hasconf(var):
            return default
        self._import_interpolations(var)
        self._update_vars(kwargs)
        raw = self.config.get(self.config_section, var, **kwargs)
        return [s.strip() for s in raw.split(sep)]

    @property
    def service_description(self):
        return self.getconf('service_description', None)

    def staged_inputs(self):
        """Override this method to specify files used by the script."""
        return []

    def staged_outputs(self):
        """Override this method to specify files produced by the script,
        which are needed by `extract_result`."""
        return []

    def runtime_environments(self):
        rtes = self.getconf('runtime_environments', '')
        return [x for x in map(str.strip, rtes.split(',')) if x]

    def write_script(self, fh):
        """This method may write commands to run in the job script. 
The commands are run in the standard shell (/bin/sh), and must not call exit or otherwise disrupt the control flow of the script, since other commands are run in the same script.""" def check(self, report, jobdir, stored_urls): """This method is run to check the output of a job.""" raise NotImplementedError('extract_result') def cleanup(self, job_state): pass nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/jobutils.py0000644000175000002070000004171215002373741024702 0ustar mockbuildmock00000000000000"""Base class for ARC monitoring and utilities for state tracking.""" import asyncio from functools import cached_property from glob import glob import os import shutil import time from typing import Dict, List, Optional, Set, Tuple import jinja2 from arcnagios import arcclients, nagutils, persistence, vomsutils from arcnagios.arcutils import jobstate_of_str, ArcClient, JobState, ParseError from arcnagios.nagutils import NagiosPerflogTimer from arcnagios.persistence import ObsoletePersistentObject from arcnagios.reputation import ReputationTracker from arcnagios.rescheduler import Rescheduler from arcnagios.utils import host_of_uri from arcnagios.ce.jobplugin import JobPlugin from arcnagios.ce.jobplugins import load_jobplugin class JobDescription: _required_attributes = ["job_name", "application_name", "script_path"] def __init__(self, job_name = None, application_name = None, logdir = None, script_path = None, script_args = None, output = 'stdout.txt', error = 'stderr.txt', wall_time_limit = None, memory_limit = None, staged_inputs = None, staged_outputs = None, runtime_environments = None, queue_name = None, template = 'default.xrsl.j2'): self.template = template self.job_name = job_name or 'ARC Probe' self.application_name = application_name self.logdir = logdir self.script_path = script_path self.script_name = os.path.basename(script_path) self.script_args = script_args or [] self.output = output self.error = error self.wall_time_limit = wall_time_limit self.memory_limit = memory_limit self.staged_inputs = staged_inputs or [] self.staged_outputs = staged_outputs or [] self.runtime_environments = runtime_environments or [] self.queue_name = queue_name def verify(self): for attr in self._required_attributes: if getattr(self, attr) is None: raise nagutils.ServiceUnknown('Missing %s for job description.' % attr) pt_jobstate = persistence.PersistentType(jobstate_of_str) pt_jobstate_opt = persistence.PersistentType(jobstate_of_str, str, False) class JobInfo(persistence.PersistentObject): persistence_version = 1 # TODO: Check if we can use introspection of the below declarations instead. 
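    # Each entry below pairs an attribute name with the PersistentType
    # codec that persistent_load/persistent_save use to read and write it.
    # A hypothetical new field would need both a dict entry and the
    # matching annotation, e.g. (illustrative only, not a shipped field):
    #     'queue_name': persistence.pt_str_opt,
    # together with the annotation:  queue_name: Optional[str]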
persistent_attributes = { 'submission_time': persistence.pt_float, 'host': persistence.pt_str, 'job_tag': persistence.pt_str_opt, 'progress_service': persistence.pt_str_opt, 'termination_service': persistence.pt_str_opt, 'job_id': persistence.pt_str, 'job_state': pt_jobstate, 'job_specific_state': pt_jobstate_opt, 'job_state_time': persistence.pt_float_opt, 'job_state_alert': persistence.pt_int_opt, 'check_time': persistence.pt_float_opt, 'check_attempts': persistence.pt_int_opt, 'fetch_attempts': persistence.pt_int_opt, 'stored_urls': persistence.pt_str_list, 'tests': persistence.pt_str_list, 'reputation_choices': persistence.pt_json_opt, } submission_time: float host: str job_tag: Optional[str] progress_service: Optional[str] termination_service: Optional[str] job_id: str job_state: JobState job_specific_state: Optional[str] job_state_time: Optional[float] job_state_alert: Optional[int] check_time: Optional[float] check_attempts: Optional[int] fetch_attempts: Optional[int] stored_urls: List[str] tests: List[str] reputation_choices: Optional[Dict[str, str]] @property def host_and_tag(self): if self.job_tag: return '%s#%s' % (self.host, self.job_tag) return self.host def __eq__(self, other): return self.host_and_tag == other.host_and_tag def __lt__(self, other): return self.host_and_tag < other.host_and_tag def key_value(s: str) -> Tuple[str, str]: kv = s.split('=', 1) if len(kv) != 2: raise ValueError('Expecting an argument of the form KEY=VALUE.') return (kv[0], kv[1]) class JobNagiosPlugin(nagutils.NagiosPlugin, vomsutils.NagiosPluginVomsMixin): """Nagios probe to test ARC CEs. The probe has two sub-commands implemented by `check_submit` and `check_monitor`. The former is run on all CEs, while the latter is run to collect submitted jobs.""" # pylint: disable=abstract-method,super-init-not-called probe_name = 'ARCCE' main_config_section = ['arcce'] JOB_DESCRIPTION_FILENAME = 'job.xrsl' JOB_SCRIPT_FILENAME = 'job.sh' JOBID_FILENAME = 'active.jobid' ACTIVE_JOB_FILENAME = 'active.map' JOB_OUTPUT_DIRNAME = 'job_output' _archive_filenames = [ JOBID_FILENAME, JOB_DESCRIPTION_FILENAME, JOB_SCRIPT_FILENAME, ACTIVE_JOB_FILENAME ] prev_status = None # Timeout in seconds for cleaner tasks. cleaner_arcrm_timeout = 5 cleaner_arcclean_timeout = 5 cleaner_arckill_timeout = 5 def _disconnect_reputation_tracker(self): if self._reputation_tracker is not None: self._reputation_tracker.disconnect() def __init__(self, **kwargs): default_template_dirs = glob(os.path.join(self.config_dir(),'*.d')) default_template_dirs.sort() nagutils.NagiosPlugin.__init__(self, **kwargs) self.arcclient = None argp = self.argparser argp.add_argument('--fqan', dest = 'fqan') argp.add_argument('-O', dest = 'jobtest_options', action = 'append', type = key_value, default = [], help = 'Given a value of the form VAR=VALUE, binds VAR to ' 'VALUE in the environment of the job tests.') argp.add_argument('--template-dir', dest = 'template_dirs', action = 'append', default = default_template_dirs, help = 'Add a directory from which job description templates ' 'can be loaded.') argp.add_argument('--granular-perfdata', dest = 'granular_perfdata', default = False, action = 'store_true', help = 'Report ARC command timing performance data per host ' 'using labels of the form ARCCMD[HOST]. 
By default '
                   'report the aggregate time across hosts.')
        argp.add_argument('--concurrency', type = int, default = 20,
            help = 'The maximum number of ARC client processes to run '
                   'concurrently.')
        self._reputation_tracker = None
        self.at_exit(self._disconnect_reputation_tracker)

    def parse_args(self, args: List[str]) -> None:
        nagutils.NagiosPlugin.parse_args(self, args)
        self.opts.template_dirs.reverse()
        self.arcclient = ArcClient(self.perflog)
        self._reputation_tracker = ReputationTracker(
            self.config,
            os.path.join(self.opts.arcnagios_spooldir, 'reputation.db'))

    @cached_property
    def _arcclients_semaphore(self):
        return asyncio.Semaphore(self.opts.concurrency or 20)

    def top_vopath(self, suffix: str) -> str:
        return os.path.join(self.opts.arcnagios_spooldir,
                            self.voms_suffixed('ce') + '-' + suffix)

    @cached_property
    def top_workdir(self) -> str:
        return self.top_vopath('state')

    def workdir_for(self, host: str, job_tag: Optional[str]):
        if job_tag:
            return os.path.join(self.top_workdir, host + '#' + job_tag)
        else:
            return os.path.join(self.top_workdir, host)

    def archivedir_for(self, host: str, job_tag: Optional[str]):
        return os.path.join(self.top_vopath('archive'),
                time.strftime('%Y-%m/%Y-%m-%d/%H:%M:%S-') + host
                    + (job_tag and ('#' + job_tag) or ''))

    @cached_property
    def template_environment(self) -> jinja2.Environment:
        return jinja2.Environment(
            loader = jinja2.FileSystemLoader(self.opts.template_dirs),
            autoescape = False)

    def write_job_description(self, out_path: str, jobdesc: JobDescription) \
            -> None:
        jobdesc.verify()
        try:
            templ = self.template_environment.get_template(jobdesc.template)
        except jinja2.TemplateNotFound as exn:
            raise nagutils.ServiceUnknown('%s. The search included %s.'
                    % (exn, ', '.join(self.opts.template_dirs))) from exn
        content = templ.render(jd = jobdesc)
        with open(out_path, 'w', encoding='utf-8') as fh:
            fh.write(content)

    async def _cleaner_arcrm(self, url: str, n_attempts: int) -> bool:
        se_host = host_of_uri(url)
        with NagiosPerflogTimer(self.perflog, "arcrm_time", se_host):
            result = await arcclients.arcrm(
                url, force = n_attempts > 8,
                timeout = self.cleaner_arcrm_timeout, log = self.log)
            if result.is_ok():
                self.log.info('Removed test file %s.', url)
                return True
            else:
                self.log.warning('Failed to remove %s.', url)
                return False

    async def _cleaner_arcclean(self, job_id: str, n_attempts: int) -> bool:
        ce_host = host_of_uri(job_id)
        with NagiosPerflogTimer(self.perflog, "arcclean_time", ce_host):
            result = await arcclients.arcclean(
                job_id, force = n_attempts > 8,
                timeout = self.cleaner_arcclean_timeout, log = self.log)
            if result.is_ok():
                self.log.info('Removed job %s', job_id)
                return True
            else:
                self.log.warning('Failed to clean %s', job_id)
                return False

    async def _cleaner_arckill(self, job_id: str, n_attempts: int) -> bool:
        ce_host = host_of_uri(job_id)
        with NagiosPerflogTimer(self.perflog, "arckill_time", ce_host):
            result = await arcclients.arckill(
                job_id, timeout = self.cleaner_arckill_timeout,
                log = self.log)
            if result.is_ok():
                return True
            result = await arcclients.arcclean(
                job_id, force = n_attempts > 8,
                timeout = self.cleaner_arcclean_timeout, log = self.log)
            if result.is_ok():
                self.log.info('Killed %s', job_id)
                return True
            else:
                self.log.warning('Failed to kill %s', job_id)
                return False

    @cached_property
    def cleaner(self) -> Rescheduler:
        cleaner = Rescheduler(self.top_vopath('state.db'), 'cleaner',
                              nagios_report = self.nagios_report)
        cleaner.register('arcrm', self._cleaner_arcrm)
        cleaner.register('arcclean', self._cleaner_arcclean)
        cleaner.register('arckill', self._cleaner_arckill)
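        # Each handler registered above follows the same contract as
        # _cleaner_arcrm and friends: the Rescheduler awaits it with the
        # stored argument and the attempt count so far, and a False result
        # leaves the task scheduled for a retry on a later run (note the
        # force = n_attempts > 8 escalation in the handlers above).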
        self.at_exit(cleaner.close)
        return cleaner

    def cleanup_job_files(self, host: str, job_tag: Optional[str],
                          archive: bool = False) -> None:
        self.log.debug('Cleaning up job files for %s.', host)
        workdir = self.workdir_for(host, job_tag)
        # archdir = os.path.join(workdir,
        #         time.strftime('archive/%Y-%m-%d/%H:%M:%S'))
        archdir = self.archivedir_for(host, job_tag)
        archdir_created = False
        for filename in self._archive_filenames:
            try:
                if archive:
                    if os.path.exists(os.path.join(workdir, filename)):
                        if not archdir_created:
                            os.makedirs(archdir)
                            archdir_created = True
                        os.rename(os.path.join(workdir, filename),
                                  os.path.join(archdir, filename))
                else:
                    os.unlink(os.path.join(workdir, filename))
            except OSError:
                pass
        try:
            job_output_dir = os.path.join(workdir, self.JOB_OUTPUT_DIRNAME)
            if os.path.exists(job_output_dir) and os.listdir(job_output_dir):
                if archive:
                    if not archdir_created:
                        os.makedirs(archdir)
                        archdir_created = True
                    os.rename(job_output_dir,
                              os.path.join(archdir, self.JOB_OUTPUT_DIRNAME))
                else:
                    last_dir = job_output_dir + '.LAST'
                    shutil.rmtree(last_dir, ignore_errors = True)
                    os.rename(job_output_dir, last_dir)
        except OSError as exn:
            self.log.warning('Error clearing %s: %s', job_output_dir, exn)

    def load_active_job(self, host: str, job_tag: Optional[str]) \
            -> Optional[JobInfo]:
        """Load information about the current job on `host : str` tagged
        with `job_tag : str`, or `None` if no information is found."""
        workdir = self.workdir_for(host, job_tag)
        ajf = os.path.join(workdir, self.ACTIVE_JOB_FILENAME)
        if os.path.exists(ajf):
            self.log.debug('Loading job info from %s.', ajf)
            jobinfo = JobInfo()
            try:
                jobinfo.persistent_load(
                    ajf, log=self.log,
                    persistence_version=jobinfo.persistence_version)
                return jobinfo
            except ObsoletePersistentObject:
                self.log.warning('Ignoring outdated job file %s.', ajf)
                return None
            except (OSError, ParseError) as exn:
                self.log.error('Cannot load job file %s: %s', ajf, exn)
                return None
        return None

    def save_active_job(self, jobinfo: JobInfo, host: str,
                        job_tag: Optional[str]) -> None:
        """Save information about the current job running on `host : str`
        tagged with `job_tag : str`."""
        workdir = self.workdir_for(host, job_tag)
        ajf = os.path.join(workdir, self.ACTIVE_JOB_FILENAME)
        self.log.debug('Saving active job info.')
        jobinfo.persistent_save(
            ajf, log=self.log,
            persistence_version=jobinfo.persistence_version)

    def collect_active_jobids(self) -> Set[str]:
        jobids = set()
        for path in glob(os.path.join(self.top_workdir, '*', 'active.jobid')):
            try:
                with open(path, encoding='utf-8') as fh:
                    jobids.add(fh.readline().strip())
            except IOError as exn:
                self.log.error('Failed to read %s: %s', path, exn)
        return jobids

    async def _discard_stored_urls(self, jobinfo: JobInfo) -> None:
        for url in jobinfo.stored_urls:
            if not url.startswith('file:'):
                await self.cleaner.call('arcrm', url)

    def cleanup_job_tests(self, jobinfo: JobInfo) -> None:
        for test_name in jobinfo.tests:
            try:
                test = self.load_jobtest(test_name, hostname = jobinfo.host)
                test.cleanup(jobinfo.job_state)
            except Exception as exn: # pylint: disable=broad-except
                self.log.error('Error in cleanup %s for %s: %s',
                               test_name, jobinfo.job_id, exn)

    async def cleanup_job(
            self, jobinfo: JobInfo, archive: bool = False
    ) -> None:
        """Clean up job state from a fetched job."""
        await self._discard_stored_urls(jobinfo)
        self.cleanup_job_tests(jobinfo)
        self.cleanup_job_files(jobinfo.host, jobinfo.job_tag,
                               archive = archive)

    async def discard_job(
            self, jobinfo: JobInfo, archive: bool = False
    ) -> None:
        """Discard the job described by `jobinfo : JobInfo`."""
        if 
jobinfo.job_state.is_final(): await self.cleaner.call('arcclean', jobinfo.job_id) else: await self.cleaner.call('arckill', jobinfo.job_id) await self.cleanup_job(jobinfo, archive = archive) def load_jobtest(self, jobtest_name: str, **env) -> JobPlugin: """Load a plugin-based job-test from the section of the configuration specified by `jobtest_name`. The result is an instance of `JobPlugin` subclass specified by the ``jobplugin`` variable of the given section.""" env['config_dir'] = self.config_dir() if self.voms: env['voms'] = self.voms env.update(self.opts.jobtest_options) jobplugin_section = 'arcce.%s' % jobtest_name if not self.config.has_section(jobplugin_section): raise nagutils.ServiceUnknown( 'Missing configuration section %s for ' 'job-plugin test.' % jobplugin_section) jobplugin_name = self.config.get(jobplugin_section, 'jobplugin') jobplugin_cls = load_jobplugin(jobplugin_name) return jobplugin_cls(jobplugin_name, self.config, jobplugin_section, self._reputation_tracker, self.log, self.arcclient, env) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/check_arcce_clean.py0000644000175000002070000002041615002373741026421 0ustar mockbuildmock00000000000000"""check_arcce_clean - Re-runs failed cleanup tasks.""" import asyncio from datetime import datetime from enum import Enum import os import random import time import sqlite3 from typing import Tuple from arcnagios import arcutils, arcclients from arcnagios.ce.jobutils import JobNagiosPlugin from arcnagios.nagutils import OK, WARNING, CRITICAL, ServiceReport, ServiceOk from arcnagios.utils import counted_adjectives, log_process_error, \ Result, ResultOk, ResultError # FIXME: This is a workaround for dealing with an overfull jobs file, in which # case arcstat spends a bit over 60 seconds querying CEs, independent of the # timeout, while we really only need to know the list of orphaned job IDs. We # could keep an independent list of job IDs or CEs (to limit the arcstat query) # or a future version of arcstat could provide an option to disable the probing. 
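# The helper below assumes the jobs-file layout used by recent ARC
# clients: an SQLite database with a "jobs" table providing at least the
# columns id, localsubmissiontime (a Unix timestamp) and state.  If the
# ARC client changes this schema, the --bypass-arcstat path needs to be
# revisited.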
def read_jobs_file(): db_path = os.path.join(os.getenv("HOME"), ".arc/jobs.dat") with sqlite3.connect(db_path) as db: for row in db.execute("SELECT id, localsubmissiontime, state FROM jobs"): submitted = \ datetime.fromtimestamp(int(row[1])).strftime('%Y-%m-%d %H:%M:%S') state = arcutils.jobstate_of_str(row[2]) stat = arcutils.Arcstat( state=state, specific_state="", submitted=submitted, job_error=None, exit_code=None) yield (row[0], stat) PruneResult = Enum( "PruneResult", [("SKIPPED", 1), ("PRUNED", 2), ("FAILED", 3), ("TIMEDOUT", 4)]) class Check_arcce_clean(JobNagiosPlugin): def __init__(self): JobNagiosPlugin.__init__(self) argp = self.argparser.add_argument_group('Job Cleaner Options') argp.add_argument('--timeout', dest = 'timeout', type = int, default = 120, help = 'Overall timeout for probe, but currently does not limit ' 'scheduled cleanup.') argp.add_argument('--max-age', dest = 'max_age', type = int, default = 604800, help = 'Max age before jobs info is cleaned.') argp.add_argument('--arcclean-timeout-min', type=int, default=20, help = 'Minimum timeout to pass to arcclean when approaching ' 'the deadline before postponing the task.') argp.add_argument('--arcclean-timeout-max', type=int, default=20, help = 'Maximum timeout to pass to arcclean when enough time is ' 'available.') argp.add_argument('--arcstat-timeout', dest = 'arcstat_timeout', type = int, default = 5, metavar = 'T', help = 'Passed to arcstat --timeout.') argp.add_argument('--bypass-arcstat', action = 'store_true', default = False, help = 'Read jobs directly from ~/.arc/jobs.dat instead of ' 'invoking arcstat. This is a workaround to avoid ' 'timeout if the job file has become too big, esp. when ' 'there are many unavailable CEs.') argp.add_argument('-w', dest = 'warning_load', type = float, default = 10, help = 'Ratio of remaining work to processed work above which \ to issue a warning alert.') argp.add_argument('-c', dest = 'critical_load', type = float, default = 20, help = 'Ratio of remaining work to processed work above which \ to issue a critical alert.') self._t_start = time.time() def time_left(self) -> float: return self.opts.timeout - time.time() + self._t_start async def _prune_job(self, active_jobids, jobid, jobstat) -> PruneResult: if jobstat.submitted: tm_sub = time.strptime(jobstat.submitted, '%Y-%m-%d %H:%M:%S') t_sub = time.mktime(tm_sub) if self._t_start - t_sub <= self.opts.max_age: self.log.debug('Skipping too recent job %s.', jobid) return PruneResult.SKIPPED elif jobstat.state == arcutils.J_UNDEFINED and jobid in active_jobids: self.log.info('Skipping unavailable but active %s.', jobid) return PruneResult.SKIPPED async with self._arcclients_semaphore: t_left = self.time_left() if t_left < self.opts.arcclean_timeout_min: return PruneResult.TIMEDOUT arcclean_result = \ await arcclients.arcclean( jobid, force=True, timeout=min(t_left, self.opts.arcclean_timeout_max), log=self.log) if isinstance(arcclean_result, ResultError): synopsis = 'Failed to clean job %s (state %s, submitted %sZ).' 
\ % (jobid, jobstat.state, jobstat.submitted) log_process_error(self.log, arcclean_result.error, synopsis=synopsis, prefix='arcclean') return PruneResult.FAILED self.log.info('Cleaned job %s (state %s, submitted %sZ).', jobid, jobstat.state, jobstat.submitted) return PruneResult.PRUNED async def prune_jobs(self) -> Result[Tuple[int, int, int], Exception]: active_jobids = self.collect_active_jobids() t_left = self.time_left() if t_left < 1: self.log.warning('Timeout before querying jobs to prune.') return ResultError(RuntimeError('Timeout')) if self.opts.bypass_arcstat: jobstats = list(read_jobs_file()) else: jobstats = list( arcutils.arcstat( log=self.log, timeout=min(t_left, self.opts.arcstat_timeout), show_unavailable=True).items()) random.shuffle(jobstats) tasks = [ asyncio.create_task(self._prune_job(active_jobids, jobid, jobstat)) for jobid, jobstat in jobstats ] results = list(await asyncio.gather(*tasks)) pruned_count = sum(1 for r in results if r == PruneResult.PRUNED) failed_count = sum(1 for r in results if r == PruneResult.FAILED) rest_count = sum(1 for r in results if r == PruneResult.TIMEDOUT) return ResultOk((pruned_count, failed_count, rest_count)) def _check_load(self, load: float, msg: str) -> Tuple[int, str]: if load > self.opts.critical_load: msg += ', critical load!' return (CRITICAL, msg) elif load > self.opts.warning_load: msg += ', high load!' return (WARNING, msg) else: msg += '.' return (OK, msg) async def _check_async(self) -> ServiceReport: if not os.path.exists(self.top_workdir): self.log.info('The work directory is %s.', self.top_workdir) return ServiceOk('No jobs to clean since the working directory ' 'has not yet been created.') self.require_voms_proxy() # Run scheduled work. coroutine = self.cleaner.run( timeout = self.time_left() * 2 / 3, semaphore = self._arcclients_semaphore) s_ok, s_retry, s_failed, s_postponed = await coroutine s_load = s_postponed / float(s_ok + s_failed + 1) s_msg = 'Sched: ' + counted_adjectives( [(s_ok, 'ok'), (s_retry, 'to retry'), (s_failed, 'failed'), (s_postponed, 'postponed')], if_empty = 'no work') s_service_state, s_msg = self._check_load(s_load, s_msg) # Prune ARC jobs if there is time. j_result = await self.prune_jobs() if j_result.is_ok(): j_cleaned, j_failed, j_postponed = j_result.get() # pylint: disable=E1111 j_load = j_postponed / float(j_cleaned + j_failed + 1) j_msg = 'Jobfile: ' + counted_adjectives( [(j_cleaned, 'cleaned'), (j_failed, 'failed'), (j_postponed, 'postponed')], if_empty = 'no work') j_service_state, j_msg = self._check_load(j_load, j_msg) else: j_service_state = CRITICAL j_msg = "No time left for checking ARC jobs." # Announce result. 
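        # Nagios status codes are ordered OK(0) < WARNING(1) < CRITICAL(2),
        # so for the values produced above, taking the max of the two
        # sub-results reports the worse of the scheduler and job-file
        # outcomes as the overall service state.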
return ServiceReport(max(s_service_state, j_service_state), s_msg + ' ' + j_msg) def check(self) -> ServiceReport: return asyncio.run(self._check_async()) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/ce/check_arcce_monitor.py0000644000175000002070000005530115002373741027027 0ustar mockbuildmock00000000000000"""check_arcce_monitor - Fetches ARC jobs and posts results.""" import asyncio import os import random import re import time from typing import Optional, Tuple from arcnagios import arcutils, nagutils, utils from arcnagios.arcutils import Arcstat from arcnagios.arcclients import arcget, arcstat from arcnagios.nagutils import \ ServiceReport, ServiceOk, ServiceUnknown, OK, WARNING, CRITICAL, \ NagiosPerflogTimer from arcnagios.utils import nth, counted_adjectives, host_of_uri from arcnagios.ce.jobutils import JobInfo, JobNagiosPlugin _SPECIFIC_STATE_TR = dict((ord(c), None) for c in "'=[],:") _ARCGET_HARMLESS_RE = re.compile('|'.join([ r'$', r'Results stored at:.*', r'Warning: Some jobs were not removed.*', ])) def check_arcget_output(output: str) -> Tuple[bool, bool]: need_arcclean = False for line in output.split('\n'): line = line.strip() if line.startswith('Use arcclean to remove'): need_arcclean = True elif not re.match(_ARCGET_HARMLESS_RE, line): return False, need_arcclean return True, need_arcclean class JobActionStats: def __init__(self): self.error_count = 0 self.discarded_count = 0 self.unseen_count = 0 self.running_count = 0 self.cleaned_count = 0 self.retry_count = 0 self.postponed_count = 0 def load(self) -> float: return self.postponed_count \ / float(self.discarded_count + self.cleaned_count + 1) class Check_arcce_monitor(JobNagiosPlugin): def __init__(self): JobNagiosPlugin.__init__(self) self.start_time = time.time() argp = self.argparser argp.add_argument('--ce', dest = 'ces', default = [], action = 'append', metavar = 'CE', help = 'Pass one or more times to restrict monitoring ' 'to the given CEs.') argp.add_argument('--job-tag', dest = 'job_tags', default = [], action = 'append', metavar = 'TAG', help = 'Pass one or more times to restrict monitoring ' 'to the given job tags. Use "default" to match ' 'jobs started without a --job-tag option.') argp.add_argument('--termination-service', dest = 'termination_service', default = 'ARCCE Job Termination', help = 'Default service to submit result to if not specified ' 'when submitting the job. ' 'Deprecated: Should be passed on submission.') argp.add_argument('--max-sysinfo-lag', dest = 'max_infosys_lag', default = 3600.0, metavar = 'T', help = 'The maximum time to wait for a job to turn up in ' 'the arcstat listing before abandoning it.') argp.add_argument('--max-check-attempts', dest = 'max_check_attempts', default = 12, metavar = 'N', help = 'The maximum number of consecutive times a job in ' 'post-SUBMITTED state is absent from arcstat listing ' 'before it is abandoned.') argp.add_argument('--max-fetch-attempts', dest = 'max_fetch_attempts', default = 8, metavar = 'N', help = 'The maximum number of attempts to fetch a job before ' 'abandoning it.') argp.add_argument('--keep-failed-jobdata', dest = 'keep_failed_jobdata', action = 'store_true', default = False, help = 'Keep the job descriptions and output directories for ' 'failed jobs. These will not be removed automatically.') argp.add_argument('--keep-all-jobdata', dest = 'keep_all_jobdata', action = 'store_true', default = False, help = 'Keep the job descriptions and output directories for ' 'all jobs. 
These will not be removed automatically.')
        argp.add_argument('--timeout', dest = 'timeout',
            type = int, default = 110,
            help = 'Approximate overall timeout.')
        argp.add_argument('--arcstat-timeout', dest = 'arcstat_timeout',
            type = int, default = 5, metavar = 'T',
            help = 'Passed to arcstat --timeout.')
        argp.add_argument('--arcget-timeout', dest = 'arcget_timeout',
            type = int, default = 10,
            help = 'Maximum value to pass to arcget --timeout.')
        argp.add_argument('--show-arcget-output-here',
            dest = 'show_arcget_output_here',
            action = 'store_true', default = False,
            help = 'Include output from failed arcget in this service '
                   'in addition to posting it to the termination service.')
        argp.add_argument('-c', dest = 'critical_load',
            type = float, default = 20,
            help = 'Ratio of remaining work to processed work above which '
                   'to issue a critical alert.')
        argp.add_argument('-w', dest = 'warning_load',
            type = float, default = 10,
            help = 'Ratio of remaining work to processed work above which '
                   'to issue a warning alert.')

    def time_left(self) -> float:
        return self.opts.timeout - time.time() + self.start_time

    def parse_args(self, args) -> None:
        """Parse ARCCE-specific command-line options."""
        JobNagiosPlugin.parse_args(self, args)

    def _clean_output_dir(self, output_dir: str) -> None:
        conflict = '.conflict-%d' % int(time.time())
        for entry in os.listdir(output_dir):
            if not '.conflict-' in entry:
                subdir = os.path.join(output_dir, entry)
                self.log.warning('Moving away partially fetched output %s.',
                                 subdir)
                os.rename(subdir, subdir + conflict)

    def prepare_top_output_dir(self, jobinfo: JobInfo) -> str:
        workdir = self.workdir_for(jobinfo.host, jobinfo.job_tag)
        job_output_dir = os.path.join(workdir, self.JOB_OUTPUT_DIRNAME)
        if os.path.exists(job_output_dir):
            self._clean_output_dir(job_output_dir)
        return job_output_dir

    def locate_output_dir(self, top_output_dir: str) -> Optional[str]:
        for subdir in os.listdir(top_output_dir):
            if subdir not in ['.', '..'] and '.conflict-' not in subdir:
                return os.path.join(top_output_dir, subdir)
        return None

    async def fetch_job(self, jobinfo: JobInfo,
                        job_error: Optional[str] = None):
        """Fetch the job described by `jobinfo : JobInfo`, submit passive
        results, and return a tuple `(did_fetch, check_ok, status_code)`,
        where `did_fetch` indicates whether the job was fetched, `check_ok`
        indicates whether checking went well, and `status_code` is the
        overall Nagios status reported to the passive services for this
        job.
        """
        service_name = jobinfo.termination_service \
                    or self.opts.termination_service
        termination_report = self.nagios_report_for(jobinfo.host,
                                                    service_name)
        termination_report.update_status(nagutils.OK, 'Job succeeded.')

        # Report the final job state if the job failed.
        if jobinfo.job_state != arcutils.J_FINISHED:
            termination_report.update_status(nagutils.CRITICAL,
                    'Job terminated as %s.' % jobinfo.job_state)
            if job_error:
                self.log.error(job_error)
                termination_report.log.error(job_error)

        # Try to fetch the job. Exit if no files were fetched.
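        # A successful arcget places the fetched job in a subdirectory of
        # top_output_dir named by the ARC client (typically derived from
        # the job ID), which locate_output_dir then resolves.  An
        # illustrative layout, assuming the default stdout/stderr names
        # from the generated job description:
        #   <workdir>/job_output/<jobid-derived-name>/stdout.txt
        #   <workdir>/job_output/<jobid-derived-name>/stderr.txt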
        self.log.info('Fetching job %s in terminal state %s.',
                      jobinfo.job_id, jobinfo.job_state)
        top_output_dir = self.prepare_top_output_dir(jobinfo)
        async with self._arcclients_semaphore:
            with NagiosPerflogTimer(
                    self.perflog, "arcget_time",
                    host_of_uri(jobinfo.job_id)):
                timeout = min(self.opts.arcget_timeout, self.time_left())
                arcget_result = await arcget(
                    jobinfo.job_id, top_output_dir = top_output_dir,
                    timeout = timeout, log = self.log)
        job_output_dir = self.locate_output_dir(top_output_dir)
        if job_output_dir is None:
            if arcget_result.is_ok():
                if termination_report.status_code == nagutils.OK:
                    self.log.error('Subdirectory from arcget not found, it '
                                   'should have been under %s.',
                                   top_output_dir)
                    termination_report.update_status(nagutils.UNKNOWN,
                            'Output directory from arcget not found.')
                termination_report.log.error('JID: %s', jobinfo.job_id)
                did_fetch = True
                ok_check = termination_report.status_code != nagutils.OK
            else:
                self.log.error('Failed to fetch %s.', jobinfo.job_id)
                termination_report.update_status(nagutils.WARNING,
                        'Failed to fetch job.')
                if arcget_result.get_error().output:
                    details = 'Output from arcget:\n%s' \
                            % arcget_result.get_error().output
                    if self.opts.show_arcget_output_here:
                        self.log.error(details)
                    termination_report.log.error(details)
                termination_report.log.error('JID: %s', jobinfo.job_id)
                did_fetch = False
                ok_check = True
            return (did_fetch, ok_check, termination_report.status_code)

        # Check if arcget returned non-zero despite having fetched something.
        if arcget_result.is_error():
            is_harmless, need_arcclean \
                = check_arcget_output(arcget_result.get_error().output)
            if need_arcclean:
                termination_report.log.warning('Separate arcclean needed.')
                await self.cleaner.call('arcclean', jobinfo.job_id)
            if not is_harmless:
                termination_report.update_status(nagutils.WARNING,
                        '%s: %s' % (jobinfo.job_id, arcget_result))

        if jobinfo.job_state != arcutils.J_FINISHED:
            errors = \
                utils.file_contents(os.path.join(job_output_dir,
                                                 'stderr.txt'))
            if not errors is None and errors.strip() != '':
                self.log.error('stderr.txt for %s:', jobinfo.job_id)
                for line in errors.strip().split('\n'):
                    self.log.error('.. %s', line)
                details = 'stderr.txt:\n%s' % errors
                termination_report.log.error(details)
            termination_report.log.error('JID: %s', jobinfo.job_id)
            return (True, True, termination_report.status_code)

        # Run check and publish results from job tests.
        termination_report.log.info('JID: %s', jobinfo.job_id)
        status_code = termination_report.status_code
        for test_name in jobinfo.tests:
            test = self.load_jobtest(test_name, hostname = jobinfo.host)
            if test.service_description:
                report = self.nagios_report_for(jobinfo.host,
                                                test.service_description)
            else:
                report = self.nagios_report
            test.check(report, job_output_dir, jobinfo.stored_urls)
            if report.status_code > status_code:
                status_code = report.status_code
        if status_code != nagutils.OK:
            termination_report.log.error('JID: %s', jobinfo.job_id)
        return (True, True, status_code)

    def check_job_progress(self, jobinfo: JobInfo) -> None:
        if jobinfo.job_state_time is None or jobinfo.progress_service is None:
            return
        attrs = {}
        for ck_state in [jobinfo.job_specific_state,
                         jobinfo.job_state.name]:
            if ck_state and \
                    self.config.has_option('arcce.job-states',
                                           str(ck_state)):
                specs = self.config.get('arcce.job-states', str(ck_state))
                attrs = dict(kv.split(':', 1)
                             for kv in specs.split() if ':' in kv)
                break
        job_state_age = time.time() - jobinfo.job_state_time
        if 'c' in attrs and job_state_age > int(attrs['c']):
            status = nagutils.CRITICAL
            msg = ('Stuck in state %s (%s).'
                   % (jobinfo.job_specific_state, jobinfo.job_state.name))
        elif 'w' in attrs and job_state_age > int(attrs['w']):
            status = nagutils.WARNING
            msg = ('Stuck in state %s (%s).'
                   % (jobinfo.job_specific_state, jobinfo.job_state.name))
        else:
            status = nagutils.OK
            msg = 'Normal progress.'
        # This also triggers in the initial case when jobinfo.job_state_alert
        # is None, to clear any lingering alerts.
        if status != jobinfo.job_state_alert:
            report = \
                self.nagios_report_for(jobinfo.host,
                                       jobinfo.progress_service)
            report.update_status(status, msg)
            jobinfo.job_state_alert = status

    def report_job_state_time(self, jobinfo: JobInfo, t_now: float) -> None:
        if not self.opts.granular_perfdata:
            return
        assert jobinfo.job_state_time
        dt_state = t_now - jobinfo.job_state_time
        perf_indices = [
            'host:%s' % jobinfo.host,
            'arc_internal_job_state:%s' % jobinfo.job_state,
        ]
        if not jobinfo.job_specific_state is None:
            s = str(jobinfo.job_specific_state).translate(_SPECIFIC_STATE_TR)
            perf_indices.append('arc_middleware_job_state:%s' % s)
        perf_label = 'arc_job_state_time[%s]' % ','.join(perf_indices)
        self.add_perfdata(perf_label, dt_state, uom = 's', limit_min = 0)

    async def process_missing_job(
            self, jobinfo: JobInfo, job_action_stats: JobActionStats
    ) -> None:
        # A job missing from arcstat output can happen
        # a) right after submission before it becomes available,
        # b) temporarily if the CE infosys is unavailable, or
        # c) if the job has been permanently removed.
        jobinfo.check_attempts = jobinfo.check_attempts or 0
        if jobinfo.job_state == arcutils.J_NOT_SEEN \
                and time.time() - jobinfo.submission_time \
                    < self.opts.max_infosys_lag:
            # We hope it's case a and give it more time.
            self.log.info('Job %s of kind %s on %s not found yet.',
                          jobinfo.job_id, jobinfo.job_tag, jobinfo.host)
            job_action_stats.unseen_count += 1
        elif jobinfo.check_attempts < self.opts.max_check_attempts:
            # We hope it's case a or b and make a fixed number of
            # attempts.
            jobinfo.check_attempts = jobinfo.check_attempts + 1
            self.log.info('Job %s of kind %s on %s missing for '
                          'the %s time in state %s, still checking.',
                          jobinfo.job_id, jobinfo.job_tag, jobinfo.host,
                          nth(jobinfo.check_attempts), jobinfo.job_state)
            self.save_active_job(jobinfo, jobinfo.host, jobinfo.job_tag)
            job_action_stats.unseen_count += 1
        elif self.time_left() < 1:
            job_action_stats.postponed_count += 1
        else:
            # We give up, assuming c) the job has been removed, but
            # discard_job schedules repeated attempts to remove the job
            # and any staged files while new jobs are run.
            self.log.info('Job %s of kind %s on %s disappeared in '
                          'state %s, removing active job info.',
                          jobinfo.job_id, jobinfo.job_tag, jobinfo.host,
                          jobinfo.job_state)
            await self.discard_job(jobinfo,
                                   archive = self.opts.keep_failed_jobdata)
            job_action_stats.discarded_count += 1

    async def process_found_job(
            self, jobinfo: JobInfo, jobstat: Arcstat,
            job_action_stats: JobActionStats
    ) -> None:
        jobinfo.check_attempts = 0
        self.log.debug('Checking job on %s.', jobinfo.host)

        # Update job data.
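        # Note: check_job_progress (above) reads per-state warning/critical
        # age thresholds, in seconds, from the [arcce.job-states] section
        # as whitespace-separated "key:value" pairs.  An illustrative (not
        # shipped) configuration entry, keyed on a job state name:
        #     [arcce.job-states]
        #     SOMESTATE = w:3600 c:7200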
t_now = time.time() if jobinfo.job_state != jobstat.state \ or jobinfo.job_specific_state != jobstat.specific_state: self.report_job_state_time(jobinfo, t_now) jobinfo.job_state = jobstat.state jobinfo.job_specific_state = jobstat.specific_state jobinfo.job_state_time = t_now jobinfo.check_time = t_now self.check_job_progress(jobinfo) if not jobinfo.job_state.is_final(): self.save_active_job(jobinfo, jobinfo.host, jobinfo.job_tag) job_action_stats.running_count += 1 elif self.time_left() < 1: job_action_stats.postponed_count += 1 else: did_fetch, ok_check, passive_status_code = \ await self.fetch_job(jobinfo, jobstat.job_error) if not ok_check: job_action_stats.error_count += 1 archive = self.opts.keep_failed_jobdata \ and passive_status_code != nagutils.OK \ or self.opts.keep_all_jobdata if did_fetch: reputation_choices = jobinfo.reputation_choices or {} for dist_name, choice_name in reputation_choices.items(): ok_rep = passive_status_code == nagutils.OK self.log.debug('Reputation for %s choice %s is %s.', dist_name, choice_name, ok_rep and 'good' or 'bad') self._reputation_tracker.submit( dist_name, choice_name, ok_rep) await self.cleanup_job(jobinfo, archive = archive) job_action_stats.cleaned_count += 1 elif (jobinfo.fetch_attempts or 0) < self.opts.max_fetch_attempts: jobinfo.fetch_attempts = (jobinfo.fetch_attempts or 0) + 1 self.log.info('Will retry fetching %s.', jobinfo.job_id) self.save_active_job(jobinfo, jobinfo.host, jobinfo.job_tag) job_action_stats.retry_count += 1 else: self.log.warning('Giving up on fetching %s.', jobinfo.job_id) await self.discard_job(jobinfo, archive = archive) job_action_stats.discarded_count += 1 async def _check_async(self) -> ServiceReport: """Monitor submitted jobs.""" if not os.path.exists(self.top_workdir): self.log.info('The work directory is %s.', self.top_workdir) return ServiceOk('No jobs to monitor since the working directory ' 'has not yet been created.') self.require_voms_proxy() if self.opts.ces == []: ces = None else: ces = set(self.opts.ces) if self.opts.job_tags == []: job_tags = None else: job_tags = set(self.opts.job_tags) # Collect the list of active jobs. error_count = 0 jobinfo_by_id = {} for svc_dir in os.listdir(self.top_workdir): if not os.path.isdir(os.path.join(self.top_workdir, svc_dir)): continue if '#' in svc_dir: host, job_tag = svc_dir.split('#', 1) else: host, job_tag = svc_dir, None if not ces is None and not host in ces: continue if not job_tags is None and not (job_tag or 'default') in job_tags: continue jobinfo = self.load_active_job(host, job_tag) if jobinfo is None: self.log.debug('No active job info for %s.', host) self.cleanup_job_files( host, job_tag, archive=self.opts.keep_failed_jobdata) else: jobinfo.host = host jobinfo.job_tag = job_tag jobinfo_by_id[jobinfo.job_id] = jobinfo query_jobids = [jobinfo.job_id for jobinfo in jobinfo_by_id.values()] random.shuffle(query_jobids) if query_jobids == []: msg = 'No jobs to query, found %d in terminal states.' \ % len(jobinfo_by_id) return ServiceOk(msg) # Obtains information from CEs about the active jobs. self.log.debug('Querying job IDs %s', ', '.join(query_jobids)) arcstat_result = await arcstat(query_jobids, timeout = self.opts.arcstat_timeout, log = self.log) if arcstat_result.is_error(): exn = arcstat_result.get_error() return ServiceUnknown("Failed to query status of jobs: %s" % exn) arcstat_response = arcstat_result.get() self.log.info('Queried %d jobs, found %d.', len(query_jobids), len(arcstat_response.jobs)) # Process jobs. 
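        # The per-job coroutines created below all mutate the shared
        # JobActionStats counters; this needs no locking because asyncio
        # interleaves the tasks cooperatively on a single thread.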
job_action_stats = JobActionStats() futures = [] for jobid in query_jobids: jobinfo = jobinfo_by_id[jobid] jobstat = arcstat_response.jobs.get(jobid) if jobstat: futures.append(asyncio.ensure_future( self.process_found_job(jobinfo, jobstat, job_action_stats))) else: futures.append(asyncio.ensure_future( self.process_missing_job(jobinfo, job_action_stats))) if futures != []: await asyncio.gather(*futures) error_count += job_action_stats.error_count # Summary and report. status_code = OK msg = counted_adjectives( [(job_action_stats.discarded_count, "discarded"), (job_action_stats.unseen_count, "unseen"), (job_action_stats.running_count, "running"), (job_action_stats.cleaned_count, "cleaned"), (job_action_stats.retry_count, "to retry"), (job_action_stats.postponed_count, "postponed")], if_empty = 'Nothing to do') if error_count > 0: msg += ', %d errors' % error_count status_code = CRITICAL load = job_action_stats.load() if load > self.opts.critical_load: msg += ', critical load!' status_code = CRITICAL elif load > self.opts.warning_load: msg += ', high load!' status_code = max(status_code, WARNING) else: msg += '.' self.log.info('') self.log.info('Summary:') jobinfos = list(jobinfo_by_id.values()) jobinfos.sort() for jobinfo in jobinfos: self.log.info('- %s: %s', jobinfo.host_and_tag, jobinfo.job_state) return ServiceReport(status_code, msg) def check(self): return asyncio.run(self._check_async()) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/nagutils.py0000644000175000002070000005310415002373741024304 0ustar mockbuildmock00000000000000"""Base class and auxiliary classes for NAGIOS probes.""" from glob import glob import argparse from functools import cached_property from configparser import ConfigParser, InterpolationError, NoOptionError import logging import os from io import StringIO import sys import tempfile import time import traceback from typing import Any, Callable, Dict, List, NoReturn, Optional, Tuple from arcnagios import confargparse from arcnagios.utils import counted_noun _module_log = logging.getLogger(__name__) OK = 0 WARNING = 1 CRITICAL = 2 UNKNOWN = 3 _STATUS_NAME_BY_CODE = ['OK', 'WARNING', 'CRITICAL', 'UNKNOWN'] _STATUS_CODE_BY_NAME = { 'OK': OK, 'WARNING': WARNING, 'CRITICAL': CRITICAL, 'UNKNOWN': UNKNOWN, } def status_name(status): return _STATUS_NAME_BY_CODE[status] def status_by_name(name): try: return _STATUS_CODE_BY_NAME[name.upper()] except KeyError as exn: raise ValueError('%s is not a Nagios status name.' 
% name) from exn class ServiceReport(Exception): def __init__(self, status: int, message: str): self.status = status self.message = message Exception.__init__(self, status_name(status) + ': ' + message) class ServiceOk(ServiceReport): def __init__(self, message: str): ServiceReport.__init__(self, OK, message) class ServiceWarning(ServiceReport): def __init__(self, message: str): ServiceReport.__init__(self, WARNING, message) class ServiceCritical(ServiceReport): def __init__(self, message: str): ServiceReport.__init__(self, CRITICAL, message) class ServiceUnknown(ServiceReport): def __init__(self, message: str): ServiceReport.__init__(self, UNKNOWN, message) class NagiosPerfdata: def __init__( self, label: str, value: float, uom: Optional[str] = None, limit_warn: Optional[float] = None, limit_crit: Optional[float] = None, limit_min: Optional[float] = None, limit_max: Optional[float] = None): self.label = label self.value = value self.uom = uom self.limit_warn = limit_warn self.limit_crit = limit_crit self.limit_min = limit_min self.limit_max = limit_max def __str__(self): limits = [ self.limit_warn, self.limit_crit, self.limit_min, self.limit_max ] def to_str(x): if x is None: return '' else: return '%g' % x return '\'%s\'=%s%s;' % (self.label, self.value, self.uom or '') \ + ';'.join(map(to_str, limits)) def __iadd__(self, other): assert self.label == other.label assert self.uom == other.uom self.value += other.value if other.limit_warn: self.limit_warn += other.limit_warn if other.limit_crit: self.limit_crit += other.limit_crit if other.limit_min: self.limit_min += other.limit_min if other.limit_max: self.limit_max += other.limit_max return self class NagiosPerflog: def __init__(self, granular: bool = False): self._granular = granular self._perfdatas: Dict[str, NagiosPerfdata] = {} def add(self, label: str, value: float, **kwargs) -> None: perfdata = NagiosPerfdata(label, value, **kwargs) if label in self._perfdatas: perfdata += self._perfdatas[label] self._perfdatas[label] = perfdata def addi(self, label: str, index: Optional[str], value: float, **kwargs) \ -> None: if self._granular and not index is None: self.add(label + '[' + index + ']', value, **kwargs) else: self.add(label, value, **kwargs) def is_empty(self) -> bool: return self._perfdatas == {} def __str__(self) -> str: return ' '.join(map(str, self._perfdatas.values())) class NagiosPerflogTimer: def __init__(self, perflog: NagiosPerflog, label: str, index: Optional[str]): self._perflog: NagiosPerflog = perflog self._label: str = label self._index: Optional[str] = index self._start_time: Optional[float] = None self._stop_time: Optional[float] = None def __enter__(self): self._start_time = time.time() def __exit__(self, exc_type, exc_value, exc_tb): if self._start_time: self._stop_time = time.time() elapsed = self._stop_time - self._start_time self._perflog.addi(self._label, self._index, elapsed, uom='s', limit_min=0) class NagiosReport: """Instances of this class collects information to be reported to Nagios. 
You should use one instance for the active check result, and one instance for each passive (``host_name``, ``service_description``) kombination to target.""" def __init__(self, host_name: str, service_description: str): self._log_buffer = StringIO() self.host_name = host_name self.service_description = service_description self.log = logging.Logger('%s/%s'%(host_name, service_description)) self.log.addHandler(logging.StreamHandler(self._log_buffer)) self.status_code = OK self.status_messages: List[List[str]] = [[], [], [], []] self.status_code_counts: List[int] = [0, 0, 0, 0] def update_status_code(self, status_code: int): """Update the Nagios exit code to the maximum of ``status_code`` and the current code.""" assert OK <= status_code <= UNKNOWN self.status_code_counts[status_code] += 1 if status_code > self.status_code: self.status_code = status_code def update_status( self, status_code: int, status_message: Optional[str] = None) \ -> None: """Update the Nagios exit code to the maximum of ``status_code`` and the current code, and add ``status_message`` to the messages to be used in case no higher exit codes overrides this call.""" self.update_status_code(status_code) if status_message: self.status_messages[status_code].append(status_message) def status_message(self, subject: Optional[str] = None) -> str: """Format a status message suitable as the first line of output to Nagios. This will be based on the calls to `update_status` which are relevant for the final exit code. If no messages are registered for the code, make a generic message, possibly referring to `subject`.""" if self.status_messages[self.status_code] == []: name = subject or 'service' count = self.status_code_counts[self.status_code] if self.status_code == OK: return '%s OK'%(name.capitalize()) if self.status_code == WARNING: return '%s in %s'%(counted_noun(count, 'warning'), name) if self.status_code == CRITICAL: return '%s in %s'%(counted_noun(count, 'error'), name) return 'Failed to check %s'%name return ' '.join(self.status_messages[self.status_code]) @property def status_details(self) -> str: """A string containing messages logged to `self.log`.""" return self._log_buffer.getvalue() _LOG_LEVEL_BY_NAME = { 'DEBUG': logging.DEBUG, 'INFO': logging.INFO, 'WARN': logging.WARN, 'WARNING': logging.WARNING, 'ERROR': logging.ERROR, 'CRITICAL': logging.CRITICAL, } class NagiosPlugin: probe_name: str bugtracker_url: str = 'http://bugzilla.nordugrid.org/' main_config_section: List[str] = [] def __init__( self, use_host: bool = False, use_port: bool = False, default_port: Optional[int] = None): # Set up a logger for collecting messages and initialize perfdata. self.perflog: NagiosPerflog self._passive_reports: Dict[Tuple[str, str], NagiosReport] = {} def scan_loglevel(s): try: return int(s) except ValueError: pass try: return _LOG_LEVEL_BY_NAME[s.upper()] except KeyError: # pylint: disable=W0707 raise argparse.ArgumentTypeError('Invalid loglevel.') # Define the base argument parsers. Plug-in implementations will # extend it or the sub-parsers. self.argparser = confargparse.ConfigArgumentParser() argp = self.argparser.add_argument_group('General Options') argp.add_argument('--loglevel', dest = 'loglevel', type = scan_loglevel, default = 'WARNING', help = 'Set the log level for NAGIOS messages. 
'
                 'Use either numeric or symbolic names corresponding to '
                 'the definitions in the Python logging module.')
        seems_like_manual = os.getenv('NAGIOS_HOSTNAME') is None
        argp.add_argument('--how-invoked', dest = 'how_invoked',
            choices = ['manual', 'nagios'],
            default = seems_like_manual and 'manual' or 'nagios',
            help = 'Indicate how invoked. '
                 '(Default: Check a Nagios environment variable).')
        argp.add_argument('--command-file', dest = 'command_file',
            default = os.getenv('NAGIOS_COMMANDFILE',
                                '/var/spool/nagios/cmd/nagios.cmd'),
            help = 'The Nagios command file. By default the '
                 '$NAGIOS_COMMANDFILE environment variable is used, '
                 'which is usually what you want.')
        argp.add_argument('--multiline-separator', dest = 'multiline_separator',
            default = ' - ',
            help = 'Replacement for newlines when submitting multiline '
                 'results to passive services. Pass the empty string '
                 'to drop extra lines.')
        argp.add_argument('--clip-passive-status', type=int, default = None,
            help = 'The maximum number of characters to publish to a '
                 'passive service status, including the ellipsis which '
                 'will be added to indicate that it is clipped.')
        if use_host:
            argp.add_argument('-H', '--host', dest = 'host',
                help = 'Host of service to check.')
        if use_port:
            argp.add_argument('-p', '-P', '--port', dest = 'port',
                type = int, default = default_port,
                help = 'Port number of service to connect to. '
                     'Note: The capital -P alternative is deprecated.')
        argp.add_argument('--dump-options', dest = 'dump_options',
            default = False, action = 'store_true',
            help = 'Dump options to standard output. '
                 'Useful for debugging.')
        argp.add_argument('--arcnagios-spooldir', dest = 'arcnagios_spooldir',
            default = '/var/spool/arc/nagios',
            help = 'Top-level spool directory to be used by the ARC '
                 'Nagios plugins.')
        argp.add_argument('--home-dir', dest = 'home_dir',
            help = 'Override $HOME at startup. This is a workaround '
                 'for external commands which store things under '
                 '$HOME on systems where the nagios account does '
                 'not have an appropriate or writable home directory.')
        argp.add_argument('--import', dest = 'imports',
            action = 'append', default = [], metavar = 'MODULE',
            help = 'Import the given module. The module initializer can '
                 'register implementations for arcnagios.substitution '
                 'or arcnagios.jobplugins, or modify the environment.')

        self._remove_at_exit: List[str] = []
        self._at_exit: List[Callable[[], None]] = []
        self._created_tmpdir: bool = False
        self.opts: Any = None
        self.nagios_report: NagiosReport
        self.perflog = NagiosPerflog()
        self.log: logging.Logger

        # Initialize mix-ins if any.
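        # The cooperative super().__init__() below gives mix-in classes
        # (e.g. vomsutils.NagiosPluginVomsMixin, used by some probes) a
        # chance to run their own initialization; Python's MRO ensures that
        # each base initializer runs exactly once.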
super().__init__() def config_dir(self) -> str: return os.getenv('ARCNAGIOS_CONFIG_DIR', '/etc/arc/nagios') def config_paths(self) -> List[str]: """A list of paths to search for the configuration.""" config_paths_env = os.getenv('ARCNAGIOS_CONFIG') if not config_paths_env is None: return config_paths_env.split(':') else: config_paths = glob(os.path.join(self.config_dir(), '*.ini')) config_paths.sort() return config_paths @cached_property def config(self) -> ConfigParser: environment = {} environment['epoch_time'] = str(int(time.time())) parser = ConfigParser(defaults = environment) parser.read(self.config_paths()) return parser def at_exit(self, f: Callable[[], None]) -> None: self._at_exit.append(f) def remove_at_exit(self, *paths: str) -> None: self._remove_at_exit += paths def tmpdir(self) -> str: tmpdir = os.path.join(self.opts.arcnagios_spooldir, 'tmp') if not self._created_tmpdir and not os.path.exists(tmpdir): try: os.makedirs(tmpdir) except OSError as exn: self.nagios_exit(UNKNOWN, 'Cannot create %s: %s'%(tmpdir, exn)) return tmpdir def mkstemp(self, suffix: str = '', prefix: str = 'tmp') -> Tuple[int, str]: tmpdir = self.tmpdir() fd, path = tempfile.mkstemp(suffix, prefix, tmpdir) self.remove_at_exit(path) return fd, path def mktemp(self, suffix: str = '', prefix: str = 'tmp') -> str: tmpdir = self.tmpdir() path = tempfile.mktemp(suffix, prefix, tmpdir) self.remove_at_exit(path) return path def parse_args(self, args: List[str]) -> None: try: if self.main_config_section: self.argparser.configure_defaults( self.config, self.main_config_section) self.opts = self.argparser.parse_args(args) except Exception as exn: # pylint: disable=broad-except # TODO: Reconsider, we should rather consider it invalid. self.nagios_report = NagiosReport('UNKNOWN', 'ACTIVE') self.log = self.nagios_report.log raise exn host_name = getattr(self.opts, 'host', '') self.nagios_report = NagiosReport(host_name, 'ACTIVE') self.log = self.nagios_report.log if hasattr(self.opts, 'granular_perfdata'): self.perflog = NagiosPerflog(self.opts.granular_perfdata) if not self.opts.home_dir is None: os.environ['HOME'] = self.opts.home_dir # Set the log level. The level of self.log determines how much is # reported as additional lines to Nagios. self.log.setLevel(self.opts.loglevel) if self.opts.how_invoked == 'manual': # File-level loggers are used for debugging. Only increase the # level if manually invoked, since we may otherwise obscure the # input to Nagios. _module_log.root.setLevel(self.opts.loglevel) for module_name in self.opts.imports: try: __import__(module_name) except ImportError as exn: raise ServiceUnknown('Error importing %s: %s' % (module_name, exn)) from exn def check(self) -> ServiceReport: """Override this method in your plugin to implement the test. You should not call this directly, but use `nagios_run`, which will handle argument parsing and report results to Nagios.""" raise NotImplementedError('The `check` method has not been implemented.') def nagios_run(self) -> NoReturn: """This is the method to invoke from the probe script. It parses command-line options, calls the probe, prints Nagios-formatted output to stdout, and exits with an appropriate code.""" # Parse command-line arguments and configuration file. 
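        # Each failure mode below maps to a Nagios exit status: usage and
        # configuration errors exit UNKNOWN with a short hint, while any
        # unexpected exception is logged with a traceback and reported as a
        # probe bug, again as UNKNOWN.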
try: self.parse_args(sys.argv[1:]) except confargparse.UsageError as exn: self.log.error(str(exn)) self.log.error('Need --help?') self.nagios_exit(UNKNOWN, 'Invalid command-line options: %s' % exn) except confargparse.ConfigError as exn: self.log.error(str(exn)) self.nagios_exit(UNKNOWN, 'Invalid configuration: %s' % exn) if self.opts.dump_options: for k, v in vars(self.opts).items(): self.log.info('%s = %r', k, v) # Run the metric and report. try: service_report = self.check() except ServiceReport as exn: service_report = exn except SystemExit as exn: raise exn except NoOptionError as exn: self.nagios_exit(UNKNOWN, str(exn)) except InterpolationError as exn: for line in str(exn).split('\n'): self.log.error(line) self.nagios_exit(UNKNOWN, 'Error in configuration file.') except Exception as exn: # pylint: disable=broad-except _, _, trace = sys.exc_info() self.log.error('----%<----') self.log.error('Please report this bug to %s including ' 'the following:', self.bugtracker_url) self.log.error('%r', exn) self.log.error('Traceback:') for line in traceback.format_tb(trace): self.log.error(str(line.strip())) self.log.error('----%<----') self.nagios_exit(UNKNOWN, 'Bug in Nagios probe.') if service_report is None: self.nagios_exit() if not isinstance(service_report, ServiceReport): msg = 'Invalid value %r returned by plugin.' % service_report self.nagios_exit(UNKNOWN, msg) self.nagios_exit(service_report.status, service_report.message) def add_perfdata(self, label: str, value: float, **kwargs): self.perflog.add(label, value, **kwargs) def nagios_report_for( self, host_name: str, service_description: str, create = True): """Return a `NagiosReport` instance which will be submitted to the passive service `service_description` on `host_name`. Each time you call this with the same `host_name` and `service_description`, you will get the same instance.""" key = (host_name, service_description) report = self._passive_reports.get(key, None) if report is None and create: report = NagiosReport(host_name, service_description) self._passive_reports[key] = report return report def nagios_exit( self, status_code: int = OK, status_message: Optional[str] = None, subject: Optional[str] = None) -> NoReturn: """Submit all passives check results, update `self.nagios_report` with the given `status_code` and `status_message` if specified, then communicate `self.nagios_report` as the active check result. In particular, this writes out status messages, perfdatas and logging written to `self.nagios_report`, and calls `sys.exit` with `self.nagios_report.status_code`.""" for f in self._at_exit: f() for path in self._remove_at_exit: try: os.remove(path) except OSError: pass for report in self._passive_reports.values(): self.submit_passive_service_result( report.host_name, report.service_description, report.status_code, report.status_message(subject), report.status_details) self.nagios_report.update_status(status_code, status_message) sys.stdout.write(self.nagios_report.status_message(subject)) if self.perflog and not self.perflog.is_empty(): sys.stdout.write('|' + str(self.perflog)) sys.stdout.write('\n') sys.stdout.write(self.nagios_report.status_details) sys.exit(self.nagios_report.status_code) def submit_passive_service_result( self, host_name: str, svc_description: str, return_code: int, plugin_output: str, details = None): """Manually submit a passive service result. 
It is in general more convenient to use `nagios_report_for` and let
        `nagios_run` submit the results."""
        t_chk = int(time.time())
        if details and self.opts.multiline_separator:
            # FIXME: How do we submit multi-line results to passive services?
            # Currently it seems not to be supported.  This will look ugly,
            # but it's better than leaving the operator clueless.
            sep = self.opts.multiline_separator
            plugin_output += sep + details.strip().replace('\n', sep)
        if self.opts.clip_passive_status \
                and len(plugin_output) > self.opts.clip_passive_status:
            plugin_output = plugin_output[:self.opts.clip_passive_status-4] \
                .rsplit(None, 1)[0] + ' ...'
        rstr = '[%d] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n' % \
            (t_chk, host_name, svc_description, return_code, plugin_output)
        if self.opts.how_invoked == 'manual':
            _module_log.info(rstr)
            return
        try:
            with open(self.opts.command_file, 'w', encoding='utf-8') as fd:
                fd.write(rstr)
        except IOError as exn:
            raise ServiceUnknown('Cannot write to NAGIOS command file %s: %s'
                                 % (self.opts.command_file, exn)) from exn
nordugrid-arc-nagios-plugins-3.1.1/arcnagios/persistence.py0000644000175000002070000001215515002373741025003 0ustar mockbuildmock00000000000000"""Simple (key, value)-persistence using plain text files."""

import fcntl
import json
import logging
import os
from typing import Callable, Dict, List, Generic, TypeVar

from arcnagios.arcutils import ParseError

Alpha = TypeVar("Alpha")

class ObsoletePersistentObject(RuntimeError):
    pass

_module_log = logging.getLogger(__name__)

# File Locking
#

class locked_open: # pylint: disable=R1732,C0103
    def __init__(self, path: str, mode: str = 'r', encoding: str = 'utf-8'):
        assert mode in ['r', 'w']
        self._fh = open(path, mode, encoding=encoding)
        if mode == 'r':
            fcntl.lockf(self._fh, fcntl.LOCK_SH)
        else:
            fcntl.lockf(self._fh, fcntl.LOCK_EX)

    def __enter__(self):
        return self._fh

    def __exit__(self, exc_type=None, exc_value=None, exc_tb=None):
        self._fh.flush()
        os.fsync(self._fh.fileno())
        fcntl.lockf(self._fh, fcntl.LOCK_UN)
        self._fh.close()

# Type Descriptors of the form (decode, encode, required).
#

class PersistentType(Generic[Alpha]):
    def __init__(
            self,
            decode: Callable[[str], Alpha],
            encode: Callable[[Alpha], str] = str,
            required: bool = True):
        self.decode = decode
        self.encode = encode
        self.required = required

def pt_list(pt_elt):
    def decode(s: str) -> List[Alpha]:
        if s.strip():
            return list(map(pt_elt.decode, s.split(', ')))
        return []
    def encode(xs: List[Alpha]):
        return ', '.join(map(pt_elt.encode, xs))
    return PersistentType(decode, encode, False)

pt_int = PersistentType(int, str, True)
pt_int_opt = PersistentType(int, str, False)
pt_float = PersistentType(float, str, True)
pt_float_opt = PersistentType(float, str, False)
pt_str = PersistentType(str, str, True)
pt_str_opt = PersistentType(str, str, False)
pt_str_list = pt_list(pt_str)
pt_json = PersistentType(json.loads, json.dumps, True)
pt_json_opt = PersistentType(json.loads, json.dumps, False)

# Persistent Objects
#

class PersistentObject:
    """A class which represents a simple collection of attributes with
    human-readable pickling.  The pickling is limited to what is needed
    by the Nagios plugins.  The main point of this is to make the data
    presentable to the Nagios operator."""
    # pylint: disable=C0103

    persistent_attributes: Dict[str, PersistentType]

    def __init__(self, **kwargs):
        for k, _ in self.persistent_attributes.items():
            v = kwargs.pop(k, None)
            setattr(self, k, v)
        if kwargs:
            raise TypeError('Invalid keyword argument(s) %s.'
% ', '.join(kwargs)) def persistent_load(self, path: str, log: logging.Logger = _module_log, persistence_version: int = 0): with locked_open(path) as fh: persistence_version_of_file = 0 line_number = 0 for line in fh: line_number += 1 kv = line.split(': ', 1) if len(kv) != 2: log.error('%s:%d: Invalid or old file format.', path, line_number) raise ParseError('Invalid format for PersistentObject.') k, v = kv v = v.strip() if k == 'persistence_version': persistence_version_of_file = int(v) continue if not k in self.persistent_attributes: log.warning('%s:%d: Ignoring unknown attribute.', path, line_number) continue try: setattr(self, k, self.persistent_attributes[k].decode(v)) except UnicodeDecodeError as exn: log.error('%s:%d: %s', path, line_number, exn) for k, pt in self.persistent_attributes.items(): if pt.required: if getattr(self, k) is None: if persistence_version_of_file == persistence_version: raise ParseError('Missing required attribute %s.'%k) raise ObsoletePersistentObject( 'Missing required attribute %s in ' 'outdated %s with version %d.' % (k, path, persistence_version_of_file)) def persistent_save(self, path: str, log: logging.Logger = _module_log, persistence_version: int = 0): # pylint: disable=unused-argument for k, pt in self.persistent_attributes.items(): if pt.required: if getattr(self, k) is None: raise ValueError('Tried to save incomplete persistent ' 'object; missing attribute %s.' % k) with locked_open(path, 'w') as fh: fh.write('persistence_version: %d\n' % persistence_version) for k, pt in self.persistent_attributes.items(): v = getattr(self, k) if not v is None: fh.write('%s: %s\n'%(k, pt.encode(v))) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/__init__.py0000644000175000002070000000017115002373741024211 0ustar mockbuildmock00000000000000import sqlite3 # Make sure we're using serialized mode; it is not the default e.g. under EL-9. 
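# Per PEP 249, threadsafety == 3 means "threads may share the module,
# connections and cursors".  Note that the assignment below is advisory: it
# records the expectation but does not by itself reconfigure SQLite.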
sqlite3.threadsafety = 3 nordugrid-arc-nagios-plugins-3.1.1/arcnagios/rescheduler.py0000644000175000002070000001462215002373741024765 0ustar mockbuildmock00000000000000"""Rescheduling of failed cleanup tasks.""" import asyncio import logging import random import sqlite3 import time from typing import Any, Awaitable, Callable, Dict, Optional, Sequence, Tuple from arcnagios.nagutils import NagiosReport, WARNING from arcnagios.utils import nth DB_BUSY_TIMEOUT = 10 _default_log = logging.getLogger(__name__) _CREATE_SQL = """\ CREATE TABLE %s ( n_attempts integer NOT NULL, t_sched integer NOT NULL, task_type varchar(16) NOT NULL, arg text NOT NULL )""" def _format_time(t: float): return time.strftime('%Y-%m-%d %H:%M', time.localtime(t)) Handler = Callable[[str, int], Awaitable[bool]] class TaskType: def __init__(self, handler: Handler, min_delay: int = 3600, max_attempts: int = 12, delay_dev: float = 0.1): self.handler = handler self.min_delay = min_delay self.max_attempts = max_attempts self.delay_dev = delay_dev def next_delay(self, n_attempts: int) -> float: return (self.min_delay << n_attempts) * random.gauss(1.0, self.delay_dev) class Rescheduler: def __init__(self, db_path: str, table_name: str, nagios_report: Optional[NagiosReport] = None, log: Optional[logging.Logger] = None): self._db = sqlite3.connect(db_path, DB_BUSY_TIMEOUT) self._table = table_name self._task_types: Dict[str, TaskType] = {} self._nagios_report = nagios_report if log is None: if nagios_report is None: log = _default_log else: log = nagios_report.log self._log = log try: self._db.execute(_CREATE_SQL % self._table) except sqlite3.OperationalError: pass def close(self) -> None: self._db.close() def _report_warning(self, msg: str = 'Check rescheduler errors.') -> None: if self._nagios_report: self._nagios_report.update_status(WARNING, msg) def _update(self, stmt: str, *args) -> None: try: self._db.execute(stmt, args) self._db.commit() except sqlite3.OperationalError as exn: self._log.error('Failed to update rescheduled work: %s', exn) self._report_warning() def _query(self, stmt: str, *args) -> Sequence[Any]: try: return self._db.execute(stmt, args).fetchall() except sqlite3.OperationalError as exn: self._log.error('Failed to fetch rescheduled work: %s', exn) self._report_warning() return [] def register(self, task_type_name: str, h: Handler, min_delay: int = 3600, max_attempts: int = 12, delay_dev: float = 0.1) -> None: self._task_types[task_type_name] \ = TaskType(h, min_delay, max_attempts, delay_dev) def schedule(self, task_type_name: str, arg: str, n_attempts: int = 0) \ -> None: handler = self._task_types[task_type_name] t_sched = time.time() + handler.next_delay(n_attempts) self._update('INSERT INTO %s (n_attempts, t_sched, task_type, arg) ' 'VALUES (?, ?, ?, ?)' % self._table, n_attempts, t_sched, task_type_name, arg) def _unschedule_rowid(self, rowid: int) -> None: self._update('DELETE FROM %s WHERE ROWID = ?' % self._table, rowid) def _reschedule_rowid(self, rowid: int, n_attempts: int, t_sched: float) \ -> None: self._update('UPDATE %s SET n_attempts = ?, t_sched = ? ' 'WHERE ROWID = ?' % self._table, n_attempts, t_sched, rowid) async def call(self, task_type_name: str, arg: str) -> bool: if await self._task_types[task_type_name].handler(arg, 0): return True self.schedule(task_type_name, arg, n_attempts = 1) return False async def run( self, timeout: float, semaphore: asyncio.Semaphore ) -> Tuple[int, int, int, int]: """Run pending jobs. 
Currently timeout is the deadline for starting jobs, so the maximum full running time will be timeout plus the maximum time of an individual job.""" t_now = time.time() t_deadline = t_now + timeout success_count = 0 failed_count = 0 resched_count = 0 postponed_count = 0 async def process(rowid, n_attempts, t_sched, task_type_name, arg): nonlocal success_count, failed_count, resched_count, postponed_count if not task_type_name in self._task_types: self._log.warning('No task type %s.', task_type_name) return if time.time() >= t_deadline: postponed_count += 1 return task_type = self._task_types[task_type_name] try: is_ok = await task_type.handler(arg, n_attempts) except Exception as exn: # pylint: disable=broad-except self._log.error('Task %s(%r) raised exception: %s', task_type_name, arg, exn) is_ok = False if is_ok: self._log.info('Finished %s(%r)', task_type_name, arg) self._unschedule_rowid(rowid) success_count += 1 elif n_attempts >= task_type.max_attempts: self._log.info('Giving up on %s(%r)', task_type_name, arg) self._unschedule_rowid(rowid) failed_count += 1 else: t_sched = t_now + task_type.next_delay(n_attempts) n_attempts += 1 self._log.info('Scheduling %s attempt at %s to %s(%r)', nth(n_attempts), _format_time(t_sched), task_type_name, arg) self._reschedule_rowid(rowid, n_attempts, t_sched) resched_count += 1 async def process_with_semaphore(row): async with semaphore: await process(*row) rows = self._query( 'SELECT ROWID, n_attempts, t_sched, task_type, arg ' 'FROM %s WHERE t_sched <= ?' % self._table, t_now) if rows: tasks = [ asyncio.create_task(process_with_semaphore(row)) for row in rows ] await asyncio.gather(*tasks) return (success_count, resched_count, failed_count, postponed_count) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/utils.py0000644000175000002070000000704615002373741023622 0ustar mockbuildmock00000000000000"""General utilities.""" import logging from urllib.parse import urlsplit from subprocess import CalledProcessError from typing import Callable, Generic, List, Optional, Tuple, TypeVar Alpha = TypeVar('Alpha') Beta = TypeVar('Beta') class Unspecified: pass unspecified = Unspecified() def ident(x: Alpha) -> Alpha: return x def map_option(f: Callable[[Alpha], Beta], opt: Optional[Alpha]) \ -> Optional[Beta]: if opt is None: return None return f(opt) class Result(Generic[Alpha, Beta]): def is_ok(self) -> bool: raise NotImplementedError def is_error(self) -> bool: raise NotImplementedError def get(self) -> Alpha: raise NotImplementedError def get_error(self) -> Beta: raise NotImplementedError def __str__(self) -> str: raise NotImplementedError class ResultOk(Result[Alpha, Beta]): def __init__(self, value): Result.__init__(self) self.value = value def is_ok(self) -> bool: return True def is_error(self) -> bool: return False def get(self) -> Alpha: return self.value def get_error(self) -> Beta: raise ValueError("ResultOk.get_error: Not an error.") def __str__(self) -> str: return str(self.value) class ResultError(Result[Alpha, Beta]): def __init__(self, error): Result.__init__(self) self.error = error def is_ok(self) -> bool: return False def is_error(self) -> bool: return True def get(self) -> Alpha: if isinstance(self.error, Exception): raise self.error raise ValueError('ResultError.get: Not ok.') def get_error(self) -> Beta: return self.error def __str__(self) -> str: return 'ResultError: %s' % self.error def counted_noun( count: int, sing_word: str, pl_word: Optional[str] = None) -> str: if count == 1: return '%d %s'%(count, sing_word) else: return '%d 
%s'%(count, pl_word or sing_word + 's') def counted_adjectives(cws: List[Tuple[int, str]], if_empty: str = '') -> str: return ', '.join(map(lambda cw: '%d %s' % cw, filter(lambda cw: cw[0] != 0, cws))) or if_empty def nth(n: int) -> str: if n % 100 > 3 and n % 100 < 21: return str(n) + 'th' else: return str(n) + {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th') def file_contents(path, encoding: str = 'utf-8') -> Optional[str]: try: with open(path, encoding=encoding) as fh: return fh.read() except IOError: return None def host_of_uri(jobid: str) -> str: netloc = urlsplit(jobid)[1] if ':' in netloc: return netloc.split(':', 1)[0] else: return netloc def log_process_error( log: logging.Logger, exn: CalledProcessError, synopsis: Optional[str] = None, prefix: Optional[str] = None) -> None: if exn.output: hint = 'stderr follows' else: hint = 'no stderr' if synopsis: log.error('%s: %s (%s)', synopsis.capitalize(), str(exn), hint) else: log.error('%s (%s)', str(exn), hint) if exn.output: if prefix: prefix = '[%s] ' % prefix else: prefix = '' # exn.output may be str or bytes depending on how the subprocess # function is invoked. if isinstance(exn.output, str): output = exn.output else: output = exn.output.decode('utf-8') for line in output.strip().split('\n'): log.error(prefix + line) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/arcutils.py0000644000175000002070000004100315002373741024277 0ustar mockbuildmock00000000000000"""Wrappers around ARC client commands and related utilities.""" from datetime import datetime import logging import os import re import time from typing import \ Any, Callable, Dict, Generic, List, NewType, Optional, Tuple, TypeVar from subprocess import Popen, PIPE, CalledProcessError from arcnagios.utils import map_option, host_of_uri from arcnagios.utils import Result, ResultOk, ResultError from arcnagios.nagutils import NagiosPerflog Alpha = TypeVar("Alpha") class ParseError(Exception): """Exception raised on unrecognized command output.""" # Job States # JobStage = NewType("JobStage", int) S_UNKNOWN = JobStage(0) S_ENTRY = JobStage(1) S_PRERUN = JobStage(2) S_INLRMS = JobStage(3) S_POSTRUN = JobStage(4) S_FINAL = JobStage(5) class JobState: def __init__(self, name: str, stage: JobStage): self.name = name self.stage = stage def __str__(self) -> str: return self.name def is_final(self) -> bool: return self.stage == S_FINAL class InlrmsJobState(JobState): def __init__(self, name: str): JobState.__init__(self, name, S_INLRMS) class PendingJobState(JobState): def __init__(self, name: str, stage: JobStage, pending: JobState): JobState.__init__(self, name, stage) self.pending = pending _JOB_STATE_OF_STR = {} def _add_job_state(name: str, stage: JobStage = S_UNKNOWN) -> JobState: job_state: JobState if name.startswith('INLRMS:'): assert stage in (S_UNKNOWN, S_INLRMS) job_state = InlrmsJobState(name) elif name.startswith('PENDING:'): pending = jobstate_of_str(name[8:]) job_state = PendingJobState(name, stage, pending) else: job_state = JobState(name, stage) _JOB_STATE_OF_STR[name] = job_state return job_state def jobstate_of_str(name: str) -> JobState: if not name in _JOB_STATE_OF_STR: _JOB_STATE_OF_STR[name] = _add_job_state(name) return _JOB_STATE_OF_STR[name] J_NOT_SEEN = _add_job_state("NOT_SEEN", stage = S_ENTRY) J_ACCEPTED = _add_job_state("Accepted", stage = S_ENTRY) J_PREPARING = _add_job_state("Preparing", stage = S_PRERUN) J_SUBMITTING = _add_job_state("Submitting", stage = S_PRERUN) J_HOLD = _add_job_state("Hold", stage = S_PRERUN) J_QUEUING = _add_job_state("Queuing", stage = 
S_INLRMS) J_RUNNING = _add_job_state("Running", stage = S_INLRMS) J_FINISHING = _add_job_state("Finishing", stage = S_POSTRUN) J_FINISHED = _add_job_state("Finished", stage = S_FINAL) J_KILLED = _add_job_state("Killed", stage = S_FINAL) J_FAILED = _add_job_state("Failed", stage = S_FINAL) J_DELETED = _add_job_state("Deleted", stage = S_FINAL) J_UNDEFINED = _add_job_state("Undefined", stage = S_UNKNOWN) J_OTHER = _add_job_state("Other", stage = S_UNKNOWN) # ARC Commands # def _time_arg(x: float) -> str: return str(int(x + 0.5)) class Arcstat: def __init__( self, *, state: JobState, specific_state: str, submitted: Optional[str], job_error: Optional[str], exit_code: Optional[int]): self.state = state self.specific_state = specific_state self.submitted = submitted self.job_error = job_error self.exit_code = exit_code def arcstat( jobids: Optional[List[int]] = None, log: Optional[logging.Logger] = None, timeout: int = 5, show_unavailable: bool = False) -> Dict[str, Arcstat]: cmd = ['arcstat', '-l', '--timeout', str(timeout)] if jobids is None: cmd.append('-a') else: cmd.extend(map(str, jobids)) if show_unavailable: cmd.append('-u') with Popen(cmd, stdout = PIPE, encoding = 'utf-8') as process: jobstats = {} line_number = 0 def parse_error(msg): if log: log.error('Unexpected output from arcstat at line %d: %s' % (line_number, msg)) else: raise ParseError('Unexpected output from arcstat at line %d: %s' % (line_number, msg)) def convert(jobid, jobstat): if jobstat['State'] == 'Undefined': state = J_UNDEFINED specific_state = None elif 'Specific state' in jobstat: state = jobstate_of_str(jobstat['State']) specific_state = jobstat['Specific state'] else: raise ParseError('Missing "State" or "Specific state" for %s.' % jobid) return Arcstat(state = state, specific_state = specific_state, exit_code = map_option(int, jobstat.get('Exit code')), submitted = jobstat.get('Submitted'), job_error = jobstat.get('Job Error')) jobid: Optional[str] = None jobfield: Optional[str] = None jobstat: Dict[str, str] = {} assert process.stdout for line in process.stdout: line_number += 1 if line.endswith('\n'): line = line[0:-1] if line.startswith('No jobs') or line.startswith('Status of '): break if line.startswith('Job:'): if not jobid is None: jobstats[jobid] = convert(jobid, jobstat) jobid = line[4:].strip() jobstat = {} jobfield = None elif line.startswith('Warning:'): if log: log.warning(line) elif line == '': pass elif line.startswith(' '): if jobfield is None: parse_error('Continuation line %r before job field.') continue jobstat[jobfield] += '\n' + line elif line.startswith(' '): kv = line.strip() try: jobfield, v = kv.split(':', 1) if jobid is None: parse_error('Missing "Job: ..." 
header before %r' % kv)
                        continue
                    jobstat[jobfield] = v.strip()
                except ValueError:
                    parse_error('Expecting ": ", got %r' % line)
                    continue
            else:
                parse_error('Unrecognized output %r' % line)
        if not jobid is None:
            jobstats[jobid] = convert(jobid, jobstat)
        return jobstats

FileType = NewType('FileType', int)
DIR = FileType(0)
FILE = FileType(1)
NEITHER_DIR_NOR_FILE = FileType(2)

def file_type_of_str(s) -> FileType:
    if s == 'dir':
        return DIR
    if s == 'file':
        return FILE
    return NEITHER_DIR_NOR_FILE

class ArclsEntry:
    def __init__(
            self, *,
            name: str,
            type_: FileType,
            size: int,
            modified: Optional[datetime] = None,
            checksum: Optional[str] = None,
            latency: Optional[str] = None):
        self.filename = name
        self.entry_type = type_
        self.size = size
        self.modified = modified
        self.checksum = checksum
        self.latency = latency

class PerfProcess(Generic[Alpha]):
    program: str

    def __init__(self, args: List[str],
                 perflog: Optional[NagiosPerflog],
                 perfindex: Optional[str]):
        # pylint: disable=R1732
        self._command = [self.program] + list(map(str, args))
        self._perflog = perflog
        self._perfindex = perfindex
        self._start_time = time.time()
        env = os.environ.copy()
        env["TZ"] = "UTC"
        self._popen = \
            Popen(self._command, stdout=PIPE, stderr=PIPE, encoding='utf-8',
                  env=env)
        self._result: Optional[Result[Alpha, CalledProcessError]] = None

    def __enter__(self):
        self._popen = self._popen.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._popen.__exit__(exc_type, exc_value, exc_tb)

    def _ok(self, stdout: str) \
            -> Result[Alpha, CalledProcessError]:
        return ResultOk(stdout)

    def _error(self, returncode: int, stderr: str) \
            -> Result[Alpha, CalledProcessError]:
        exn = CalledProcessError(returncode, self._command, stderr)
        return ResultError(exn)

    def communicate(self) -> Result[Alpha, CalledProcessError]:
        if self._result is None:
            stdout, stderr = self._popen.communicate()
            returncode = self._popen.returncode
            run_time = time.time() - self._start_time
            if self._perflog:
                label = self.program + '_time'
                if self._perfindex:
                    self._perflog.addi(
                        label, self._perfindex, run_time,
                        uom = 's', limit_min = 0)
                else:
                    self._perflog.add(label, run_time,
                                      uom = 's', limit_min = 0)
            if returncode == 0:
                self._result = self._ok(stdout)
            else:
                self._result = self._error(returncode, stderr)
        return self._result

class ArcsubProcess(PerfProcess):
    program = 'arcsub'

    def __init__(
            self, jobdesc_files: List[str], *,
            cluster: Optional[str] = None,
            jobids_to_file: Optional[str] = None,
            timeout: Optional[float] = None,
            perflog: NagiosPerflog,
            loglevel: Optional[str] = None,
            submissioninterface: Optional[str] = None,
            infointerface: Optional[str] = None):
        args = jobdesc_files
        if cluster:
            args += ['-C', cluster]
            if ':' in cluster:
                perfindex = host_of_uri(cluster)
            else:
                perfindex = cluster
        else:
            perfindex = None
        if jobids_to_file:
            args += ['-o', jobids_to_file]
        if timeout:
            args += ['-t', _time_arg(timeout)]
        if loglevel:
            args += ['-d', loglevel]
        if submissioninterface:
            args += ['-S', submissioninterface]
        if infointerface:
            args += ['-I', infointerface]
        PerfProcess.__init__(self, args, perflog, perfindex)

class ArcgetProcess(PerfProcess):
    program = 'arcget'

    def __init__(
            self, job_id: str,
            top_output_dir: Optional[str] = None,
            timeout: Optional[float] = None,
            perflog: Optional[NagiosPerflog] = None):
        ce_host = host_of_uri(job_id)
        args = [job_id]
        if not timeout is None:
            args += ['-t', _time_arg(timeout)]
        if not top_output_dir is None:
            args += ['-D', top_output_dir]
        PerfProcess.__init__(self, args, perflog, ce_host)

class ArckillProcess(PerfProcess):
    program = 
'arckill'

    def __init__(
            self, job_id: str,
            force: bool = False,
            timeout: Optional[float] = None,
            perflog: Optional[NagiosPerflog] = None):
        # pylint: disable=unused-argument
        ce_host = host_of_uri(job_id)
        args = [job_id]
        if not timeout is None:
            args += ['-t', _time_arg(timeout)]
        PerfProcess.__init__(self, args, perflog, ce_host)

class ArccleanProcess(PerfProcess):
    program = 'arcclean'

    def __init__(
            self, job_id: str,
            force: bool = False,
            timeout: Optional[float] = None,
            perflog: Optional[NagiosPerflog] = None):
        ce_host = host_of_uri(job_id)
        args = [job_id]
        if not timeout is None:
            args += ['-t', _time_arg(timeout)]
        if force:
            args.append('-f')
        PerfProcess.__init__(self, args, perflog, ce_host)

class ArcrmProcess(PerfProcess):
    program = 'arcrm'

    def __init__(
            self, url: str,
            force: bool = False,
            timeout: Optional[float] = None,
            perflog: Optional[NagiosPerflog] = None):
        se_host = host_of_uri(url)
        args = [url]
        if not timeout is None:
            args += ['-t', _time_arg(timeout)]
        if force:
            args.append('-f')
        PerfProcess.__init__(self, args, perflog, se_host)

class ArccpProcess(PerfProcess):
    program = 'arccp'

    def __init__(
            self, src_url: str, dst_url: str,
            timeout: Optional[float] = 20,
            transfer: bool = True,
            perflog: Optional[NagiosPerflog] = None):
        se_host = None
        if ':' in src_url:
            se_host = host_of_uri(src_url)
        elif ':' in dst_url:
            se_host = host_of_uri(dst_url)
        args = [src_url, dst_url]
        if not timeout is None:
            args += ['-t', _time_arg(timeout)]
        if not transfer:
            args.append('-T')
        PerfProcess.__init__(self, args, perflog, se_host)

def _str_or_na(arg: str) -> Optional[str]:
    if arg == '(n/a)':
        return None
    return arg

def _parse_modified(arg: str) -> Optional[datetime]:
    try:
        return datetime.fromisoformat(arg)
    except ValueError:
        return None

# Maps arcls headers to ArclsEntry.__init__ arguments.
_ARCLS_COLUMNS: Dict[str, Tuple[str, Callable[[str], Any]]] = {
    '<Name>': ('name', str),
    '<Type>': ('type_', file_type_of_str),
    '<Size>': ('size', int),
    '<Modified>': ('modified', _parse_modified),
    '<CheckSum>': ('checksum', _str_or_na),
    '<Latency>': ('latency', _str_or_na)
}

_ARCLS_DATE_RE = re.compile(r'(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})')
_ARCLS_DATE_REPL = r'\1T\2+00:00' # datetime.fromisoformat does not support Z

class ArclsProcess(PerfProcess[List[ArclsEntry]]):
    program = 'arcls'

    def __init__(self, url: str,
                 timeout: Optional[float] = 20,
                 perflog: Optional[NagiosPerflog] = None):
        se_host = host_of_uri(url)
        args = ['-l', url]
        if not timeout is None:
            args += ['-t', _time_arg(timeout)]
        PerfProcess.__init__(self, args, perflog, se_host)

    def _ok(self, stdout: str) -> Result[List[ArclsEntry], CalledProcessError]:
        entries = []
        lines = stdout.split('\n')[:-1]
        header_line = lines[0]
        header_fields = header_line.split()
        columns = []
        for column, header_field in enumerate(header_fields):
            if header_field in _ARCLS_COLUMNS:
                key = _ARCLS_COLUMNS[header_field][0]
                convert = _ARCLS_COLUMNS[header_field][1]
                columns.append((key, column, convert))
        for line in lines[1:]:
            # The <Modified> column contains a space, so convert it to
            # RFC 3339.
            line = _ARCLS_DATE_RE.sub(_ARCLS_DATE_REPL, line)
            # Limit the split in case the first field, which should be the file
            # name, contains spaces.
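            # For example (hypothetical listing), with the 6-field header a
            # line like
            #   my file file 13 2024-01-01T00:00:00+00:00 (n/a) (n/a)
            # rsplits into ['my file', 'file', '13',
            # '2024-01-01T00:00:00+00:00', '(n/a)', '(n/a)'].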
fields = line.rsplit(None, len(header_fields) - 1) if len(fields) != len(header_fields): raise RuntimeError( 'Line %r does not match header %r in output from %s' % (line, header_line, ' '.join(self._command))) kwargs = dict((key, convert(fields[column])) for key, column, convert in columns) entries.append(ArclsEntry(**kwargs)) return ResultOk(entries) class ArcClient: def __init__(self, perflog: Optional[NagiosPerflog] = None): self._perflog = perflog def arcsub(self, *args, **kwargs) -> str: kwargs['perflog'] = self._perflog with ArcsubProcess(*args, **kwargs) as process: return process.communicate() def arcget(self, *args, **kwargs) -> str: kwargs['perflog'] = self._perflog with ArcgetProcess(*args, **kwargs) as process: return process.communicate() def arcrm(self, *args, **kwargs) -> str: kwargs['perflog'] = self._perflog with ArcrmProcess(*args, **kwargs) as process: return process.communicate() def arcclean(self, *args, **kwargs) -> str: kwargs['perflog'] = self._perflog with ArccleanProcess(*args, **kwargs) as process: return process.communicate() def arckill(self, *args, **kwargs) -> str: kwargs['perflog'] = self._perflog with ArckillProcess(*args, **kwargs) as process: return process.communicate() def arccp(self, *args, **kwargs) -> str: kwargs['perflog'] = self._perflog with ArccpProcess(*args, **kwargs) as process: return process.communicate() def arcls(self, *args, **kwargs) \ -> Result[List[ArclsEntry], CalledProcessError]: kwargs['perflog'] = self._perflog with ArclsProcess(*args, **kwargs) as process: return process.communicate() nordugrid-arc-nagios-plugins-3.1.1/arcnagios/substitution.py0000644000175000002070000002352215002373741025233 0ustar mockbuildmock00000000000000"""Expanding substitutions in the configuration file.""" import os import random import re import time from typing import Callable, Dict, Optional from configparser import ConfigParser import ldap # type:ignore from arcnagios.nagutils import ServiceUnknown from arcnagios.reputation import ReputationTracker Environment = Dict[str, str] SubstitutionFunction = \ Callable[[ConfigParser, str, str, ReputationTracker, Environment], None] def get_interp_opt(config: ConfigParser, section: str, var: str, reputation_tracker: ReputationTracker, target_env: Environment) -> Optional[str]: if config.has_option(section, var): import_interpolated_variables( config, section, var, reputation_tracker, target_env) return config.get(section, var, vars = target_env) return None def get_interp(config: ConfigParser, section: str, var: str, reputation_tracker: ReputationTracker, target_env: Environment, default: Optional[str] = None) -> str: value = \ get_interp_opt(config, section, var, reputation_tracker, target_env) \ or default if value is None: raise ServiceUnknown('Configuration error: Missing %s in [%s].' 
% (var, section))
    return value

def _subst_option(config: ConfigParser, section: str, var: str,
                  reputation_tracker: ReputationTracker,
                  target_env: Environment) -> None:
    default = get_interp_opt(
        config, section, 'default', reputation_tracker, target_env)
    if default is None:
        raise ServiceUnknown('Missing required option (-O %s=...).'%var)
    target_env[var] = default

def _subst_getenv(config: ConfigParser, section: str, var: str,
                  reputation_tracker: ReputationTracker,
                  target_env: Environment) -> None:
    default = get_interp_opt(
        config, section, 'default', reputation_tracker, target_env)
    envvar = get_interp_opt(
        config, section, 'envvar', reputation_tracker, target_env)
    if envvar is None:
        prefix = get_interp(
            config, section, 'prefix', reputation_tracker, target_env, '')
        envvar = prefix + var
    v = os.getenv(envvar, default)
    if v is None:
        raise ServiceUnknown('Missing required option (-O) or '
                             'environment variable %s.' % envvar)
    target_env[var] = v

def _subst_ldap(config: ConfigParser, section: str, var: str,
                reputation_tracker: ReputationTracker,
                target_env: Environment) -> None:
    basedn = get_interp(
        config, section, 'basedn', reputation_tracker, target_env)
    filterstr = get_interp(
        config, section, 'filter', reputation_tracker, target_env)
    attribute = get_interp(
        config, section, 'attribute', reputation_tracker, target_env)
    attrlist = list(map(str.strip, attribute.split(',')))
    scope = ldap.SCOPE_SUBTREE
    errors = []
    entries = []
    for uri in get_interp(config, section, 'uri',
                          reputation_tracker, target_env).split():
        try:
            conn = ldap.initialize(uri)
            entries = conn.search_s(basedn, scope, filterstr,
                                    attrlist=attrlist)
            break
        except (ldap.BUSY, ldap.CONNECT_ERROR, ldap.OTHER,
                ldap.PROTOCOL_ERROR, ldap.TIMEOUT,
                ldap.TIMELIMIT_EXCEEDED) as exn:
            # Exceptions related to presumed server-side issues.
            errors.append('%s gives %s' % (uri, exn))
        except ldap.LDAPError as exn:
            # Exceptions related to presumed client-side issues.
            raise ServiceUnknown('LDAP query to %s from %s failed with: %s'
                                 % (uri, section, exn)) from exn
    else:
        raise ServiceUnknown('LDAP query %s failed with: %s'
                             % (section, '; '.join(errors)))
    for _, entry in entries:
        for attr in attrlist:
            if attr in entry:
                target_env[var] = entry[attr][0].decode('utf-8')
                return
    if config.has_option(section, 'default'):
        target_env[var] = get_interp(
            config, section, 'default', reputation_tracker, target_env)
    else:
        raise ServiceUnknown('LDAP query %s did not provide a value for %s.'
                             % (filterstr, section))

def _subst_pipe(config: ConfigParser, section: str, var: str,
                reputation_tracker: ReputationTracker,
                target_env: Environment) -> None:
    cmd = get_interp(config, section, 'command', reputation_tracker,
                     target_env)
    fh = os.popen(cmd)
    target_env[var] = fh.read().strip()
    fh.close()

def _subst_random_line(config: ConfigParser, section: str, var: str,
                       reputation_tracker: ReputationTracker,
                       target_env: Environment) -> None:
    path = get_interp(
        config, section, 'input_file', reputation_tracker, target_env)
    include = None
    exclude = set()
    if config.has_option(section, 'exclude'):
        exclude = set(get_interp(
            config, section, 'exclude', reputation_tracker,
            target_env).split())
    if config.has_option(section, 'include'):
        include = set(get_interp(
            config, section, 'include', reputation_tracker,
            target_env).split())
        include.difference_update(exclude)
    rng = random.Random(time.time())
    if config.has_option(section, 'reputation_dist'):
        # Load distribution from a file.
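        # A hypothetical configuration using this branch could look like:
        #   [variable.target_ce]
        #   method = random_line
        #   input_file = /etc/arc/nagios/ce.list
        #   reputation_dist = ce
        # With reputation_dist set, eligible lines are drawn with weights
        # taken from the ReputationTracker rather than uniformly.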
dist_name = config.get(section, 'reputation_dist', vars=target_env) with open(path, encoding='utf-8') as fh: lines = set(line for line in map(str.strip, fh) if line != '' and line[0] != '#') if not include is None: lines.intersection_update(include) else: lines.difference_update(exclude) if not lines: raise ServiceUnknown('%s must contain at least one non-excluded ' 'line' % path) target_env[var] = reputation_tracker.choose(dist_name, lines) else: # Uniform distribution. chosen_line = None try: with open(path, encoding='utf-8') as fh: seen_count = 0 for line in fh: line = line.strip() if not line or line.startswith('#') \ or not include is None and not line in include \ or include is None and line in exclude: continue if rng.randint(0, seen_count) == 0: chosen_line = line seen_count += 1 except IOError as exn: raise ServiceUnknown(str(exn)) from exn if chosen_line is None: raise ServiceUnknown('%s must contain at least one non-excluded ' 'line' % path) target_env[var] = chosen_line def _subst_strftime(config: ConfigParser, section: str, var: str, reputation_tracker: ReputationTracker, target_env: Environment) -> None: if config.has_option(section, 'raw_format'): fmt = config.get(section, 'raw_format', vars = target_env, raw=True) else: fmt = get_interp( config, section, 'format', reputation_tracker, target_env) target_env[var] = time.strftime(fmt) def _subst_switch(config: ConfigParser, section: str, var: str, reputation_tracker: ReputationTracker, target_env: Environment) -> None: case = 'case[%s]' % get_interp( config, section, 'index', reputation_tracker, target_env) if config.has_option(section, case): import_interpolated_variables( config, section, case, reputation_tracker, target_env) target_env[var] = get_interp( config, section, case, reputation_tracker, target_env) else: if not config.has_option(section, 'default'): raise ServiceUnknown( 'No %s and no default in section variable.%s.'%(case, var)) import_interpolated_variables( config, section, 'default', reputation_tracker, target_env) target_env[var] = get_interp( config, section, 'default', reputation_tracker, target_env) _METHODS_BY_NAME: Dict[str, SubstitutionFunction] = { 'getenv': _subst_getenv, 'ldap': _subst_ldap, 'option': _subst_option, 'pipe': _subst_pipe, 'random_line': _subst_random_line, 'strftime': _subst_strftime, 'switch': _subst_switch, } def register_substitution_method( name: str, f: SubstitutionFunction): _METHODS_BY_NAME[name] = f _INTERP_RE = re.compile(r'%\(([a-zA-Z0-9_]+)\)') def import_interpolated_variables( config: ConfigParser, section: str, var: str, reputation_tracker: ReputationTracker, target_env: Environment) \ -> None: """Import variables needed for expanding ``var`` in ``section``.""" raw_value = config.get(section, var, raw = True) for mo in re.finditer(_INTERP_RE, raw_value): v = mo.group(1) if not v in target_env: import_variable(config, v, reputation_tracker, target_env) def import_variable(config: ConfigParser, var: str, reputation_tracker: ReputationTracker, target_env: Environment) -> None: """Import ``var`` by executing its defining section, populating ``target_env`` with its value and the values of any dependent variables.""" section = 'variable.' + var method = config.get(section, 'method') try: return _METHODS_BY_NAME[method]\ (config, section, var, reputation_tracker, target_env) except KeyError: # pylint: disable=W0707 raise ServiceUnknown('Unknown substitution method %s.' 
% method) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/arcclients.py0000644000175000002070000001561115002373741024606 0ustar mockbuildmock00000000000000import asyncio from logging import Logger, getLogger from typing import Awaitable, Callable, Dict, Optional, List from arcnagios.utils import Alpha, Result, ResultOk, ResultError, map_option from arcnagios.arcutils import Arcstat, J_UNDEFINED, jobstate_of_str, ParseError class ArcClientError: returncode: int prog: str stdout: str def __init__(self, returncode, prog, stdout, stderr): self.returncode = returncode self.prog = prog self.output = stdout # reconsider usage self._stderr = stderr # logged by this module _DEFAULT_LOG = getLogger(__name__) def _parse_nothing( returncode: int, stdout: str, stderr: str, log: Logger ) -> Result[None, None]: # pylint: disable=W0613 if returncode == 0: return ResultOk(None) else: return ResultError(None) class ArcstatResponse: jobs: Dict[str, Arcstat] def __init__(self, jobs: Dict[str, Arcstat]): self.jobs = jobs def _convert_arcstat(jobid, jobstat): if jobstat['State'] == 'Undefined': state = J_UNDEFINED specific_state = None elif 'Specific state' in jobstat: state = jobstate_of_str(jobstat['State']) specific_state = jobstat['Specific state'] else: raise ParseError('Missing "State" or "Specific state" for %s.' % jobid) return Arcstat(state = state, specific_state = specific_state, exit_code = map_option(int, jobstat.get('Exit code')), submitted = jobstat.get('Submitted'), job_error = jobstat.get('Job Error')) def _parse_arcstat_response( returncode: int, stdout: str, stderr: str, log: Logger ) -> Result[ArcstatResponse, None]: # pylint: disable=W0613 # A return code 1 can mean "No jobs found, try later" or a real error, but # at least the following should trigger if the system is unable to launch # the command. if returncode > 1: return ResultError(None) jobstats = {} line_number = 0 def parse_error(msg): log.warning('Unparsed line %s: %s', line_number, msg) jobid: Optional[str] = None jobfield: Optional[str] = None jobstat: Dict[str, str] = {} def flush(): nonlocal jobid, jobstat, jobstats if jobid is None: return if not jobstat: log.warning('Not further information for %s', jobid) jobid = None return jobstats[jobid] = _convert_arcstat(jobid, jobstat) jobid = None jobstat = {} for line in stdout.split('\n'): line_number += 1 if line.startswith('No jobs') or line.startswith('Status of '): break if line == '': continue if line.startswith('Job:'): flush() jobid = line[4:].strip() log.debug('Found %s', jobid) jobstat = {} jobfield = None elif line.startswith('Warning:'): log.warning('%s', line) elif line.startswith(' '): if jobfield is None: parse_error('Continuation line %r before job field.') continue jobstat[jobfield] += '\n' + line elif line.startswith(' '): kv = line.strip() try: jobfield, v = kv.split(':', 1) if jobid is None: parse_error('Missing "Job: ..." header before %r' % kv) continue jobstat[jobfield] = v.strip() except ValueError: parse_error('Expecting ": ", got %r' % line) continue else: parse_error('Unrecognized output %r' % line) flush() return ResultOk(ArcstatResponse(jobstats)) async def _call_arc_process( prog: str, args: List[str], parse: Callable[[int, str, str, Logger], Result[Alpha, None]], log: Logger ) -> Result[Alpha, ArcClientError]: # FIXME: Is there a proper way to pass a modified environment to # asyncio.subprocess.create_subprocess_exec? 
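    # Regarding the FIXME above: asyncio.create_subprocess_exec accepts the
    # same env= keyword as subprocess.Popen, so an alternative sketch (which
    # would also need "import os") is
    #     proc = await asyncio.create_subprocess_exec(
    #         prog, *args, env=dict(os.environ, TZ='UTC'), ...)
    # The /usr/bin/env wrapper below achieves the same effect.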
prog_utc, args_utc = "/usr/bin/env", ["TZ=UTC", prog] + args log.debug('Calling %s %s', prog, ' '.join(args)) # pylint: disable=E1101 # asyncio.subprocess is not subprocess proc = await asyncio.subprocess.create_subprocess_exec( prog_utc, *args_utc, stdin=asyncio.subprocess.DEVNULL, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE) stdout_, stderr_ = await proc.communicate() stdout = stdout_.decode('utf-8') stderr = stderr_.decode('utf-8') returncode = proc.returncode assert returncode is not None result = parse(returncode, stdout, stderr, log) if isinstance(result, ResultOk): return result for line in stderr.split('\n'): if line: log.error('[%s/%d] %s', prog, proc.pid, line) return ResultError(ArcClientError(returncode, prog, stdout, stderr)) # Jobs def arcget( job_id: str, top_output_dir: Optional[str] = None, timeout: Optional[float] = None, log: Logger = _DEFAULT_LOG, ) -> Awaitable[Result[None, ArcClientError]]: args = [job_id] if not timeout is None: args += ['-t', str(int(timeout + 0.5))] if not top_output_dir is None: args += ['-D', top_output_dir] return _call_arc_process('arcget', args, _parse_nothing, log) def arcstat( jobids: Optional[List[str]] = None, timeout: float = 5.0, show_unavailable: bool = False, log: Logger = _DEFAULT_LOG, ) -> Awaitable[Result[ArcstatResponse, ArcClientError]]: args = ['-l', '-t', str(int(timeout + 0.5))] if jobids is None: args.append('-a') else: args.extend(jobids) if show_unavailable: args.append('-u') return _call_arc_process('arcstat', args, _parse_arcstat_response, log) def arcclean( job_id: str, force: bool = False, timeout: Optional[float] = None, log: Logger = _DEFAULT_LOG, ) -> Awaitable[Result[None, ArcClientError]]: args = [job_id] if not timeout is None: args += ['-t', str(int(timeout + 0.5))] if force: args.append('-f') return _call_arc_process('arcclean', args, _parse_nothing, log) def arckill( job_id: str, timeout: Optional[float] = None, log: Logger = _DEFAULT_LOG, ) -> Awaitable[Result[None, ArcClientError]]: args = [job_id] if not timeout is None: args += ['-t', str(int(timeout + 0.5))] return _call_arc_process('arckill', args, _parse_nothing, log) # Storage def arcrm( url: str, force: bool, timeout: float, log: Logger = _DEFAULT_LOG, ) -> Awaitable[Result[None, ArcClientError]]: args = [url] if not timeout is None: args += ['-t', str(int(timeout + 0.5))] if force: args.append('-f') return _call_arc_process('arcrm', args, _parse_nothing, log) nordugrid-arc-nagios-plugins-3.1.1/arcnagios/reputation.py0000644000175000002070000001552515002373741024655 0ustar mockbuildmock00000000000000# pylint: disable=C0103 import logging import math import random import sqlite3 import time from typing import Callable, Dict, List, Optional, Set from configparser import ConfigParser _default_log = logging.getLogger(__name__) def _solve(f: Callable[[float], float], eps: float, x0: float, x1: float) \ -> float: """Solves f(x) = 0 for x ∈ (x0, x1], assuming a unique solution exists.""" u0 = f(x0) u1 = f(x1) while abs(u1) > eps: if u0 * u1 > 0.0: raise RuntimeError('Cannot solve, same sign.') x = 0.5 * (x0 + x1) u = f(x) if u * u0 < 0.0: x1 = x u1 = u else: x0 = x u0 = u return x1 def _func(lambda_: float) -> Callable[[float], float]: def f(pS: float): return math.exp(1.0 - lambda_ / pS) return f def _norm(success_dist: Dict[str, float]) -> Callable[[float], float]: def f(lambda_: float): return sum(_func(lambda_)(pS) for pS in success_dist.values()) - 1.0 return f def choice_dist_of_success_dist(success_dist: Dict[str, float]) \ -> 
Dict[str, float]: success_dist = dict((k, p) for (k, p) in success_dist.items() if p > 0.0) lambda0 = _solve(_norm(success_dist), 1e-6, 0.0, 20.0) f0 = _func(lambda0) return dict((term, f0(pS)) for (term, pS) in success_dist.items()) _CREATE_SQL = """\ CREATE TABLE arcnagios_reputation ( dist_name text NOT NULL, choice_name text NOT NULL, update_time double precision NOT NULL, recent_count double precision NOT NULL, recent_success double precision NOT NULL, PRIMARY KEY (dist_name, choice_name) )""" _PAST_DIST_NAMES_SQL = """\ SELECT DISTINCT dist_name FROM arcnagios_reputation """ _PAST_CHOICE_NAMES_SQL = """\ SELECT choice_name FROM arcnagios_reputation WHERE dist_name = ? """ _FETCH_SQL = """\ SELECT choice_name, recent_count, recent_success FROM arcnagios_reputation WHERE dist_name = ? """ _SUBMIT_SELECT_SQL = """\ SELECT update_time, recent_count, recent_success FROM arcnagios_reputation WHERE dist_name = ? AND choice_name = ? """ _SUBMIT_INSERT_SQL = """\ INSERT INTO arcnagios_reputation \ (update_time, recent_count, recent_success, dist_name, choice_name) VALUES (?, ?, ?, ?, ?) """ _SUBMIT_UPDATE_SQL = """\ UPDATE arcnagios_reputation SET update_time = ?, recent_count = ?, recent_success = ? WHERE dist_name = ? AND choice_name = ? """ class ReputationTracker: def __init__(self, config: ConfigParser, db_path: str, log: logging.Logger = _default_log): self._log = log self._config = config self._db_path = db_path self._db: Optional[sqlite3.Connection] = None self._choices: Dict[str, str] = {} def _config_float(self, var: str, default: float) -> float: if self._config.has_section('reputation') \ and self._config.has_option('reputation', var): return self._config.getfloat('reputation', var) return default def _config_dist_float(self, dist_name: str, var: str, default: Optional[float] = None) -> Optional[float]: section_name = 'reputation_dist:' + dist_name if self._config.has_section(section_name) \ and self._config.has_option(section_name, var): return self._config.getfloat(section_name, var) return default @property def _busy_timeout(self) -> float: return self._config_float('busy_timeout', 10.0) @property def _default_sample_lifetime(self) -> float: return self._config_float('sample_lifetime', 172800.0) def _connect(self) -> sqlite3.Connection: if self._db is None: self._db = sqlite3.connect(self._db_path, self._busy_timeout) try: self._db.execute(_CREATE_SQL) except sqlite3.OperationalError: pass return self._db def past_dist_names(self) -> List[str]: db = self._connect() return [name for (name,) in db.execute(_PAST_DIST_NAMES_SQL)] def past_choice_names(self, dist_name: str) -> List[str]: db = self._connect() return [name for (name,) in db.execute(_PAST_CHOICE_NAMES_SQL, (dist_name,))] def disconnect(self) -> None: if not self._db is None: self._db.close() self._db = None def success_dist(self, dist_name: str) -> Dict[str, float]: db = self._connect() cur = db.execute(_FETCH_SQL, (dist_name,)) return dict((k, (nS + 0.25) / (n + 0.5)) for (k, n, nS) in cur) def choice_dist(self, dist_name: str, choice_names: Set[str], success_dist: Optional[Dict[str, float]] = None) \ -> Dict[str, float]: if success_dist is None: success_dist = self.success_dist(dist_name) if success_dist == {}: avg_success = 0.5 else: avg_success = sum(success_dist.values()) / len(success_dist) restricted_success_dist = \ dict((k, success_dist.get(k, avg_success)) for k in choice_names) return choice_dist_of_success_dist(restricted_success_dist) def submit(self, dist_name: str, choice_name: str, is_success: bool) \ 
-> None: db = self._connect() rows = db.execute(_SUBMIT_SELECT_SQL, (dist_name, choice_name)) \ .fetchall() t_now = time.time() if rows == []: t_past, recent_count, recent_success = (t_now, 0.0, 0.0) else: assert len(rows) == 1 t_past, recent_count, recent_success = rows[0] sample_lifetime = self._config_dist_float(dist_name, 'sample_lifetime') scale = math.exp((t_past - t_now) \ / (sample_lifetime or self._default_sample_lifetime)) recent_count = scale * recent_count + 1.0 recent_success *= scale if is_success: recent_success += 1.0 db.execute( rows == [] and _SUBMIT_INSERT_SQL or _SUBMIT_UPDATE_SQL, (t_now, recent_count, recent_success, dist_name, choice_name)) db.commit() def _choose_otr(self, dist_name: str, choice_names: Set[str]) -> str: choice_dist = self.choice_dist(dist_name, choice_names) p = random.uniform(0.0, 1.0) for (choice_name, pS) in choice_dist.items(): p -= pS if p < 0.0: return choice_name return choice_names.pop() # precision loss, return any def choose(self, dist_name: str, choice_names: Set[str]) -> str: if not choice_names: raise ValueError("ReputationTracker.choose expects a non-empty " "sequence of choices.") choice_name = self._choose_otr(dist_name, choice_names) self._choices[dist_name] = choice_name return choice_name def choices(self) -> Dict[str, str]: return self._choices nordugrid-arc-nagios-plugins-3.1.1/arcnagios/se/0000755000175000002070000000000015002373741022510 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/arcnagios/se/__init__.py0000644000175000002070000000000015002373741024607 0ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/arcnagios/se/check_gridstorage.py0000644000175000002070000002174315002373741026540 0ustar mockbuildmock00000000000000"""check_gridstorage - Attempts to read, write, and list files in grid storage.""" import os import datetime import time from typing import List from arcnagios import vomsutils from arcnagios.arcutils import ArcClient from arcnagios.nagutils import NagiosPlugin, OK, CRITICAL, UNKNOWN, \ ServiceCritical, ServiceReport from arcnagios.confargparse import UsageError from arcnagios.utils import log_process_error class Check_gridstorage(NagiosPlugin, vomsutils.NagiosPluginVomsMixin): # pylint: disable=super-init-not-called,invalid-name main_config_section = ['gridstorage'] def __init__(self): NagiosPlugin.__init__(self, use_host = True, use_port = True) self.arcclient = None argp = self.argparser.add_argument_group('Probe-Specific Options') argp.add_argument('--url', dest = 'url', help = 'The remote URL on which to perform the tests.') argp.add_argument('--write-url', dest = 'write_url', help = 'The URL for the initial write operation if different ' 'from the other URLs. This is primarily used for LFC.') argp.add_argument('--dir-url', dest = 'dir_url', help = 'The URL of the directory holding the test file. ' 'A file name including the host name and a time stamp ' 'will be appended. This option is only useful if you ' 'enable write operations, and it will not work ' 'correctly for LFC.') argp.add_argument('--disable-read', dest = 'enable_read', default = True, action = 'store_false', help = 'Disable the read check on the URL.') argp.add_argument('--disable-list', dest = 'enable_list', default = True, action = 'store_false', help = 'Disable the list check on the URL.') argp.add_argument('--list-dir', dest = 'list_dir', default = False, action = 'store_true', help = 'List the URL of the directory containing the file ' 'rather than the file itself.
This will use ' '--dir-url if provided, otherwise it will use --url ' 'after stripping the last component.') argp.add_argument('--enable-write', dest = 'enable_write', default = False, action = 'store_true', help = 'Enable write and delete operations on the URL. ' 'If enabled a file with a fairly unique content will ' 'be written before any list and read operations, ' 'and the file will be deleted after.') argp.add_argument('-t', '--timeout', dest = 'timeout', type = int, default = 120, help = 'Timeout. This is divided among the sub-tasks, so ' 'individual operations will get shorter times to ' 'complete. The minimum value is 5 seconds.') argp.add_argument('--granular-perfdata', dest = 'granular_perfdata', default = False, action = 'store_true', help = 'Report ARC command timing performance data per host ' 'using labels of the form ARCCMD[HOST]. By default ' 'report the aggregate time across hosts.') self._clean_url = None self._t_start = time.time() self._time_slots = 0 self._grace_time = None def parse_args(self, args: List[str]) -> None: NagiosPlugin.parse_args(self, args) if not self.opts.url and not self.opts.dir_url: raise UsageError('You must provide either a --dir-url or a --url.') self.arcclient = ArcClient(self.perflog) def time_left(self) -> float: return self.opts.timeout - time.time() + self._t_start def next_timeout(self, which: str) -> float: assert self._time_slots > 0 timeout = self.time_left() / self._time_slots - self._grace_time self._time_slots -= 1 if timeout < 1: if self._clean_url and self._time_slots > 0: timeout = self.time_left() self.arcclient.arcrm(self._clean_url, timeout = timeout).get() raise ServiceCritical('Insufficient time for %s.'%which) return int(timeout) def check(self) -> ServiceReport: # Allocate up to 4 time slots for the main subtasks, and reserve a # fraction of the total time for the script itself and fork/exec # overhead.
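        # (With all checks enabled this yields four slots, matching the
        # four next_timeout() calls below: write, list, read, and remove;
        # the second slot attributed to reading effectively budgets the
        # final removal.)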
self._time_slots = \ (self.opts.enable_read and 2 or 0) + \ (self.opts.enable_write and 1 or 0) + \ (self.opts.enable_list and 1 or 0) self._grace_time = self.opts.timeout * 0.02 host = self.opts.host or 'localhost' timestamp = datetime.datetime.now().strftime('%Y%m%dT%H%M%S') self.require_voms_proxy() if self.opts.url: url = self.opts.url filename = os.path.basename(url) else: filename = '%s_%s.data'%(host, timestamp) url = os.path.join(self.opts.dir_url, filename) self.log.info('Performing checks on %s', url) failed_ops = [] if self.opts.enable_write: timeout = self.next_timeout('write') uploaded_contents = 'Created by check_gridstorage %s for %s.\n' \ % (timestamp, self.opts.host or 'localhost') write_url = self.opts.write_url or url try: fd, outbound = self.mkstemp(prefix = 'outbound') fh = os.fdopen(fd, 'w') fh.write(uploaded_contents) fh.close() except OSError as exn: self.log.error('%s', exn) self.nagios_exit(UNKNOWN, 'Could not create test file.') arccp_result = \ self.arcclient.arccp(outbound, write_url, timeout = timeout) if arccp_result.is_ok(): self._clean_url = write_url self.log.info('Uploaded file.') else: log_process_error(self.log, arccp_result.error, synopsis = 'upload failed', prefix = 'arccp') failed_ops.append('upload') else: uploaded_contents = None if self.opts.enable_list: timeout = self.next_timeout('list') if self.opts.list_dir: list_url = self.opts.dir_url or os.path.dirname(url) else: list_url = url arcls_result = self.arcclient.arcls(list_url, timeout = timeout) if arcls_result.is_ok(): listing = arcls_result.value self.log.info('Listing contains %d entries.', len(listing)) if not any(os.path.basename(ent.filename) == filename for ent in listing): self.log.error('Did not find %s in listing.', filename) failed_ops.append('list') else: log_process_error(self.log, arcls_result.get_error(), synopsis = 'listing failed', prefix = 'arcls') failed_ops.append('list') fetched_contents = None if self.opts.enable_read: timeout = self.next_timeout('read') inbound = self.mktemp(prefix = 'inbound') arccp_result = self.arcclient.arccp(url, inbound, timeout = timeout) if arccp_result.is_ok(): self.remove_at_exit(inbound) try: with open(inbound, encoding='utf-8') as fh: fetched_contents = fh.read() self.log.info('Fetched file.') except OSError as exn: self.log.error('Could not open fetched file %s.', exn) failed_ops.append('open-read') else: log_process_error(self.log, arccp_result.error, synopsis = 'fetch failed', prefix = 'arccp') failed_ops.append('fetch') if uploaded_contents and fetched_contents: if fetched_contents != uploaded_contents: self.log.error('Mismatched content in fetched file.') failed_ops.append('verify') else: self.log.info('Verified content of fetched file.') if self.opts.enable_write: timeout = self.next_timeout('remove') arcrm_result = self.arcclient.arcrm(url, timeout = timeout) if arcrm_result.is_ok(): self.log.info('Removed file.') else: log_process_error(self.log, arcrm_result.error, synopsis = 'removal failed', prefix = 'arcrm') failed_ops.append('remove') if failed_ops: self.nagios_exit(CRITICAL, 'Failed to %s.'%', '.join(failed_ops)) else: self.nagios_exit(OK, 'Storage tests succeeded.') nordugrid-arc-nagios-plugins-3.1.1/arcnagios/confargparse.py0000644000175000002070000000327315002373741025132 0ustar mockbuildmock00000000000000"""Combined argparse and ConfigParser.""" import argparse from configparser import ConfigParser from typing import NoReturn, List, Union class UsageError(RuntimeError): pass class ConfigError(RuntimeError): pass class 
ConfigArgumentParser(argparse.ArgumentParser): def error(self, message: str) -> NoReturn: raise UsageError(message) def configure_defaults( self, config: ConfigParser, defaults_section: Union[List[str], str, None]) -> None: """Install the variables from the first section of `defaults_section` found in `config` as defaults for the argument parser options.""" if not defaults_section: defaults_section = [] elif not isinstance(defaults_section, list): defaults_section = [defaults_section] for section in defaults_section: if config.has_section(section): defaults = [(var, config.get(section, var)) for var in config.options(section)] self.set_defaults(**dict(defaults)) break # TODO: Validation can be refined. Ideally we should check each config # variable individually and report a) the variable name rather than # the option name and b) the line number of the bad value. try: self.parse_args() except argparse.ArgumentTypeError as exn: raise ConfigError( 'The configuration corresponding to a command-line ' 'option failed: %s' % exn) from exn nordugrid-arc-nagios-plugins-3.1.1/arcnagios/vomsutils.py0000644000175000002070000001753415002373741024526 0ustar mockbuildmock00000000000000"""Helpers for maintaining a VOMS proxy.""" from datetime import datetime from configparser import ConfigParser import logging import os import subprocess import time from functools import cached_property from typing import Any, NoReturn, Optional, Protocol from cryptography import x509 from cryptography.hazmat.backends import default_backend from arcnagios import confargparse from arcnagios.nagutils import UNKNOWN log = logging.getLogger(__name__) class ProxyInitError(EnvironmentError): def __init__(self, msg, details = None): EnvironmentError.__init__(self, msg) self.details = details def _x509_certificate_not_valid_after(cert_path: str): with open(cert_path, mode='rb') as cert_fh: data = cert_fh.read() backend = default_backend() return x509.load_pem_x509_certificate(data, backend).not_valid_after def _require_voms_proxy(voms: Optional[str], key_path: Optional[str], cert_path: Optional[str], proxy_path: str, min_proxy_lifetime: Optional[float], max_proxy_age: Optional[float]): # Check for an existing proxy with sufficient time left. log.debug('Checking %s', proxy_path) if not os.path.exists(proxy_path): must_renew = True else: must_renew = False if not min_proxy_lifetime is None: t_xpr = _x509_certificate_not_valid_after(proxy_path) t_rem = t_xpr - datetime.now() if t_rem.total_seconds() >= min_proxy_lifetime: log.debug('The proxy certificate is valid until %s.', t_xpr) else: log.debug('The proxy certificate expired %s.', t_xpr) must_renew = True if not max_proxy_age is None: age = time.time() - os.stat(proxy_path).st_mtime if age > max_proxy_age: must_renew = True # Renew if needed. if must_renew: # Make sure the directory exists.
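        # (A possible simplification: os.makedirs(proxy_dir, exist_ok=True)
        # would also tolerate the directory being created concurrently by
        # another probe run.)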
proxy_dir = os.path.dirname(proxy_path) if not os.path.exists(proxy_dir): try: log.debug('Creating directory %s.', proxy_dir) os.makedirs(proxy_dir) except OSError as exn: raise ProxyInitError('Cannot create parent directory %s for ' 'storing X509 proxy: %s' % (proxy_dir, exn)) from exn log.debug('Renewing proxy.') cmd = ['arcproxy'] if not key_path is None: cmd.extend(['-K', key_path]) if not cert_path is None: cmd.extend(['-C', cert_path]) if not voms is None: cmd.extend(['-S', voms]) if not proxy_path is None: cmd.extend(['-P', proxy_path]) with subprocess.Popen( cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf-8') as process: output, _ = process.communicate() if process.returncode: raise ProxyInitError( 'Failed to initialize proxy: arcproxy exited with %d.' % process.returncode, details = 'Output from %s:\n%s' % (cmd, output)) class NagiosPluginVomsProtocol(Protocol): argparser: confargparse.ConfigArgumentParser opts: Any log: logging.Logger @cached_property def config(self) -> ConfigParser: ... def tmpdir(self) -> str: ... def nagios_exit(self, status_code: int, status_message: Optional[str] = None, subject: Optional[str] = None) -> NoReturn: ... class NagiosPluginVomsMixin(NagiosPluginVomsProtocol): def __init__(self): super().__init__() argp = self.argparser.add_argument_group('VOMS Proxy Options') argp.add_argument('--user-proxy', dest = 'user_proxy', help = 'Path to a possibly pre-initialized VOMS proxy.') argp.add_argument('--user-cert', dest = 'user_cert', help = 'Certificate to use for obtaining VOMS proxy.') argp.add_argument('--user-key', dest = 'user_key', help = 'Certificate key to use for obtaining VOMS proxy.') argp.add_argument('--voms', dest = 'voms', help = 'VOMS server for Proxy initialization.') argp.add_argument('--min-proxy-lifetime', dest = 'min_proxy_lifetime', # The default should be strictly larger than the time before # jobs are cancelled. We need at least the time between # monitor runs plus grace time. default = 7 * 3600, help = 'The minimum lifetime in seconds of the proxy ' 'certificate before it is renewed. ' 'X.509 extensions are not considered, so if they have ' 'a significantly different lifetime than the certificate ' 'itself, consider using --max-proxy-age with a ' 'conservative value. ' 'This is only effective if a user key and certificate is ' 'provided. ' 'This is also only effective if the platform supports ' 'the cryptography module, otherwise use ' '--max-proxy-age.') argp.add_argument('--max-proxy-age', dest = 'max_proxy_age', help = 'The maximum age of the proxy certificate file before ' 'renewing the proxy. ' 'If set, this will trigger a proxy renewal independent of ' '--min-proxy-lifetime.
' 'By default this is unset, but if the cryptography module is ' 'missing, 4 * 3600 is assumed where needed.') @property def voms(self) -> Optional[str]: if self.opts.voms: return self.opts.voms if self.config.has_option('gridproxy', 'default_voms'): return self.config.get('gridproxy', 'default_voms') return None def voms_suffixed(self, name: str, ext: str = ''): if self.voms: return name + '-' + self.voms + ext else: return name + ext def require_voms_proxy(self) -> Optional[str]: key_path = self.opts.user_key cert_path = self.opts.user_cert proxy_path = self.opts.user_proxy voms = self.voms for section in ['gridproxy.%s' % voms, 'gridproxy']: if not key_path and self.config.has_option(section, 'user_key'): key_path = self.config.get(section, 'user_key') if not cert_path and self.config.has_option(section, 'user_cert'): cert_path = self.config.get(section, 'user_cert') if not proxy_path and self.config.has_option(section, 'user_proxy'): proxy_path = self.config.get(section, 'user_proxy') if key_path or cert_path: if not proxy_path: proxy_path = os.path.join(self.tmpdir(), self.voms_suffixed('proxy', '.pem')) try: _require_voms_proxy( voms = voms, key_path = key_path, cert_path = cert_path, proxy_path = proxy_path, min_proxy_lifetime = self.opts.min_proxy_lifetime, max_proxy_age = self.opts.max_proxy_age) except ProxyInitError as exn: if exn.details: self.log.error(exn.details) self.nagios_exit(UNKNOWN, str(exn)) if proxy_path: os.environ['X509_USER_PROXY'] = proxy_path return proxy_path else: return os.getenv('X509_USER_PROXY') nordugrid-arc-nagios-plugins-3.1.1/doc/0000755000175000002070000000000015002373741020700 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/doc/gridstorage.rst0000644000175000002070000000425515002373741023752 0ustar mockbuildmock00000000000000******************** Grid Storage Probe ******************** The ``check_gridstorage`` probe can check file operations against grid storage protocols, including SRM, GridFTP, LFC, and other protocols supported by ``arccp`` and ``arcls``. The main configuration section of this probe is ``gridstorage``, see :ref:`configuration-files`. This probe requires an X509 proxy, see :ref:`x509-proxy`. Basic Invocation ---------------- To perform read-only checks against a URL pointing to an existing file, use:: check_gridstorage --url <url> To perform read-write checks against a URL, use either:: check_gridstorage --url <url> --enable-write check_gridstorage -H <host> --dir-url <dir-url> --enable-write In the latter case, the probe will add a file name based on the host name and a time stamp. The ``--dir-url=`` option will not work correctly with the LFC protocol, since the file name needs to be encoded inside the URL. Performed Checks ---------------- The probe will do the following checks: * If writing is enabled, a small file is generated and copied to the provided URL. The contents of the file include the time and the host name passed to ``-H`` or "``localhost``". * The URL is listed and it's checked that the listing contains the name of the uploaded file. * The file is read back. * If writing is enabled, the content is compared to what was written. Since the content contains a host name and a time stamp, it's unlikely that an old file is accidentally matched. * If writing is enabled, the file is deleted. Any failure in the above checks will return CRITICAL to Nagios. Additional Options ------------------ If you wish to do a more thorough list-test, you can ask the probe to list the whole directory containing the test file.
This is done by passing ``--list-dir``. This will use ``--dir-url=`` if specified, otherwise it will guess the URL of the containing directory by stripping off the last component of ``--url=``. In any case, the listed URL must contain the test-file. Be aware that if the directory contains many entries, the probe may time out. You can disable the read or list tests with ``--disable-read`` and ``--disable-list``. nordugrid-arc-nagios-plugins-3.1.1/doc/Makefile0000644000175000002070000001107215002373741022341 0ustar mockbuildmock00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest help: @echo "Please use \`make <target>' where <target> is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/NorduGridARCNagiosPlugins.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/NorduGridARCNagiosPlugins.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished."
@echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/NorduGridARCNagiosPlugins" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/NorduGridARCNagiosPlugins" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." make -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." nordugrid-arc-nagios-plugins-3.1.1/doc/sample_config.rst0000644000175000002070000000132515002373741024241 0ustar mockbuildmock00000000000000************************* Example Configuration ************************* .. _example-ini: Probe Configuration =================== This is not meant to be used as is. You will need to uncomment and edit as needed. .. literalinclude:: arcnagios.ini.example :language: ini Nagios Configuration for ``check_arcce_*`` ========================================== This configuration is not meant to be used as is. It is an example which illustrates how to use the entry points of the ``check_arcce_*`` probes and define the associated passive services. Other probes are omitted here, as they are configured as independent services similar to commonly available Nagios probes. .. literalinclude:: services.cfg.example nordugrid-arc-nagios-plugins-3.1.1/doc/arcnagios.ini.example0000644000175000002070000001004715002373741025003 0ustar mockbuildmock00000000000000# Grid Proxy Certificate and VOMS Attributes # ========================================== [gridproxy] # The default VOMS to use. You can override this for specific probes by # setting "voms" under the corresponding section. #default_voms = ops # Alternative 1: Use an externally generated proxy certificate. You can either # export X509_USER_PROXY or point to it with #user_proxy = /var/cache/nagios/gridproxy.pem # Alternative 2: Let the probe generate a proxy certificate on demand from # a robot certificate. #user_cert = /etc/grid-security/robotcert.pem #user_key = /etc/grid-security/robotkey.pem # Checking Storage Elements # ========================= [gridstorage] # Base directory where to store temporary files and runtime state information. #arcnagios_spooldir = /var/spool/arc/nagios # The ARC commands will store some files under $HOME/.arc/. 
Since the home # directory may not be set to something usable, set an appropriate value here # to instruct the Nagios plugins to override $HOME at startup. #home_dir = /var/spool/arc/nagios # The log-level to use for this probe. Valid values in order of # decreasing verbosity are DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL. #loglevel = WARNING # Checking Compute Elements: Job Submission # ========================================= [arcce] # Same as for [gridstorage]. #arcnagios_spooldir = /var/spool/arc/nagios #home_dir = /var/spool/arc/nagios # The log-level for this probe as described under [gridstorage]. #loglevel = WARNING [arcce.connection_urls] # This section can be used to force specific flavours and connection URLs for # individual CEs. Each line takes the form # # ce.example.org = FLAVOUR:URL # # where the right hand side corresponds to the -C argument of arcsub(1). # Example Scripted Job Tests # -------------------------- # # These checks are enabled by passing "--test NAME" to the submit command, # where NAME is the section name without the "arcce." prefix. They inject # pieces of shell into the remote script and check the output using # regular expression patterns. [arcce.python] jobplugin = scripted required_programs = python script_line = python -V >python.out 2>&1 output_file = python.out output_pattern = Python\s+(?P<version>\S+) status_ok = Found Python version %(version)s. status_critical = Python version not found in output. service_description = ARCCE Python version [arcce.perl] jobplugin = scripted required_programs = perl script_line = perl -v >perl.out 2>&1 output_file = perl.out output_pattern = This is perl, v(?P<version>\S+) status_ok = Found Perl version %(version)s. status_critical = Perl version not found in output. service_description = ARCCE Perl version [arcce.gcc] jobplugin = scripted required_programs = gcc script_line = gcc -v >gcc.out 2>&1 output_file = gcc.out output_pattern = gcc version (?P<version>\S+) status_ok = Found GCC version %(version)s. status_critical = GCC version not found in output. service_description = ARCCE GCC version [arcce.csh] jobplugin = scripted required_programs = csh script_line = echo >csh-test.csh '#! /bin/csh'; echo >>csh-test.csh 'env >csh.out'; chmod +x csh-test.csh; ./csh-test.csh output_file = csh.out output_pattern = ^PATH= status_ok = Found working csh. status_critical = Did not find $PATH in csh environment. service_description = ARCCE csh usability # Example Storage Job Checks # -------------------------- # # These checks are also enabled by passing the second component of the # section name to the --test option. This will add the specified staging to # the job description. Input files must exist in advance. Output # files will be removed after checking that they exist. [arcce.stage_srm] jobplugin = staging staged_inputs = srm://srm.example.org/somedir/testfile staged_outputs = srm://srm.example.org/somedir/srm-%(hostname)s-%(epoch_time)s service_description = ARCCE SRM Result [arcce.stage_gridftp] jobplugin = staging staged_inputs = gsiftp://srm.example.org/somedir/testfile staged_outputs = gsiftp://srm.example.org/somedir/gsiftp-%(hostname)s-%(epoch_time)s service_description = ARCCE GridFTP Result nordugrid-arc-nagios-plugins-3.1.1/doc/index.rst0000644000175000002070000000066315002373741022546 0ustar mockbuildmock00000000000000.. NorduGrid ARC Nagios Plugins documentation master file, created by sphinx-quickstart on Thu Nov 10 14:34:24 2011.
You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. NorduGrid ARC Nagios Plugins ============================ NORDUGRID-MANUAL-22-HTML .. toctree:: :maxdepth: 2 intro.rst infosys.rst arcce.rst gridstorage.rst sample_config.rst nordugrid-arc-nagios-plugins-3.1.1/doc/services.cfg.example0000644000175000002070000001225415002373741024642 0ustar mockbuildmock00000000000000# -------------------------------------------------------------------------- # This is an example Nagios configuration for the ARC-CE probes meant for # documentation purposes. It cannot be used as-is. # -------------------------------------------------------------------------- # Contacts and Contact Groups # =========================== # You probably already have contacts defined in your Nagios configuration, so # you can skip these and substitute your own below. define contactgroup { contactgroup_name nagios-operators members jdoe } define contact { use generic-contact contact_name jdoe email jdoe@example.org } # Command Definitions # ==================== # This is a dummy command for passive services. You may already have something # like it in your Nagios configuration. define command { command_name check_passive command_line /bin/true } # This command monitors running jobs and collects those which have terminated, # reporting passive results. define command { command_name check_arcce_monitor command_line $USER1$/check_arcce_monitor -H $HOSTNAME$ } # A job submission check including sub-tests which are defined in the plugin # configuration in separate sections. The results of the sub-tests will be # passively reported to the service names defined in the same configuration. define command { command_name check_arcce_submit command_line $USER1$/check_arcce_submit -H $HOSTNAME$ \ --test python --test perl --test csh --test gcc } # A job submission check with staging. The arguments to --stage-input options # must exist. The arguments to --stage-output options will be overwritten, and # deleted on termination. This command is not used below. To use it, add an # active service and a passive service named "ARCCE SRM Job Termination". define command { command_name check_arcce_submit_staging # Passed explicitly: # command_line $USER1$/check_arcce_submit \ # -H $HOSTNAME$ --job-tag srm \ # --termination-service 'ARCCE SRM Job Termination' \ # --stage-input srm.txt=srm://srm.example.org/nagios/readable.txt \ # --stage-output srm://srm.example.org/nagios/srm-$HOSTNAME$-$TIMET$.txt \ # Using a predefined job-test: # command_line $USER1$/check_arcce_submit \ # -H $HOSTNAME$ --job-tag srm \ # --termination-service 'ARCCE SRM Job Termination' \ # --test stage_srm } # Host Groups and Host Templates # ============================== # You need to have one host definition to which the monitoring service is # assigned. This is typically the Nagios host itself, for which you # probably already have a definition. define host { name nagios-host use generic-host max_check_attempts 10 contact_groups nagios-operators register 0 } # The following host group and template will be used for all CEs.
define hostgroup { hostgroup_name arcce-hosts alias ARCCE Hosts } define host { name arcce-host use generic-host max_check_attempts 10 contact_groups nagios-operators hostgroups arcce-hosts register 0 } # Service Groups and Service Templates # ==================================== define servicegroup { servicegroup_name arcce-services alias ARCCE Services } define service { name arcce-service use generic-service servicegroups arcce-services check_period 24x7 max_check_attempts 3 flap_detection_enabled 0 contact_groups nagios-operators notifications_enabled 0 register 0 } define service { name arcce-monitoring-service use arcce-service normal_check_interval 5 retry_check_interval 5 register 0 } define service { name arcce-submission-service use arcce-service normal_check_interval 30 retry_check_interval 30 register 0 } define service { name arcce-passive-service use arcce-service active_checks_enabled 0 passive_checks_enabled 1 check_command check_passive register 0 } define service { use arcce-monitoring-service host_name localhost service_description ARCCE Monitoring check_command check_arcce_monitor } # For each ARC CE, we need one active service for submission and a number of # passive services to collect the results. In the following we associate the # per-CE services to the "arcce-hosts" group, which will add them to all # members of the group. define service { use arcce-submission-service service_description ARCCE Job Submission hostgroup_name arcce-hosts check_command check_arcce_submit } define service { use arcce-passive-service service_description ARCCE Job Termination hostgroup_name arcce-hosts } define service { use arcce-passive-service service_description ARCCE Python version hostgroup_name arcce-hosts } define service { use arcce-passive-service service_description ARCCE Perl version hostgroup_name arcce-hosts } define service { use arcce-passive-service service_description ARCCE GCC version hostgroup_name arcce-hosts } define service { use arcce-passive-service service_description ARCCE csh usability hostgroup_name arcce-hosts } # Hosts # ===== # This provides the monitoring service. define host { use nagios-host host_name localhost } # Any host which uses the arcce-host template will get an active submission # service, and all the related passive services. #define host { # use arcce-host # host_name ce-00.example.org #} nordugrid-arc-nagios-plugins-3.1.1/doc/intro.rst0000644000175000002070000001455515002373741022573 0ustar mockbuildmock00000000000000**************** Introduction **************** This document describes the Nagios plugins mainly used to monitor NorduGrid ARC compute elements and related resources, but some probes should also be usable to test non-ARC resources. The package includes commands to do * LDAP queries and tests on the information system, including GLUE 2.0 and legacy schemas. * Job submission and monitoring of jobs with additional custom checks. * Transfers to and from storage elements using various protocols. The following chapters will cover the probes related to each of these topics. This chapter will describe common configuration and options. **Acknowledgements.** This work is co-funded by the EC EMI project under the FP7 Collaborative Projects Grant Agreement Nr. INFSO-RI-261611. .. _configuration-files: Configuration Files =================== The configuration is merged from a list of INI-format files, where settings from later files take precedence.
By default files matching ``/etc/nagios/*.ini`` are read in lexicographical order, but this can be overridden by setting ``$ARCNAGIOS_CONFIG`` to a colon-separated list of the files to load. A naming scheme like the following is suggested:: /etc/arc/nagios/20-dist.ini - comes with the default package /etc/arc/nagios/60-egi.ini - comes with the EGI package /etc/arc/nagios/90-local.ini - suggested for local changes An alternative to ``/etc/arc/nagios`` can be specified in the environment variable ``$ARCNAGIOS_CONFIG_DIR``. Under the same prefix, a default job script template is installed:: /etc/arc/nagios/20-dist.d/default.xrsl.j2 You can provide a modified script by placing it e.g. in ``/etc/arc/nagios/90-dist.d/default.xrsl.j2``, but be careful with this in a production environment since later versions of the probes may require changes to the script which make the modified version incompatible. Each probe has a main configuration section named after the probe or colloquially ``[arcce]`` for the ``check_arcce_*`` probes. In this section you can provide defaults for string-valued command-line options. The name of the configuration variable corresponding to an option is obtained by stripping the initial "``--``" and replacing "``-``" with "``_``", e.g. "``--home-dir``" becomes "``home_dir``". Common Options ============== The following options are common to most of the probes: ``--home-dir=`` Override $HOME at startup. This is a workaround for external commands which store things under $HOME on systems where the user account running Nagios does not have an appropriate or writable home directory. ``--loglevel=(debug|info|warning|error)`` This option allows you to increase the verbosity of the Nagios probes. Additional messages will occur as extended status lines in Nagios. ``--multiline-separator=`` Replacement for newlines when submitting multi-line results to passive services. Pass the empty string to drop extra lines. This option exists because Nagios currently doesn't support multi-line passive results. ``--command-file=`` The path of the Nagios command file. By default $NAGIOS_COMMANDFILE is used, which is usually the right thing. ``--how-invoked=(nagios|manual)``, ``--dump-options`` These are only needed for debugging purposes. ``--arcnagios-spooldir`` Top level directory for storing state information and for use as a working area. The default is ``/var/spool/arc/nagios``. If you need to debug an issue related to CE jobs, look under the ``ce-*`` subdirectories. .. _x509-proxy: Proxy Certificate ================= The ``check_arcce_*`` and ``check_gridstorage`` probes will require a proxy certificate to succeed. The probes will maintain a proxy when provided an X509 certificate and key. You can place these in a common section: .. code-block:: ini [gridproxy] default_voms = <voms> user_key = <path-of-key> user_cert = <path-of-cert> #user_proxy = # Optionally override the path of the generated proxy. The probes which require an X509 proxy have a ``--voms=`` option to specify the VOMS server to contact instead of ``default_voms``. When a ``user_key`` and ``user_cert`` pair is given, the default ``user_proxy`` path is unique to the selected VOMS. To use a pre-initialized proxy, make sure ``user_key`` and ``user_cert`` are not set. You will probably want to use a non-default location for the proxy. Either point to it with the environment variable ``X509_USER_PROXY`` or set it in the configuration file: ..
code-block:: ini [gridproxy] user_proxy = <path-of-proxy> If you use several VOs which require different certificates, you can replace the above section with one section ``gridproxy.<vo>`` per ``<vo>`` and use the ``--voms`` option to select which section to use. These sections don't have the ``default_voms`` setting. Running Probes from the Command-Line ==================================== The following instructions apply to ``check_arcce_submit``, ``check_arcce_monitor``, ``check_arcce_clean``, and ``check_gridstorage``. The other probes can be invoked from the command-line without special attention. For testing and debugging, it can be convenient to invoke the probes manually as a regular user. This can be done as follows. Choose a directory where you can store run-time state. Below, we use ``/tmp``, but it may be tidier to create a fresh directory. Then, create a configuration like .. code-block:: ini [DEFAULT] arcnagios_spooldir = /tmp/arc-nagios-testing [gridproxy] default_voms = <your-vo> [gridproxy.your-vo] user_proxy = /tmp/x509up_u<uid> substituting suitable values for the ``<...>`` meta-variables. You may need to add additional settings depending on what you test, of course. Acquire a proxy certificate (if needed) and point ``$ARCNAGIOS_CONFIG`` to the set of configurations you need, including the above: .. code-block:: sh arcproxy -S <your-vo> export ARCNAGIOS_CONFIG=/etc/arc/nagios/20-dist.ini:<your-config> The probes can now be run as .. code-block:: sh check_arcce_submit --how-invoked=manual ... check_arcce_monitor --how-invoked=manual ... check_arcce_clean --how-invoked=manual ... The main purpose of ``--how-invoked=manual`` is to tell the probe that any passive results shall be printed to the screen rather than submitted to the Nagios command pipe. It is not strictly needed for active-only probes. nordugrid-arc-nagios-plugins-3.1.1/doc/conf.py0000644000175000002070000001642515002373741022207 0ustar mockbuildmock00000000000000# -*- coding: utf-8 -*- # # NorduGrid ARC Nagios Plugins documentation build configuration file, created by # sphinx-quickstart on Thu Nov 10 14:34:24 2011. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os from subprocess import check_output # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'NorduGrid ARC Nagios Plugins' copyright = u'2011--2024 Petter A.
Urkedal' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. release = check_output(['python3', '../setup.py', '-V']).decode('utf-8').strip() # The short X.Y version. version = '.'.join(release.split('.')[0:2]) # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. html_logo = 'media/ng-logo.png' # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. 
#html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'NorduGridARCNagiosPluginsdoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). latex_paper_size = 'a4' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'NorduGridARCNagiosPlugins.tex', u'NorduGrid ARC Nagios Plugins Documentation', u'Petter Urkedal', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. latex_logo = 'media/EMI_Logo_std_150dpi_cropped.png' # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True latex_elements = { 'preamble': '\\input{../../ng-preamble.tex}\n', 'maketitle': '\\input{../../ng-titlepage.tex}\n', } # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ ('index', 'nordugridarcnagiosplugins', u'NorduGrid ARC Nagios Plugins Documentation', [u'Petter Urkedal'], 1) ] nordugrid-arc-nagios-plugins-3.1.1/doc/media/0000755000175000002070000000000015002373741021757 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/doc/media/ng-logo.png0000644000175000002070000005336515002373741024033 0ustar mockbuildmock00000000000000[binary PNG image data omitted]
nordugrid-arc-nagios-plugins-3.1.1/doc/arcce.rst0000644000175000002070000003421515002373741022514 0ustar mockbuildmock00000000000000***************************************************** Monitoring ARC Compute Elements by Job Submission ***************************************************** General Configuration ===================== For each CE to monitor run :: check_arcce_submit -H <hostname> This should be run at a relatively low frequency in order to let one job finish before the next is submitted. The probe keeps track of submitted jobs, and will hold the next submission if necessary. Subsequent sections describe additional options for testing data-staging, running custom scripts, etc. On a more regular basis, every 5 min or so, run :: check_arcce_monitor which will monitor the job status for each host and submit the result passively to a service matching the host name and the service description "ARCCE Job Termination". The passive service name can be configured. Finally, a probe is provided to tidy the ARC job list after unsuccessful attempts by `check_arcce_monitor` to clean jobs. This is also set up as a single service, and only needs to run occasionally, like once a day:: check_arcce_clean For additional options, see :: check_arcce_submit --help check_arcce_monitor --help check_arcce_clean --help Plugin Configuration -------------------- The main configuration section for this probe is ``arcce``, see :ref:`configuration-files`. This probe requires an X509 proxy, see :ref:`x509-proxy`. Connection URLs for job submission (the ``--ce`` option) may be specified in the section ``arcce.connection_urls``. Example:: [arcce] voms = ops user_cert = /etc/nagios/globus/robot-cert.pem user_key = /etc/nagios/globus/robot-key.pem loglevel = DEBUG [arcce.connection_urls] arc1.example.org = ARC1:https://arc1.example.org:443/ce-service arc0.example.org = ARC0:arc0.example.org:2135/nordugrid-cluster-name=arc0.example.org,Mds-Vo-name=local,o=grid The ``user_key`` and ``user_cert`` options may be better placed in the common ``gridproxy`` section.
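For example, the equivalent shared configuration (a sketch reusing the paths from the example above) would be::

    [gridproxy]
    default_voms = ops
    user_cert = /etc/nagios/globus/robot-cert.pem
    user_key = /etc/nagios/globus/robot-key.pem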
Nagios Configuration
--------------------

You will need command definitions for monitoring and submission::

    define command {
        command_name check_arcce_monitor
        command_line $USER1$/check_arcce_monitor -H $HOSTNAME$
    }
    define command {
        command_name check_arcce_clean
        command_line $USER1$/check_arcce_clean -H $HOSTNAME$
    }
    define command {
        command_name check_arcce_submit
        command_line $USER1$/check_arcce_submit -H $HOSTNAME$ \
            [--test <test-name> ...]
    }

For monitoring, add a single service like ::

    define service {
        use monitoring-service
        host_name localhost
        service_description ARCCE Monitoring
        check_command check_arcce_monitor
    }
    define service {
        use monitoring-service
        host_name localhost
        service_description ARCCE Cleaner
        check_command check_arcce_clean
        normal_check_interval 1440
        retry_check_interval 120
    }

For each host, add something like ::

    define service {
        use submission-service
        host_name arc0.example.org
        service_description ARCCE Job Submission
        check_command check_arcce_submit
    }
    define service {
        use passive-service
        host_name arc0.example.org
        service_description ARCCE Job Termination
        check_command check_passive
    }

The ``--test <test-name>`` option enables tests to run in addition to a
plain job submission.  The tests are specified in individual sections of
the configuration files as described below.  Such a test may optionally
submit its results to a named passive service instead of the above
termination service.  To do so, add the Nagios configuration for the
service and duplicate the "``service_description``" in the section
defining the test.

See the arcce-example.cfg for a more complete Nagios configuration.

Running Multiple Job Services on the Same Host
----------------------------------------------

By default, running jobs are tracked on a per-host basis.  To define
multiple job submission services for the same host, pass to ``--job-tag``
a tag which identifies the service uniquely on this host.  Remember to
also add a passive service and pass the corresponding
``--termination-service`` option.  The scheme for configuring an
auxiliary submission/termination service is::

    define command {
        command_name check_arcce_submit_<tag>
        command_line $USER1$/check_arcce_submit -H $HOSTNAME$ \
            --job-tag <tag> \
            --termination-service 'ARCCE Job Termination for <tag>' \
            [--test <test-name> ...]
    }
    define service {
        use submission-service
        host_name arc0.example.org
        service_description ARCCE Job Submission for <tag>
        check_command check_arcce_submit_<tag>
    }
    define service {
        use passive-service
        host_name arc0.example.org
        service_description ARCCE Job Termination for <tag>
        check_command check_passive
    }

Custom Job Descriptions
-----------------------

If the generated job scripts and job descriptions are not sufficient, you
can provide hand-written ones by passing the ``--job-description`` option
to the ``check_arcce_submit`` command.  This option is incompatible with
``--test``.  Currently no substitutions are done in the job description
file, other than what may be provided by ARC.

Job Tests
=========

Scripted Checks
---------------

It is possible to add custom commands to the job scripts and do a regular
expression match on the output.  E.g. to test that Python is installed
and report the version, add the following section to the plugin
configuration file::

    [arcce.python]
    jobplugin = scripted
    required_programs = python
    script_line = python -V >python.out 2>&1
    output_file = python.out
    output_pattern = Python\s+(?P<version>\S+)
    status_ok = Found Python version %(version)s.
    status_critical = Python version not found in output.
    service_description = ARCCE Python version
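Results from this test are reported to the passive service named by
``service_description``.  A matching Nagios service definition might look
like the following sketch, which assumes the ``passive-service`` template
and ``check_passive`` command used earlier; the host name is
illustrative::

    define service {
        use passive-service
        host_name arc0.example.org
        service_description ARCCE Python version
        check_command check_passive
    }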
The options in the test section are as follows:

required_programs
    Space-separated list of programs to check for before running the
    script.  If one of the programs is not found, it is reported as a
    critical error.

script_line
    One-liner shell code to run; it may use features commonly supported
    by ``/bin/sh`` on your CEs.

output_file
    The name of the file your script produces.  This is mandatory, and
    the same file will be used to communicate errors back to
    ``check_arcce_monitor``.  The reason standard output is not used is
    to allow multiple job tests to publish independent passive results.

output_pattern
    This is a Python regular expression which is searched for in the
    output of the script.  It will stop on the first matched line.  You
    cannot match more than one line, so distill the output in
    ``script_line`` if necessary.  Named regular expression groups of the
    form ``(?P<v>...)`` capture their matches in a variable *v*, which
    can be substituted in the status messages.

status_ok
    The status message if the above regular expression matches.  A named
    regular expression group captured in a variable *v* can be
    substituted with ``%(v)s``.

status_critical
    Status message if the regular expression does not match.  Obviously
    you cannot do substitutions of RE groups.  If the test for required
    programs fails, then the status message will indicate which programs
    are missing instead.

service_description
    The ``service_description`` of the passive Nagios service to which
    results are reported.

See :ref:`example-ini` for more examples.

It is possible to give more control over the probe status to the remote
script.  Instead of ``output_pattern``, the script may pass status
messages and an exit code back to Nagios.  This is done by printing
certain magic strings to the file specified by ``output_file``:

* ``__status <code> <message>`` sets the exit code and status line of the
  probe.

* ``__log <level> <message>`` emits an additional status line which will
  be shown iff the log level set in the probe configuration is at least
  ``<level>``, which is a numeric value from the Python ``logging``
  module.

* ``__exit <code>`` is used to report the exit code of a script.
  Anything other than 0 will cause a CRITICAL status.  You probably don't
  want to use this yourself.

The ``__status`` line may occur before, between, or after ``__log``
lines.  This can be convenient to log detailed check results and issues
before the final status is known.

It is possible to adapt this to a Nagios-style probe ``check_foo`` by
wrapping it in some shell code:

.. code-block:: sh

    script_line = (/bin/sh check_foo 2>&1; echo __status $?) | \
        (read msg; sed -e 's/^/__log 20 /' -e '$s;^__log 20 \(.*\);\1 '"$msg;") \
        > check_foo.out
    output_file = check_foo.out
    staged_inputs = file:////path-to/check_foo

Staging Checks
--------------

The "staging" job plug-in checks that file staging works in connection
with job submission.  It is enabled with ``--test <test-name>``, where
the plugin configuration file contains a corresponding section::

    [arcce.<test-name>]
    jobplugin = staging
    staged_inputs = <URL> ...
    staged_outputs = <URL> ...
    service_description = <service-description>

Note that the URLs are space-separated.  They can be placed on separate
indented lines.  Within the URLs, the following substitutions may be
useful:

``%(hostname)s``
    The argument to the ``-H`` option if passed to the probe, else
    "localhost".

``%(epoch_time)s``
    The integer number of seconds since Epoch.

If a staging check fails, the whole job will fail, so its status cannot
be submitted to an individual passive service as with scripted checks.
For this reason, it may be preferable to create one or more individual
submission services dedicated to file staging.  Remember to pass unique
names to ``--job-tag`` to isolate them.

Custom Substitutions in Job Test Sections
=========================================

In job test sections you can use substitutions of the form
``%(<name>)s``, where ``<name>`` is defined in a separate section as
described in the following.  Variable definitions can themselves contain
substitutions of this kind.  Cyclic definitions are detected and reported
as UNKNOWN.

**Probe Option**.  A section of the form

.. code-block:: ini

    [variable.<name>]
    method = option
    default = <value>

declares ``<name>`` as an option which can be passed to the probe with
``-O <name>=<value>``.  The ``default`` field may be omitted, in which
case the probe option becomes mandatory for any tests using the variable.

**UNIX Environment**.  A section of the following form declares that
``<name>`` shall be imported from the UNIX environment.  If no default
value is provided, then the environment variable must be exported to the
probe.

.. code-block:: ini

    [variable.<name>]
    method = getenv
    envvar = <environment-variable>

The ``envvar`` line optionally specifies the name of the variable to look
up, which otherwise defaults to ``<name>``.

**Pipe Output**.  The following allows you to capture the output of a
shell command:

.. code-block:: ini

    [variable.<name>]
    method = pipe
    command = <command>

**Custom Time Stamp.**  This method provides a custom time stamp format
as an alternative to ``%(epoch_time)s``.  It takes the form

.. code-block:: ini

    [variable.<name>]
    method = strftime
    format = <format>

Note that the ``%`` characters in the ``format`` field must be escaped as
``%%``, so as to avoid attempts to parse them as interpolations.  An
alternative ``raw_format`` field can be used, which is interpreted
literally.

**Random Line from File.**  A section of the following form picks a
random line from ``<input_file>``.  A low entropy system source is used
for seeding.

.. code-block:: ini

    [variable.<name>]
    method = random_line
    input_file = <input_file>
    exclude = <word> ...

Leading and trailing spaces are trimmed, and empty lines and lines
starting with a ``#`` character are ignored.  If provided, any lines
matching one of the space-separated words in ``exclude`` are ignored, as
well.

**Switch.**  If you need to set a variable on a case to case basis, the
form is

.. code-block:: ini

    [variable.<name>]
    method = switch
    index = <expression>
    case[<value-1>] = <result-1>
    # ...
    case[<value-N>] = <result-N>
    default = <result>

This will first expand "``<expression>``".  If this matches
"``<value-i>``" for some "``i``", then the expansion of ``<result-i>`` is
returned, otherwise ``<result>``.  See also the example below.

**LDAP Search.**  A value can be extracted from an LDAP attribute using

.. code-block:: ini

    [variable.<name>]
    method = ldap
    uri = <uri> ( <uri> )*
    filter = <filter>
    attribute = <attribute>
    default = <value>

If multiple records are returned, the first returned record which
provides a value for the requested attribute is used.  If the attribute
has multiple values, the first returned value is used.  Note that the
LDAP server may not guarantee stable ordering.

**Example.**  In the following staging tests, ``%(se_host)s`` is replaced
by a random host name from the file
``/var/lib/gridprobes/ops/goodses.conf``, and ``%(now)s`` is replaced by
a customized time stamp.

.. code-block:: ini
    [arcce.srm]
    jobplugin = staging
    staged_outputs = srm://%(se_host)s/%(se_dir)s/%(hostname)s-%(now)s.txt
    service_description = Test Service

    [variable.se_host]
    method = random_line
    input_file = /var/lib/gridprobes/ops/goodses.conf

    [variable.now]
    method = strftime
    raw_format = %FT%T

    [variable.se_dir]
    method = switch
    index = %(se_host)s
    case[se-1.example.org] = /pnfs/se-1.example.com/nagios-testfiles
    case[se-2.example.org] = /dpm/se-2.example.com/home/nagios-testfiles
    default = /nagios-testfiles
nordugrid-arc-nagios-plugins-3.1.1/doc/infosys.rst0000644000175000002070000000144115002373741023124 0ustar  mockbuildmock00000000000000**************************************************************
 Monitoring ARC Compute Elements via the Information System
**************************************************************

The ``check_arcce_info`` probe can be used to do a basic check of the CE
without submitting any jobs.  It does so by querying the information
system exposed by the CE and passes a judgement based on the relevant
parts of the returned information, according to the options passed to the
probe.

See ``check_arcce_info --help`` for a full set of options.  The ``-H``
option is required to indicate the CE, and the probe needs to validate
the origin of and authenticate to the CE, which is done based on the
``--tls-*`` options.  The ``--require-*`` options describe failure
conditions and ``--dump`` provides additional output.
nordugrid-arc-nagios-plugins-3.1.1/nordugrid-arc-nagios-plugins.spec0000644000175000002070000002306415002373741026511 0ustar  mockbuildmock00000000000000# Disable debuginfo since there are no binaries
%global debug_package %{nil}

%{!?enable_doc: %global enable_doc 1}

%global site org.nordugrid
%global nagios_bindir %{_libdir}/nagios/plugins
%global arc_spooldir %{_localstatedir}/spool/arc
%global pkg_spooldir %{arc_spooldir}/nagios
%global pkg_sysconfdir %{_sysconfdir}/arc/nagios

%if 0%{?rhel} == 8
%global __python3 /usr/bin/python3.8
%endif
%{!?__python3: %global __python3 python3}
%{!?python3_sitelib: %global python3_sitelib %(%{__python3} -c 'from distutils import sysconfig; print(sysconfig.get_python_lib())')}

Name:      nordugrid-arc-nagios-plugins
Version:   3.1.1
Release:   1%{?dist}
Summary:   Nagios plugins for ARC
Group:     System Environment/Daemons
License:   ASL 2.0
URL:       http://www.nordugrid.org
Source0:   http://download.nordugrid.org/packages/%{name}/releases/%{version}/src/%{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
Requires:  (nordugrid-arc-client >= 6.5.0 or nordugrid-arc6-client >= 6.5.0 or nordugrid-arc7-client)
Requires:  nagios-plugins
BuildRequires: make
%if 0%{?rhel} == 8
Requires: python38-cryptography
Requires: python38-jinja2
Requires: python38-ldap
BuildRequires: python38-setuptools
%else
Requires: python3-cryptography
Requires: python3-jinja2
Requires: python3-ldap
BuildRequires: python3-setuptools
%endif
%if %{enable_doc}
BuildRequires: python3-sphinx
%endif

%description
This package provides the Nagios plugins for testing ARC CE, using the
ARC-1 API.

%if %{enable_doc}
%package doc
Summary:   HTML documentation for ARC 1 Nagios plugins
Group:     Documentation
BuildArch: noarch
%endif

%if %{enable_doc}
%description doc
HTML documentation for %{name}.
%endif %package egi Summary: EGI configuration and dependencies for the ARC Nagios plugins Group: System Environment/Daemons BuildArch: noarch Requires: (nordugrid-arc-plugins-arcrest >= 6.5.0 or nordugrid-arc6-plugins-arcrest >= 6.5.0 or nordugrid-arc-plugins-needed >= 7.0.0 or nordugrid-arc7-plugins-needed) %description egi EGI configuration and dependencies for the ARC Nagios plugins %prep %setup -q -n %{name}-%{version} %build %{__python3} setup.py build %if %{enable_doc} mkdir -p doc/_build/html doc/_static make -C doc html rm -f doc/_build/html/.buildinfo %endif %install test %{buildroot} != / && rm -rf %{buildroot} %{__python3} setup.py install --root=%{buildroot} --skip-build install -m755 -d %{buildroot}%{_sysconfdir}/nagios/plugins install -m755 -d %{buildroot}%{pkg_spooldir} %clean test %{buildroot} != / && rm -rf %{buildroot} %files %dir %{pkg_sysconfdir} %dir %{pkg_sysconfdir}/20-dist.d %config(noreplace) %{pkg_sysconfdir}/20-dist.ini %config(noreplace) %{pkg_sysconfdir}/20-dist.d/default.xrsl.j2 %{nagios_bindir}/check_arcce_clean %{nagios_bindir}/check_arcce_monitor %{nagios_bindir}/check_arcce_submit %{nagios_bindir}/check_arcrest_info %{nagios_bindir}/check_arcservice %{nagios_bindir}/check_gridstorage %{python3_sitelib}/arcnagios %{python3_sitelib}/nordugrid_arc_nagios_plugins-*.egg-info %dir %{arc_spooldir} %attr(-,nagios,nagios) %{pkg_spooldir} %doc AUTHORS README.rst LICENSE NOTICE %doc doc/arcnagios.ini.example %doc doc/services.cfg.example %if %{enable_doc} %files doc %doc AUTHORS README.rst LICENSE NOTICE %doc doc/_build/html %endif %files egi %doc AUTHORS README.rst LICENSE NOTICE %dir %{pkg_sysconfdir}/60-egi.d %config(noreplace) %{pkg_sysconfdir}/60-egi.ini # FIXME: Prevent rpmbuild from generating these compiled objects: %config(noreplace) %{pkg_sysconfdir}/60-egi.d/arcce_igtf.py* %changelog * Thu Apr 24 2025 Petter Urkedal - 3.1.1-1 - New upstream release 3.1.1. * Mon Apr 07 2025 Petter Urkedal - 3.1.0-1 - New upstream release 3.1.0. * Tue Mar 25 2025 Petter Urkedal - 3.0.0-1 - New upstream release 3.0.0. * Wed Jan 15 2025 Petter Urkedal - 3.0.0-0.rc8 - New upstream release candidate 3.0.0rc8. * Wed Nov 13 2024 Petter Urkedal - 3.0.0-0.rc7 - New upstream release candidate 3.0.0rc7. * Tue Oct 22 2024 Petter Urkedal - 3.0.0-0.rc6 - New upstream release candidate 3.0.0rc6. * Wed Oct 09 2024 Anders Waananen - 3.0.0-0.rc5 - New upstream release candidate 3.0.0rc5. * Wed Oct 09 2024 Petter Urkedal - 3.0.0-0.rc4 - New upstream release candidate 3.0.0rc4. * Fri Nov 03 2023 Petter Urkedal - 2.0.1-1 - New upstream release 2.0.1. * Thu Nov 02 2023 Anders Waananen - 2.0.1-0.rc2 - New upstream release candidate 2.0.1rc2. * Wed Oct 25 2023 Petter Urkedal - 2.0.1-0.rc1 - New upstream release candidate 2.0.1rc1. * Thu Nov 14 2019 Petter Urkedal - 2.0.0-1 - New upstream release 2.0.0. * Mon Oct 14 2019 Petter Urkedal - 2.0.0-0.rc3 - New upstream release candidate 2.0.0rc3. * Wed Oct 02 2019 Petter Urkedal - 2.0.0-0.rc2 - New upstream release candidate 2.0.0rc2. * Thu Apr 25 2019 Petter Urkedal - 2.0.0-0.rc1 - New upstream release candidate 2.0.0rc1. * Thu Jun 15 2017 Petter Urkedal - 1.9.1-0.rc1 - New upstream release candidate 1.9.1rc1. * Wed May 31 2017 Anders Waananen - 1.9.0-1 - Updated to release 1.9.0. * Tue Apr 25 2017 Petter Urkedal - 1.9.0-0.rc1 - Updated to release candidate 1.9.0rc1. * Fri Sep 11 2015 Petter Urkedal - 1.8.4-1 - Updated to release 1.8.4. 
* Mon Jul 06 2015 Anders Waananen - 1.8.3-2 - Drop doc subpackage for el5 due to missing dependencies * Thu Jul 02 2015 Petter Urkedal - 1.8.3-1 - Updated to release 1.8.3. * Fri Mar 27 2015 Petter Urkedal - 1.8.2-1 - Updated to release 1.8.2. * Thu Jan 15 2015 Petter Urkedal - 1.8.2-0.rc2 - Updated to release candidate 1.8.2rc2. * Fri Jan 09 2015 Petter Urkedal - 1.8.2-0.rc1 - Updated to release candidate 1.8.2rc1. * Fri Aug 15 2014 Anders Waananen - 1.8.1-1 - Updated to release 1.8.1. * Fri Jun 27 2014 Petter Urkedal - 1.8.1-0.rc1 - Updated to release candidate 1.8.1rc1. * Wed Apr 30 2014 Petter Urkedal - 1.8.0-1 - Updated to release 1.8.0. * Tue Oct 22 2013 Petter Urkedal - 1.7.1-1 - Updated to release 1.7.1. * Fri Aug 16 2013 Petter Urkedal - 1.7.0-1 - Updated to release 1.7.0. * Fri Jul 05 2013 Petter Urkedal - 1.6.1-0.rc1 - Updated to release candidate 1.6.1rc1. * Fri Apr 19 2013 Petter Urkedal - 1.6.0-1 - Updated to release 1.6.0. * Sat Apr 06 2013 Petter Urkedal - 1.6.0-0.rc1 - Updated to release candidate 1.6.0rc1. * Mon Feb 18 2013 Petter Urkedal - 1.5.0-1 - Updated to release 1.5.0. * Fri Feb 01 2013 Petter Urkedal - 1.5.0-0.rc3 - Updated to release candidate 1.5.0rc3. * Mon Jan 28 2013 Petter Urkedal - 1.5.0-0.rc2 - Updated to release candidate 1.5.0rc2. * Fri Jan 11 2013 Petter Urkedal - 1.5.0-0.rc1 - Updated to release candidate 1.5.0rc1. * Thu Dec 20 2012 Petter Urkedal - 1.4.0-0.rc4 - Updated to release candidate 1.4.0rc4. * Tue Nov 27 2012 Petter Urkedal - 1.4.0-0.rc1 - Updated to release candidate 1.4.0rc1. * Mon Oct 29 2012 Petter Urkedal - 1.3.11-1 - Updated to release 1.3.11. * Wed Sep 26 2012 Petter Urkedal - 1.3.10-1 - Updated to release 1.3.10. * Fri Sep 07 2012 Petter Urkedal - 1.3.9-1 - Updated to release 1.3.9. * Mon Apr 23 2012 Petter Urkedal - 1.3.8-1 - Updated to release 1.3.8. * Tue Apr 03 2012 Petter Urkedal - 1.3.7-1 - Updated to release 1.3.7. * Mon Apr 02 2012 Petter Urkedal - 1.3.6-1 - Updated to release 1.3.6. * Thu Feb 02 2012 Petter Urkedal - 1.3.5-1 - Updated to release 1.3.5. * Thu Feb 02 2012 Petter Urkedal - 1.3.4-1 - Updated to release 1.3.4. * Thu Feb 02 2012 Petter Urkedal - 1.3.3-1 - Updated to release 1.3.3. * Wed Dec 21 2011 Petter Urkedal - 1.3.2-1 - Updated to release 1.3.2. * Mon Dec 19 2011 Petter Urkedal - 1.3.1-1 - Updated to release 1.3.1. * Thu Dec 08 2011 Petter Urkedal - 1.3.0-1 - Updated to release 1.3.0. * Wed Nov 23 2011 Petter Urkedal - 1.2.0-1 - Updated to release 1.2.0. * Mon Nov 14 2011 Petter Urkedal - Change README to README.rst. - Add documentation subpackage. * Fri Nov 04 2011 Petter Urkedal - 1.1.0-1 - Updated to release 1.1.0. * Thu Nov 03 2011 Petter Urkedal - Install default configuration file. * Wed Oct 26 2011 Petter Urkedal - 1.0.2-1 - Updated to release 1.0.2. * Thu Oct 20 2011 Petter Urkedal - 1.0.1-1 - Updated to release 1.0.1. * Tue Oct 18 2011 Petter Urkedal - Add argparse and nordugrid-arc-python dependencies. - Install README and LICENSE. * Fri Oct 14 2011 Petter Urkedal - 1.0-1 - Updated to release 1.0. - Almost complete rewrite for the new probes. 
* Fri Sep 30 2011 Anders Waananen - 0.9-1 - New package name and ownership * Thu Jun 30 2011 Mattias Ellert - 0.4-1 - Fix flags to stat * Thu Nov 18 2010 Mattias Ellert - 0.3-1 - Implement changes proposed by Emir * Mon Oct 11 2010 Mattias Ellert - 0.2-1 - Remove Requires (per WLCG practice) * Thu Sep 23 2010 Mattias Ellert - 0.1-1 - Initial packaging nordugrid-arc-nagios-plugins-3.1.1/Makefile0000644000175000002070000000223315002373741021573 0ustar mockbuildmock00000000000000VERSION := $(strip $(shell /usr/bin/python3 setup.py -V)) VERSION_LAST := $(strip $(shell cat VERSION 2>/dev/null || :)) default: @echo "Main target is 'dist'" @echo "VERSION = $(VERSION)" sdist: VERSION /usr/bin/python3 setup.py $@ TEMPLATED = nordugrid-arc-nagios-plugins.spec $(TEMPLATED): %: %.in VERSION @VERSION=$(VERSION); \ PREVERSION=`echo -n $(VERSION) | sed s/^[0-9.]*//`; \ RPMRELEASE=$${PREVERSION:+0.}$${PREVERSION:-1}; \ BASEVERSION=$${VERSION%%[a-z]*}; \ echo "Updating $@ to version $(VERSION)."; \ sed -e "s;@VERSION@;$(VERSION);g" \ -e "s;@BASEVERSION@;$$BASEVERSION;g" \ -e "s;@PREVERSION@;$$PREVERSION;g" \ -e "s;@RPMRELEASE@;$$RPMRELEASE;g" \ < $< > $@.new && mv -f $@.new $@ dist: $(TEMPLATED) sdist test -d dist && find dist -type f -name \*tar.gz -exec cp -p '{}' $(CURDIR) \; rpm: dist rpmbuild -tb nordugrid-arc-nagios-plugins-$(VERSION).tar.gz ifneq ($(VERSION),$(VERSION_LAST)) VERSION: always ifeq ($(VERSION_LAST),) @echo 'Creating VERSION with content $(VERSION).' else @echo 'Replacing VERSION content from $(VERSION_LAST) to $(VERSION).' endif @echo >$@ $(VERSION) always: endif .PHONY: dist rpm always nordugrid-arc-nagios-plugins-3.1.1/README.rst0000644000175000002070000000156015002373741021624 0ustar mockbuildmock00000000000000******************** ARC Nagios Plugins ******************** This README describes the Nagios plugins for ARC-1 including CEs and associated services. Documentation can be found in the ``doc`` subdirectory, as well as using the ``--help`` options of the plugins. If you have Sphinx installed, you can create nicer looking documentation by typing make -C doc html or using one of the other targets of the same makefile. The result is placed in a subdirectory of ``doc/_build``. Installation ============ This package uses Python distutils, so you can install it with :: python setup.py build sudo python setup.py install For customized installations, please refer to the manual http://docs.python.org/distutils/ or ``python setup.py --help``. The package also comes with an RPM spec, so it can be built similar to an SRPM using ``rpmbuild -tb ``. nordugrid-arc-nagios-plugins-3.1.1/setup.py0000644000175000002070000000414315002373741021647 0ustar mockbuildmock00000000000000import os import re import sys from setuptools import setup, find_packages from distutils.sysconfig import get_config_vars from subprocess import check_output def file_contents(fp): fd = open(fp, 'r') content = fd.read() fd.close() return content.strip() _git_version_re = re.compile(r'v(\d+.\d+.\d+(rc\d+)?)(-(\d+)-g([0-9a-f]+))?$') def get_version(): top_srcdir = os.path.dirname(sys.argv[0]) or '.' 
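    # A minimal sketch of the intended behavior, based on _git_version_re
    # above: in a git checkout, "git describe --tags" output such as
    # "v3.1.1" yields "3.1.1", while "v3.1.1-4-gabcdef" yields the
    # PEP 440 dev version "3.1.1.dev4+abcdef"; outside a checkout, the
    # VERSION file maintained by the Makefile is used instead.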
    if os.path.isdir(os.path.join(top_srcdir, '.git')):
        cmd = ['git', 'describe', '--tags']
        version = check_output(cmd, cwd=top_srcdir).decode('utf-8').strip()
        mo = re.match(_git_version_re, version)
        if mo:
            if mo.group(3):
                return '%s.dev%s+%s' % (mo.group(1), mo.group(4), mo.group(5))
            else:
                return mo.group(1)
    return file_contents(os.path.join(top_srcdir, 'VERSION'))

def get_nagios_plugin_dir():
    exec_prefix, libdir = get_config_vars('exec_prefix', 'LIBDIR')
    # if libdir.startswith(exec_prefix + '/'):
    #     libdir = libdir[len(exec_prefix)+1:]
    # return os.path.join(libdir, 'nagios/plugins')
    if libdir.endswith('/usr/lib64'):
        return '/usr/lib64/nagios/plugins'
    else:
        return '/usr/lib/nagios/plugins'

setup(
    name = "nordugrid-arc-nagios-plugins",
    version = get_version(),
    description = 'Nagios Probes for Arc CEs',
    url = 'http://www.nordugrid.org/',
    author = 'Petter Urkedal',
    author_email = 'urkedal@nbi.dk',
    requires = ['cryptography', 'ldap'],
    packages = find_packages(),
    data_files = [
        (get_nagios_plugin_dir(), [
            'plugins/check_arcce_clean',
            'plugins/check_arcce_monitor',
            'plugins/check_arcce_submit',
            'plugins/check_arcrest_info',
            'plugins/check_arcservice',
            'plugins/check_gridstorage',
        ]),
        ('/etc/arc/nagios', ['config/20-dist.ini']),
        ('/etc/arc/nagios/20-dist.d', ['config/20-dist.d/default.xrsl.j2']),
        ('/etc/arc/nagios', ['config/60-egi.ini']),
        ('/etc/arc/nagios/60-egi.d', ['config/60-egi.d/arcce_igtf.py']),
    ],
)
nordugrid-arc-nagios-plugins-3.1.1/config/0000755000175000002070000000000015002373741021400 5ustar  mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/config/60-egi.ini0000644000175000002070000000627715002373741023104 0ustar  mockbuildmock00000000000000[variable.voms]
method = option

# The IGTF CA Certificates Probe
# ==============================

[variable.igtf_base_url]
# Usage: check_arcce_submit -O igtf_base_url=<URL> ...
# Overrides the location of the IGTF release files.
#
method = option
default = http://repository.egi.eu/sw/production/cas/1/current/meta

[arcce.dist-caversion]
# Usage: check_arcce_submit --job-tag caversion --test caversion ...
# Checks that all IGTF CA certificates are installed on the CE, that they
# are up to date, and that there are no obsolete IGTF CAs installed.
#
jobplugin = scripted
staged_inputs =
    file:%(config_dir)s/60-egi.d/arcce_igtf.py
    %(igtf_base_url)s/ca-policy-egi-core.release;cache=no
    %(igtf_base_url)s/ca-policy-egi-core.list;cache=no
    %(igtf_base_url)s/ca-policy-egi-core.obsoleted;cache=no
output_file = caversion.out
runtime_environments = ENV/PROXY
script_line = PYTHON=`which python2 python python3 2>/dev/null| head -1`; $PYTHON arcce_igtf.py >caversion.out
service_description = org.nordugrid.ARC-CE-IGTF%(service_suffix)s

# SRM Staging
# ===========

# Location of the BDII server to query for information about the
# VO-specific location on storage elements.
[variable.top_bdii]
method = option
default = ldap://lcg-bdii.cern.ch:2170

[variable.se_vo_dir]
method = ldap
uri = %(top_bdii)s
basedn = mds-vo-name=local,o=grid
filter = (&(objectClass=GlueVOInfo)(GlueChunkKey=GlueSEUniqueID=%(se_host)s)(GlueVOInfoAccessControlBaseRule=VO:%(voms)s))
attribute = GlueVOInfoPath, GlueSAPath

[variable.good_ses_file]
# Usage: check_arcce_submit -O good_ses_file=<file> ...
# The location of a file containing a list of known good storage elements
# which can be used for Nagios staging tests.  This is not needed if the
# -O se_host=<host> option is provided.
#
method = option
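# For illustration (hypothetical hosts), such a file could contain one
# storage element host name per line; empty lines and lines starting with
# "#" are ignored by the random_line method:
#
#     se-1.example.org
#     se-2.example.org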
[variable.se_host]
# The host name of the storage element to use for staging tests.  The
# following picks a random line from the file specified in good_ses_file,
# but it can be overridden with -O se_host=<host>.
#
method = random_line
input_file = %(good_ses_file)s
reputation_dist = se_host

[variable.se_test_dir]
# Usage: check_arcce_submit -O se_test_dir=<path> ...
# A directory on se_host where test files can be written.
#
method = option
default = %(se_vo_dir)s/nagios-%(local_hostname)s/arcce

[variable.stage_stamp]
method = strftime
raw_format = %Y%m%dT%H%M

[arcce.dist-stage-https]
# Usage: check_arcce_submit --job-tag https --test dist-stage-https ...
# Performs staging tests using the HTTPS protocol.
#
jobplugin = staging
upload_if_missing = https://%(se_host)s%(se_test_dir)s/https-input
staged_inputs = https://%(se_host)s%(se_test_dir)s/https-input
staged_outputs = https://%(se_host)s%(se_test_dir)s/https-%(stage_stamp)s-%(hostname)s
service_description = org.nordugrid.ARC-CE-https%(service_suffix)s

[arcce.dist-stage-srm]
# Usage: check_arcce_submit --job-tag srm --test dist-stage-srm ...
# Performs staging tests using the SRM protocol.
#
jobplugin = staging
upload_if_missing = srm://%(se_host)s%(se_test_dir)s/srm-input
staged_inputs = srm://%(se_host)s%(se_test_dir)s/srm-input
staged_outputs = srm://%(se_host)s%(se_test_dir)s/srm-%(stage_stamp)s-%(hostname)s
service_description = org.nordugrid.ARC-CE-srm%(service_suffix)s
nordugrid-arc-nagios-plugins-3.1.1/config/20-dist.ini0000644000175000002070000000511615002373741023266 0ustar  mockbuildmock00000000000000[variable.service_suffix]
# Usage: check_arcce_submit -O service_suffix=<suffix> ...
# Appends <suffix> to all passive service names.
#
method = option
default =

[variable.local_hostname]
# The host name of the Nagios server.
method = pipe
command = hostname

# Software Tests for the Main Job Submission Probe
# ================================================
#
# These test for a few programs.  They use regular expressions, assuming
# that the commands write out the version information in a certain form.
# If the probe should fail, you can find the output matched against under
#
#     /var/spool/nagios/plugins/arcce/$VO/$HOST/job_output/*
#
# and adjust the regular expressions.  Please also report the issue to
# http://bugzilla.nordugrid.org

[arcce.dist-sw-csh]
# Usage: check_arcce_submit --test dist-sw-csh ...
#
jobplugin = scripted
#required_programs = csh
script_line = echo >csh-test.csh '#! /bin/csh'; echo >>csh-test.csh 'env >csh.out'; chmod +x csh-test.csh; ./csh-test.csh
output_file = csh.out
output_pattern = ^PATH=
status_ok = Found working csh.
status_critical = Could not find \$PATH in the csh environment
service_description = org.nordugrid.ARC-CE-sw-csh%(service_suffix)s

[arcce.dist-sw-gcc]
# Usage: check_arcce_submit --test dist-sw-gcc ...
#
jobplugin = scripted
#required_programs = gcc
script_line = gcc --version >gcc.out 2>&1
output_file = gcc.out
output_pattern = gcc\s+(\(.*\)\s+)?(?P<version>\d+\.\S+)
status_ok = Found GCC version %(version)s.
status_critical = Could not match GCC version.  See /etc/arc/nagios/20-dist.ini for debugging hints.
service_description = org.nordugrid.ARC-CE-sw-gcc%(service_suffix)s

[arcce.dist-sw-python]
# Usage: check_arcce_submit --test dist-sw-python ...
#
jobplugin = scripted
#required_programs = python
script_line = PYTHON=`which python2 python python3 2>/dev/null| head -1`; $PYTHON -V >python.out 2>&1
output_file = python.out
output_pattern = Python\s+(?P<version>\S+)
status_ok = Found Python version %(version)s.
status_critical = Could not match Python version.  See /etc/arc/nagios/20-dist.ini for debugging hints.
service_description = org.nordugrid.ARC-CE-sw-python%(service_suffix)s

[arcce.dist-sw-perl]
# Usage: check_arcce_submit --test dist-sw-perl ...
#
jobplugin = scripted
#required_programs = perl
script_line = perl -v >perl.out 2>&1
output_file = perl.out
output_pattern = This is [Pp]erl.*\bv(?P<version>[0-9A-Za-z.-]+)
status_ok = Found Perl version %(version)s.
status_critical = Could not match Perl version.  See /etc/arc/nagios/20-dist.ini for debugging hints.
service_description = org.nordugrid.ARC-CE-sw-perl%(service_suffix)s
nordugrid-arc-nagios-plugins-3.1.1/config/20-dist.d/0000755000175000002070000000000015002373741023004 5ustar  mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/config/20-dist.d/default.xrsl.j20000644000175000002070000000216415002373741025657 0ustar  mockbuildmock00000000000000&(executable="{{ jd.script_name }}")
 (jobname="{{ jd.job_name }}")
{%- if jd.script_args %}
 (arguments={% for arg in jd.script_args %}"{{ arg }}" {% endfor %})
{%- endif %}
 (stdout="{{ jd.output }}")
 (stderr="{{ jd.error }}")
{%- if jd.logdir %}
 (gmlog="{{ jd.logdir }}")
{%- endif -%}
{%- if jd.wall_time_limit %}
 (wallTime="{{ jd.wall_time_limit }} seconds")
{%- endif -%}
{%- if jd.memory_limit %}
 (memory="{{ (jd.memory_limit + 1048575) // 1048576 }}")
{%- endif %}
 (inputfiles=
    ("{{ jd.script_name }}" "file:{{ jd.script_path }}" "overwrite=yes")
{%- for fn, url, url_opts in jd.staged_inputs %}
    ("{{ fn }}" "{{ url or '' }}" "overwrite=yes"
{%- for url_opt in url_opts %} "{{ url_opt }}"{% endfor %})
{%- endfor -%})
{%- if jd.staged_outputs %}
 (outputfiles=
{%- for fn, url, url_opts in jd.staged_outputs %}
    ("{{ fn }}" "{{ url or '' }}" "overwrite=yes"
{%- for url_opt in url_opts %} "{{ url_opt }}"{% endfor %})
{%- endfor -%})
{%- endif -%}
{%- if jd.queue_name %}
 (queue="{{ jd.queue_name }}")
{%- endif -%}
{%- for rte in jd.runtime_environments %}
 (runTimeEnvironment="{{rte}}")
{%- endfor -%}
{{"\n"}}
nordugrid-arc-nagios-plugins-3.1.1/config/60-egi.d/0000755000175000002070000000000015002373741022611 5ustar  mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/config/60-egi.d/arcce_igtf.py0000644000175000002070000002226115002373741025254 0ustar  mockbuildmock00000000000000# Ulf Tigerstedt
# NGI_FI and NGI_NDGF
# Work in progress since 2011
# secret value 246094

do_hashing = True

import os, re
import glob
import posixpath
try:
    import base64, hashlib
except ImportError:
    do_hashing = False
import datetime
import sys
import logging

log = logging.getLogger()
logging.basicConfig()

igtffilename = "ca-policy-egi-core.list"
releasefilename = "ca-policy-egi-core.release"
obsoletedfilename = "ca-policy-egi-core.obsoleted"

if (len(sys.argv) == 2):
    ca_cert_location = sys.argv[1]
else:
    ca_cert_location = os.getenv('X509_CERT_DIR')
    if ca_cert_location is None:
        print('__status 3 Did not receive a $X509_CERT_DIR, '
              'missing runtime environment?')
        sys.exit(3)
if not os.path.exists(ca_cert_location):
    print('__status 3 Missing directory $X509_CERT_DIR (%s).' % ca_cert_location)
    sys.exit(3)

# What are the thresholds?
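# A sketch of the intended semantics, based on the checks further below:
# once the current IGTF release is more than days_warning days old, CAs
# from older release versions, obsolete CAs, and missing CAs raise a
# WARNING; past days_critical days, outdated CAs become CRITICAL.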
days_warning = 0
days_critical = 7

try:
    igtflist = open(igtffilename, 'r')
except IOError:
    print('__status 3 Missing %s.' % igtffilename)
    sys.exit(3)
try:
    obsoletedlist = open(obsoletedfilename, 'r')
except IOError:
    print('__status 3 Missing %s.' % obsoletedfilename)
    sys.exit(3)

currentonlynames = []
igtfonlynames = []
obsoletedonlynames = []
nagios_warning = 0
nagios_critical = 0
nagios_messages = []

def counted_noun(n, sg, pl = None):
    return '%d %s' % (n, n == 1 and sg or pl or sg + 's')

def short_list(xs, limit = 4):
    if len(xs) <= limit:
        return ', '.join(xs)
    else:
        return ', '.join(xs[0:limit] + ['...'])

def getreleasedateandversion(filename):
    """ Parse a EGI Trustanchor release file to get the release date
        and version """
    try:
        releasefile = open(filename, 'r')
    except IOError:
        print("__status 3 Missing trustanchor release file %s." % filename)
        sys.exit(3)
    # NB: The XML tag names in the patterns below were lost in transit and
    # are reconstructed here on the assumption that the release file marks
    # the fields up as <Date>YYYYMMDD</Date> and <Version>X.Y-Z</Version>;
    # verify against an actual release file.
    for line in releasefile:
        if re.match("^.*<Date>", line):
            [crud, spaces, xml1, fulldate, xml2, crud2] = \
                re.split("^(.*)(<Date>)([0-9]{8})(</Date>).*$", line)
        if re.match("^.*<Version>", line):
            [crud, spaces, xml1, releaseversion, packaging, xml2, crud2] = \
                re.split("^(.*)(<Version>)([0-9]+.[0-9]+)(-[0-9]+)(</Version>).*$", line)
    return [fulldate, releaseversion]

def getnameversion(filename):
    """ Read the alias and version number from a
        CA distribution .info file """
    if posixpath.islink(filename):
        return ["", "", ""]
    caname = ""
    version = ""
    sha1fp0 = ""
    try:
        inf = open(filename, 'r')
    except IOError:
        print("__status 3 Missing CA info file %s." % filename)
        sys.exit(3)
    for line in inf:
        if re.match('^alias.*', line):
            (junk, caname, _) = re.split(r'^alias\s*=\s*(.*)', line)
        if re.match('^version.*', line):
            (junk, version, _) = re.split(r'version\s*=\s*([0-9]+.[0-9]+)', line)
        if re.match("^sha1fp\.0", line):
            (junk, sha1fp0, _) = re.split(r'sha1fp\.0\s*=\s*([0-9A-F:]{59})', line)
        if ((caname != "") and (version != "") and (sha1fp0 != "")):
            return [caname, version, sha1fp0]
    return ["", "", ""]

def load_pem(pem_file):
    with open(pem_file) as fp:
        current_type = None
        current_data = None
        for line in fp:
            line = line.strip()
            if line.startswith('-----BEGIN ') and line.endswith('-----'):
                if not current_type is None:
                    raise ValueError(
                        'Invalid PEM file %s, nested BEGIN.', pem_file)
                current_type = line[11:-5]
                current_data = ''
            elif line.startswith('-----END ') and line.endswith('-----'):
                if not current_type == line[9:-5]:
                    raise ValueError(
                        'Invalid PEM file %s, END %s should be END %s'
                        % (pem_file, line[9:-5], current_type))
                data = base64.b64decode(current_data.encode('ASCII', 'strict'))
                yield (current_type, data)
                current_type = None
                current_data = None
            else:
                if not current_type is None:
                    current_data += line
        if not current_type is None:
            raise ValueError(
                'Invalid PEM file %s, missing END %s.', pem_file, current_type)

def load_pem_cert(pem_file):
    data = [d for t, d in load_pem(pem_file) if t == 'CERTIFICATE']
    if len(data) == 0:
        raise ValueError('%s contains no certificate' % pem_file)
    if len(data) > 1:
        raise ValueError('%s contains multiple certificates' % pem_file)
    return data[0]

def checksha1fp(infofile, infohash):
    """ Hunt down a certificate that matches the .info file
        (which might be a .0 or a .pem).
        Then read the data and compute the SHA1 fingerprint of it """
    # If this is an old python, don't do hashing
    if not do_hashing:
        return True
    ifile = ""
    newfilepem = re.sub('\.info$', '.pem', infofile)
    newfile0 = re.sub('\.info$', '.0', infofile)
    if (posixpath.exists(newfilepem)):
        ifile = newfilepem
    elif (posixpath.exists(newfile0)):
        ifile = newfile0
    if (ifile == ""):
        return False
    cleanedhash = re.sub(':', '', infohash.lower())
    try:
        data = load_pem_cert(ifile)
    except Exception as xc:
        log.warning('%s: %s' % (ifile, xc))
        return False
    hash = hashlib.sha1(data)
    if (hash.hexdigest() == cleanedhash):
        return True
    else:
        return False

[releasedate, releaseversion] = getreleasedateandversion(releasefilename)

for line in igtflist:
    if re.match('^ca_.*', line):
        [nonce, ca, caname, version, _endline] = \
            re.split('(^ca_)(.*)-([0-9]+.[0-9]+-[0-9]+)$', line)
        igtfonlynames.append(caname)

for line in obsoletedlist:
    if re.match('^[A-Za-z0-9].*', line):
        [nonce, obsolete, endline] = re.split('(^[A-Za-z0-9].*$)', line)
        obsoletedonlynames.append(obsolete)

[junk, ryear, rmonth, rdate, _] = \
    re.split("([0-9]{4})([0-9]{2})([0-9]{2})", releasedate)
today = datetime.date.today()
releasedate = datetime.date(int(ryear), int(rmonth), int(rdate))
difference = today - releasedate

allinfos = glob.glob(ca_cert_location + '/*.info')

present_obsolete = []
present_by_version = {}
for cfile in allinfos:
    [caname, version, sha1fp0] = getnameversion(cfile)
    if (caname != ""):
        currentonlynames.append(caname)
        if ((caname in igtfonlynames) and (difference.days > days_warning)
                and (version < releaseversion)):
            if (difference.days > days_warning):
                nagios_warning += 1
            if (difference.days > days_critical):
                nagios_critical += 1
            if not version in present_by_version:
                present_by_version[version] = []
            present_by_version[version].append(caname)
        if (caname in obsoletedonlynames):
            if (difference.days > days_warning):
                nagios_warning += 1
            present_obsolete.append(caname)
        result = checksha1fp(cfile, sha1fp0)
        if (not result):
            nagios_critical += 1
            nagios_messages.append("SHA Fingerprint failed for %s." % caname)

nagios_issues = []
for version, canames in present_by_version.items():
    nagios_issues.append(
        'found %s from %s (%s)'
        % (counted_noun(len(canames), 'CA'), version, short_list(canames)))
    log.error('CAs from %s: %s' % (version, ', '.join(canames)))
if present_obsolete:
    nagios_issues.append(
        '%s obsolete (%s)'
        % (counted_noun(len(present_obsolete), 'CA is', 'CAs are'),
           short_list(present_obsolete)))
    log.error('Obsolete CAs: %s' % ', '.join(present_obsolete))
missingcas = [i for i in igtfonlynames if i not in currentonlynames]
if (len(missingcas) > 0):
    if (difference.days > days_warning):
        nagios_warning += 1
    nagios_issues.append(
        'missing %s (%s)'
        % (counted_noun(len(missingcas), 'CA'), short_list(missingcas)))
    log.error('Missing CAs: %s' % ', '.join(missingcas))
if not nagios_issues:
    nagios_issues.append('all present')

status = nagios_critical and 2 or nagios_warning and 1 or 0
msg = ', '.join(nagios_issues)
# The format string takes four arguments; the release version was missing
# from the original argument tuple and has been restored.
print("__status %d IGTF-%s, %s old, %s."
      % (status, releaseversion, counted_noun(difference.days, 'day'), msg))
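# The "__status <code> <message>" and "__log <level> <message>" lines
# printed above and below form the protocol that the scripted job plugin
# parses on the Nagios side, as described in doc/arcce.rst.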
for msg in nagios_messages:
    print("__log 40 %s" % msg)
nordugrid-arc-nagios-plugins-3.1.1/MANIFEST.in0000644000175000002070000000074515002373741021677 0ustar  mockbuildmock00000000000000include plugins/check_*
include config/20-dist.ini
include config/20-dist.d/*.xrsl.j2
include config/60-egi.ini
include config/60-egi.d/*.xrsl.j2
include config/60-egi.d/*.py
include nordugrid-arc-nagios-plugins.spec*
include jobscripts/*
include README.rst LICENSE AUTHORS NOTICE
include doc/Makefile
include doc/conf.py
include doc/*.example
include doc/*.rst
include doc/media/*.png
include debian/*
include debian/source/format
include Makefile
include VERSION
include MANIFEST.in
nordugrid-arc-nagios-plugins-3.1.1/plugins/0000755000175000002070000000000015002373741021614 5ustar  mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/plugins/check_arcrest_info0000755000175000002070000002323015002373741025355 0ustar  mockbuildmock00000000000000#! /usr/bin/python3

# pylint: disable=R0903

from __future__ import annotations
import argparse
import codecs
import json
import ssl
import sys
import traceback
from typing import Any, Optional, List, NoReturn
import urllib.error
import urllib.request

# Data Representation
# -------------------

class InfoParseError(ValueError):

    def __init__(self, msg: str, data: Any):
        super().__init__(msg)
        self.data = data

class ComputingShare:
    id: str
    name: str
    associated_endpoints: List[ComputingEndpoint]

    def __init__(self, data: Any):
        self.id = data["ID"] # pylint: disable=C0103
        self.name = data["Name"]
        self.associated_endpoints = []

    def add_endpoint(self, endpoint: ComputingEndpoint):
        self.associated_endpoints.append(endpoint)

class ComputingEndpoint:
    health_state: str
    health_state_info: Optional[str]
    interface_name: str

    def __init__(self, data: Any):
        self.id = data["ID"] # pylint: disable=C0103
        self.name = data["Name"]
        self.health_state = data["HealthState"]
        self.health_state_info = data.get("HealthStateInfo")
        self.interface_name = data["InterfaceName"]

    def __str__(self):
        return self.id

def _fixup_list(xs):
    # It seems that singleton lists are reported as bare elements.  This
    # applies at least to ComputingShare and
    # Associations/ComputingShareID, but likely applies generally.
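    # For example (hypothetical payload), {"ComputingShare": {...}} may
    # appear in place of {"ComputingShare": [{...}]}, so the bare element
    # is normalized to a singleton list here.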
    if isinstance(xs, list):
        return xs
    return [xs]

class ComputingService:

    def __init__(self, data: Any):
        # pylint: disable=W0707
        self.endpoints_by_id = {}
        self.shares_by_id = {}
        try:
            data = data["Domains"]["AdminDomain"]["Services"]["ComputingService"]
        except (KeyError, TypeError):
            raise InfoParseError("ComputingService not found.", data)
        shares_data = _fixup_list(data.get("ComputingShare", []))
        endpoints_data = _fixup_list(data.get("ComputingEndpoint", []))
        for share_data in shares_data:
            try:
                share = ComputingShare(share_data)
                self.shares_by_id[share.id] = share
            except (KeyError, ValueError, TypeError) as exn:
                raise InfoParseError(
                    f"Failed to parse share: {exn}", share_data)
        for endpoint_data in endpoints_data:
            try:
                endpoint = ComputingEndpoint(endpoint_data)
                self.endpoints_by_id[endpoint.id] = endpoint
                for share_id in _fixup_list(
                        endpoint_data.get("Associations", {})
                        .get("ComputingShareID", [])):
                    self.shares_by_id[share_id].add_endpoint(endpoint)
            except (KeyError, ValueError, TypeError) as exn:
                raise InfoParseError(
                    f"Failed to parse endpoint: {exn}", endpoint_data)

    def dump(self):
        print("Found endpoints:")
        for endpoint in self.endpoints_by_id.values():
            print(f"- {endpoint}")
        for share in self.shares_by_id.values():
            print(f"Found share {share.id} using endpoints:")
            for endpoint in share.associated_endpoints:
                print(f"- {endpoint}")

# Checks
# ------

class ServiceError:

    def __init__(self, brief_message: str, full_message: Optional[str] = None):
        self.brief_message = brief_message
        self.full_message = full_message

def check_service(
        service: ComputingService, *,
        min_endpoint_count: int,
        min_share_count: int,
        required_interfaces: List[str]):
    errors = []

    def critical(*args, **kwargs):
        errors.append(ServiceError(*args, **kwargs))

    # Check that we have at least one share and at least one endpoint.
    if len(service.shares_by_id) < min_share_count:
        critical(f"Only {len(service.shares_by_id)} "
                 f"of {min_share_count} ComputingShare(s) found.")
    if len(service.endpoints_by_id) < min_endpoint_count:
        critical(f"Only {len(service.endpoints_by_id)} "
                 f"of {min_endpoint_count} ComputingEndpoint(s) found.")

    # Check that all endpoints are active.
    for endpoint in service.endpoints_by_id.values():
        if endpoint.health_state != "ok":
            msg = f"Endpoint {endpoint.id} is {endpoint.health_state}"
            if endpoint.health_state_info:
                critical(msg, msg + ": " + endpoint.health_state_info)
            else:
                critical(msg)

    # Check that all shares have at least one active endpoint.
    for share in service.shares_by_id.values():
        endpoints = [
            endpoint for endpoint in share.associated_endpoints
            if endpoint.health_state == "ok"
        ]
        if not endpoints:
            critical(f"Share {share.id} has no working endpoint.")
        interfaces = set(endpoint.interface_name for endpoint in endpoints)
        for interface in set(required_interfaces).difference(interfaces):
            critical(f"Interface {interface} missing for share {share.id}.")

    # Report errors and exit.
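    # Return values follow the usual Nagios convention as used by this
    # probe: 0 = OK, 2 = CRITICAL (3 = UNKNOWN is used elsewhere).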
if errors: if len(errors) == 1: print(errors[0].brief_message) if errors[0].full_message: print(errors[0].full_message) else: print("Multiple errors found, see details.") for error in errors: print(error.full_message or error.brief_message) return 2 print("No problems found.") return 0 # Main Program # ------------ def report_uncaught_exception_and_exit(msg_while: str) -> NoReturn: print(f"Uncaught exception while {msg_while}") traceback.print_exc(file=sys.stdout) sys.exit(3) def report_parse_error_and_exit(exn: InfoParseError, data: Any) -> NoReturn: print(exn) traceback.print_exc() print("The unparsed JSON fragment is:") print(json.dumps(exn.data, indent=4, sort_keys=True)) print("The full JSON document is:") print(json.dumps(data, indent=4, sort_keys=True)) sys.exit(2) def fetch_service_or_exit( url: str, tls_ca_dir: Optional[str] = None, tls_key: Optional[str] = None, tls_cert: Optional[str] = None): try: context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) if tls_ca_dir: context.load_verify_locations(capath=tls_ca_dir) if tls_key and tls_cert: context.load_cert_chain(keyfile=tls_key, certfile=tls_cert) headers = {"Accept": "application/json"} request = urllib.request.Request(url, headers=headers) with urllib.request.urlopen(request, context=context) as fh: data = json.load(codecs.getreader("utf-8")(fh)) except (urllib.error.HTTPError, urllib.error.URLError, ssl.SSLError, json.decoder.JSONDecodeError) as exn: print(f"Failed to fetch from {url}: {exn}") sys.exit(2) except: # pylint: disable=W0702 report_uncaught_exception_and_exit(f"fetching from {url}") try: return ComputingService(data) except InfoParseError as exn: report_parse_error_and_exit(exn, data) except: # pylint: disable=W0702 report_uncaught_exception_and_exit(f"processing data from {url}") def main(): argp = argparse.ArgumentParser( description=""" NAGIOS probe to check the status of an ARC CE using the org.nordugrid.arcrest interface. 
""") argp.add_argument("--host", "-H", type=str, help="host name of the CE to check") argp.add_argument("--port", "-P", type=int, help="port number of the information system endpoint") argp.add_argument("--endpoint", "-U", help="URL of the information system endpoint") argp.add_argument("--tls-ca-dir", type=str, default="/etc/grid-security/certificates", help="directory containing accepted X.509 CA certificates") argp.add_argument("--tls-cert", type=str, help="client certificate used to authenticate to the CE") argp.add_argument("--tls-key", type=str, help="client key used to authenticate to the CE") argp.add_argument("--require-min-share-count", type=int, default=1, help="require that there are at least this number of shares") argp.add_argument("--require-min-endpoint-count", type=int, default=1, help="require that there are at least this number of endpoints") argp.add_argument("--require-interface", type=str, nargs="*", default=[], help="require that there is an endpoint supporting the given " "interface for each share") argp.add_argument("--dump", action='store_true', help="dump some of the gathered information at the end of the " "output, for debugging or casual inspection") args = argp.parse_args() if not args.endpoint is None: endpoint = args.endpoint elif not args.host is None: if args.port is None: endpoint = f"https://{args.host}/arex/rest/1.0/info" else: endpoint = f"https://{args.host}:{args.port}/arex/rest/1.0/info" else: argp.error("Either --host/-H or --endpoint/-U is required.") service = fetch_service_or_exit( url=endpoint, tls_ca_dir=args.tls_ca_dir, tls_cert=args.tls_cert, tls_key=args.tls_key) exit_code = check_service( service, min_share_count=args.require_min_share_count, min_endpoint_count=args.require_min_endpoint_count, required_interfaces=args.require_interface) if args.dump: print("") print("## Data Dump ##") print("") service.dump() sys.exit(exit_code) if __name__ == "__main__": main() nordugrid-arc-nagios-plugins-3.1.1/plugins/check_arcce_clean0000755000175000002070000000062515002373741025121 0ustar mockbuildmock00000000000000#! /usr/bin/python3 # # ARCCE Job Probe: Cleanup import logging import sys try: from arcnagios.ce.check_arcce_clean import Check_arcce_clean except ImportError as exn: sys.stdout.write('UNKNOWN: Error loading modules : %s\n\n' 'sys.path = %s\n' % (exn, sys.path)) sys.exit(3) logging.basicConfig() # for manual invocation probe = Check_arcce_clean() probe.nagios_run() nordugrid-arc-nagios-plugins-3.1.1/plugins/check_arcservice0000755000175000002070000001104415002373741025025 0ustar mockbuildmock00000000000000#!/usr/bin/python3 # # Contributed by Nagy Zsombor. # Support for GLUE 2.0r1 by Gabor Roczei. # Released under the Apache License with permission from Gabor Roczei. # See also http://bugzilla.nordugrid.org/show_bug.cgi?id=1983. # # TODO: If this is still needed, move code into functions and fix pylint issues. 
# pylint: disable=all

import getopt
import signal
import socket
import sys
import traceback
import http.client
import urllib.parse

try:
    import xml.etree.ElementTree as ET
except ImportError:
    import elementtree.ElementTree as ET # pylint: disable=import-error

glue_schemas = [
    'http://schemas.ogf.org/glue/2008/05/spec_2.0_d41_r01',
    'http://schemas.ogf.org/glue/2009/03/spec_2.0_r1',
]

timeout = 10

def handler(signum, frame): # pylint: disable=unused-argument
    print("ARCSERVICE UNKNOWN: Check timed out after %s seconds" % timeout)
    sys.exit(3)

signal.signal(signal.SIGALRM, handler)
signal.alarm(timeout)

def usage():
    print("""Usage: check_arcservice -u <url> -k <key-file> -c <cert-file> -t <timeout> --debug
    -u <url>        the URL of the service to check (mandatory)
    -t <timeout>    after this amount of seconds the check will return UNKNOWN (default: 10)
    -k <key-file>   path of the key file (default: /etc/grid-security/hostkey.pem)
    -c <cert-file>  path of the cert file (default: /etc/grid-security/hostcert.pem)
    --debug         print some debugging information
""")
    sys.exit(3)

try:
    options, args = getopt.getopt(sys.argv[1:], "u:k:c:t:", ["debug"])
except getopt.GetoptError:
    usage()

key_file = '/etc/grid-security/hostkey.pem'
cert_file = '/etc/grid-security/hostcert.pem'
url = ''
debug = False
for name, value in options:
    if name in ['-k']:
        key_file = value
    if name in ['-c']:
        cert_file = value
    if name in ['-u']:
        url = value
    if name in ['--debug']:
        debug = True
    if name in ['-t']:
        timeout = int(value)
        signal.alarm(timeout)

if not key_file or not cert_file or not url:
    usage()

try:
    parsed = urllib.parse.urlparse(url)
    https = (parsed[0] == 'https')
    hostport = parsed[1]
    path = parsed[2]
except (ValueError, KeyError):
    print("ARCSERVICE UNKNOWN: Error parsing URL %s" % url)
    sys.exit(3)

# NB: The markup of this SOAP envelope was lost in transit; the envelope
# below is a reconstruction around the surviving WSRF action URI and may
# need to be verified against the service's expected request.
get_resource_property_document_request = '''<?xml version="1.0" encoding="UTF-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope"
               xmlns:wsa="http://www.w3.org/2005/08/addressing"
               xmlns:rp="http://docs.oasis-open.org/wsrf/rp-2">
  <soap:Header>
    <wsa:Action>http://docs.oasis-open.org/wsrf/rpw-2/GetResourcePropertyDocument/GetResourcePropertyDocumentRequest</wsa:Action>
  </soap:Header>
  <soap:Body>
    <rp:GetResourcePropertyDocument/>
  </soap:Body>
</soap:Envelope>
'''

if https:
    import ssl
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
    ssl_context.load_cert_chain(certfile = cert_file, keyfile = key_file)
    connection = http.client.HTTPSConnection(host=hostport, context=ssl_context)
else:
    connection = http.client.HTTPConnection(host=hostport)

try:
    connection.request('POST', path, get_resource_property_document_request)
except (socket.error, http.client.HTTPException) as exn:
    print("ARCSERVICE CRITICAL: Connecting to %s failed: %s" % (url, exn))
    if debug:
        traceback.print_exc()
    sys.exit(2)

response = connection.getresponse()
if response.status == 200:
    data = response.read()
    try:
        et = ET.fromstring(data)
        health_state = None
        for glue_schema in glue_schemas:
            health_state = et.findtext(".//{%s}HealthState" % glue_schema)
            if health_state:
                break
        if health_state is None:
            print("ARCSERVICE UNKNOWN: Service's health state is unknown")
            if debug:
                print(data)
            sys.exit(3)
        if health_state == 'ok':
            print("ARCSERVICE OK: Service's health state is %s" % health_state)
            if debug:
                print(data)
            sys.exit(0)
        else:
            print("ARCSERVICE CRITICAL: Service's health state is %s" % health_state)
            if debug:
                print(data)
            sys.exit(2)
    except ET.ParseError as exn:
        print("ARCSERVICE UNKNOWN: Unable to parse response (%s)" % exn)
        if debug:
            print(data)
        sys.exit(3)
else:
    print("ARCSERVICE CRITICAL: Invalid response from server (%s, %s)" \
        % (response.status, response.reason))
    if debug:
        print(response.read())
    sys.exit(2)
nordugrid-arc-nagios-plugins-3.1.1/plugins/check_arcce_monitor0000755000175000002070000000063615002373741025530 0ustar  mockbuildmock00000000000000#!
/usr/bin/python3 # # ARCCE Job Probe: Monitoring import logging import sys try: from arcnagios.ce.check_arcce_monitor import Check_arcce_monitor except ImportError as exn: sys.stdout.write('UNKNOWN: Error loading modules : %s\n\n' 'sys.path = %s\n' % (exn, sys.path)) sys.exit(3) logging.basicConfig() # for manual invocation probe = Check_arcce_monitor() probe.nagios_run() nordugrid-arc-nagios-plugins-3.1.1/plugins/check_arcce_submit0000755000175000002070000000063315002373741025341 0ustar mockbuildmock00000000000000#! /usr/bin/python3 # # ARCCE Job Probe: Submission import logging import sys try: from arcnagios.ce.check_arcce_submit import Check_arcce_submit except ImportError as exn: sys.stdout.write('UNKNOWN: Error loading modules : %s\n\n' 'sys.path = %s\n' % (exn, sys.path)) sys.exit(3) logging.basicConfig() # for manual invocation probe = Check_arcce_submit() probe.nagios_run() nordugrid-arc-nagios-plugins-3.1.1/plugins/check_gridstorage0000755000175000002070000000053015002373741025207 0ustar mockbuildmock00000000000000#! /usr/bin/python3 import logging import sys try: from arcnagios.se.check_gridstorage import Check_gridstorage except ImportError as xc: sys.stdout.write('UNKNOWN: Error loading modules: %s\n\nsys.path = %r\n' %(xc, sys.path)) sys.exit(3) logging.basicConfig() probe = Check_gridstorage() probe.nagios_run() nordugrid-arc-nagios-plugins-3.1.1/debian/0000755000175000002070000000000015002373741021355 5ustar mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/debian/rules0000755000175000002070000000220615002373741022435 0ustar mockbuildmock00000000000000#!/usr/bin/make -f -include /usr/share/dpkg/buildflags.mk configure: configure-stamp : configure-stamp: dh_testdir touch $@ build: build-arch build-indep : build-arch: build-stamp : build-indep: build-stamp python3 setup.py build mkdir -p doc/_build/html doc/_static make -C doc html rm -f doc/_build/html/.buildinfo build-stamp: configure-stamp : touch $@ clean: dh_testdir dh_testroot dh_clean configure-stamp build-stamp install: build-stamp dh_testdir dh_testroot dh_prep python3 setup.py install --skip-build \ --install-layout=deb --prefix=/usr --root=debian/tmp rm -rf debian/tmp/usr/lib/python*/dist-packages/*.egg-info binary-indep: dh_testdir dh_testroot dh_installdirs dh_installdocs dh_install dh_installchangelogs [ -x /usr/bin/dh_sphinxdoc ] && dh_sphinxdoc || : dh_compress -X html dh_python3 [ -x /usr/bin/dh_lintian ] && dh_lintian || : dh_link dh_fixperms dh_missing --fail-missing dh_installdeb dh_gencontrol dh_md5sums dh_builddeb binary-arch: install : binary: binary-arch binary-indep : .PHONY: build-arch build-indep build clean binary-arch binary-indep binary install configure nordugrid-arc-nagios-plugins-3.1.1/debian/nordugrid-arc-nagios-plugins.install0000644000175000002070000000071015002373741030440 0ustar mockbuildmock00000000000000debian/tmp/etc/arc/nagios/20-dist.ini debian/tmp/etc/arc/nagios/20-dist.d/default.xrsl.j2 debian/tmp/usr/lib/nagios/plugins/check_arcce_clean debian/tmp/usr/lib/nagios/plugins/check_arcce_monitor debian/tmp/usr/lib/nagios/plugins/check_arcce_submit debian/tmp/usr/lib/nagios/plugins/check_arcrest_info debian/tmp/usr/lib/nagios/plugins/check_arcservice debian/tmp/usr/lib/nagios/plugins/check_gridstorage debian/tmp/usr/lib/python*/dist-packages/arcnagios nordugrid-arc-nagios-plugins-3.1.1/debian/source/0000755000175000002070000000000015002373741022655 5ustar 
mockbuildmock00000000000000nordugrid-arc-nagios-plugins-3.1.1/debian/source/format0000644000175000002070000000001415002373741024063 0ustar mockbuildmock000000000000003.0 (quilt) nordugrid-arc-nagios-plugins-3.1.1/debian/copyright0000644000175000002070000000235415002373741023314 0ustar mockbuildmock00000000000000This package was debianized by Anders Wäänänen on Mon, 26 Jan 2012 11:06:54 +0100. It was downloaded from http://www.nordugrid.org Upstream Authors: Petter Urkedal Anders Wäänänen Copyright: Copyright (C) 2006-2009 by the respective employers of the the above authors: University of Copenhagen, Denmark Niels Bohr Institute, Copenhagen, Denmark License: Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. On Debian systems, the complete text of the Apache version 2.0 license can be found in `/usr/share/common-licenses/Apache-2.0'. The Debian packaging was prepared by Anders Wäänänen of the upstream developers and is also licensed under the Apache 2.0 license. nordugrid-arc-nagios-plugins-3.1.1/debian/nordugrid-arc-nagios-plugins-doc.docs0000644000175000002070000000002015002373741030457 0ustar mockbuildmock00000000000000doc/_build/html nordugrid-arc-nagios-plugins-3.1.1/debian/nordugrid-arc-nagios-plugins.dirs0000644000175000002070000000007515002373741027737 0ustar mockbuildmock00000000000000etc/arc/nagios etc/arc/nagios/20-dist.d var/spool/arc/nagios nordugrid-arc-nagios-plugins-3.1.1/debian/compat0000644000175000002070000000000315002373741022554 0ustar mockbuildmock0000000000000012 nordugrid-arc-nagios-plugins-3.1.1/debian/nordugrid-arc-nagios-plugins-egi.dirs0000644000175000002070000000003015002373741030470 0ustar mockbuildmock00000000000000etc/arc/nagios/60-egi.d nordugrid-arc-nagios-plugins-3.1.1/debian/control0000644000175000002070000000254615002373741022767 0ustar mockbuildmock00000000000000Source: nordugrid-arc-nagios-plugins Section: net Priority: optional Maintainer: Mattias Ellert Uploaders: Anders Waananen Build-Depends: debhelper (>= 12), dh-python, python3, python3-setuptools, python3-sphinx X-Python-Version: 3.8.2 Standards-Version: 3.9.2 Vcs-Browser: http://svn.nordugrid.org/trac/nordugrid/browser/nagios Vcs-Svn: http://svn.nordugrid.org/repos/nordugrid/nagios Homepage: http://www.nordugrid.org Package: nordugrid-arc-nagios-plugins Architecture: all # FIXME: nordugrid-arc-client should be (>= 1.0.0 && << 6.0.0~ || >= 6.3.0~) Depends: ${misc:Depends}, ${python3:Depends}, python3, python3-cryptography, python3-jinja2, python3-ldap, nordugrid-arc-client (>= 6.5.0) Description: NorduGrid ARC Nagios plugins This package provides the Nagios plugins for testing ARC CE, using the ARC-1 API. Package: nordugrid-arc-nagios-plugins-doc Architecture: all Depends: ${misc:Depends}, ${python3:Depends}, ${sphinxdoc:Depends} Section: doc Description: NorduGrid ARC Nagios plugins documentation This package contains HTML documentation for the ARC nagios probes. 
Package: nordugrid-arc-nagios-plugins-egi
Architecture: all
Depends: ${misc:Depends}, ${python3:Depends}
Description: EGI configuration and dependencies for the ARC Nagios plugins
 This package contains EGI-specific probe(s) and configuration.

nordugrid-arc-nagios-plugins-3.1.1/debian/nordugrid-arc-nagios-plugins.docs
AUTHORS
NOTICE
README.rst
doc/arcnagios.ini.example
doc/services.cfg.example

nordugrid-arc-nagios-plugins-3.1.1/debian/changelog
nordugrid-arc-nagios-plugins (3.1.1-1) unstable; urgency=low

  * New upstream release 3.1.1.

 -- Petter Urkedal  Thu, 24 Apr 2025 10:25:00 +0200

nordugrid-arc-nagios-plugins (3.1.0-1) unstable; urgency=low

  * New upstream release 3.1.0.

 -- Petter Urkedal  Mon, 07 Apr 2025 11:20:00 +0200

nordugrid-arc-nagios-plugins (3.0.0-1) unstable; urgency=low

  * New upstream release 3.0.0.

 -- Petter Urkedal  Tue, 25 Mar 2025 10:10:00 +0100

nordugrid-arc-nagios-plugins (3.0.0~rc8-1) unstable; urgency=low

  * New upstream release candidate 3.0.0rc8.

 -- Petter Urkedal  Wed, 15 Jan 2025 13:10:00 +0200

nordugrid-arc-nagios-plugins (3.0.0~rc7-1) unstable; urgency=low

  * New upstream release candidate 3.0.0rc7.

 -- Petter Urkedal  Wed, 13 Nov 2024 11:45:00 +0200

nordugrid-arc-nagios-plugins (3.0.0~rc6-1) unstable; urgency=low

  * New upstream release candidate 3.0.0rc6.

 -- Petter Urkedal  Tue, 22 Oct 2024 13:13:35 +0200

nordugrid-arc-nagios-plugins (3.0.0~rc5-1) unstable; urgency=low

  * New upstream release candidate 3.0.0rc5.

 -- Anders Waananen  Wed, 09 Oct 2024 14:35:01 +0200

nordugrid-arc-nagios-plugins (2.0.1-1) unstable; urgency=low

  * New upstream release 2.0.1.

 -- Petter Urkedal  Fri, 03 Nov 2023 16:15:00 +0100

nordugrid-arc-nagios-plugins (2.0.1~rc2-1) unstable; urgency=low

  * New upstream release candidate 2.0.1rc2.

 -- Anders Waananen  Thu, 02 Nov 2023 17:38:06 +0100

nordugrid-arc-nagios-plugins (2.0.1~rc1-1) unstable; urgency=low

  * New upstream release candidate 2.0.1rc1.

 -- Petter Urkedal  Thu, 25 Oct 2023 12:40:00 +0200

nordugrid-arc-nagios-plugins (2.0.0-1) unstable; urgency=low

  * New upstream release 2.0.0.

 -- Petter Urkedal  Thu, 14 Nov 2019 10:47:58 +0100

nordugrid-arc-nagios-plugins (2.0.0~rc3-1) unstable; urgency=low

  * New upstream release candidate 2.0.0rc3.

 -- Petter Urkedal  Mon, 14 Oct 2019 12:33:43 +0200

nordugrid-arc-nagios-plugins (2.0.0~rc2-1) unstable; urgency=low

  * New upstream release candidate 2.0.0rc2.

 -- Petter Urkedal  Wed, 02 Oct 2019 13:49:02 +0200

nordugrid-arc-nagios-plugins (2.0.0~rc1-1) unstable; urgency=low

  * New upstream release candidate 2.0.0rc1.

 -- Petter Urkedal  Thu, 25 Apr 2019 11:28:52 +0200

nordugrid-arc-nagios-plugins (1.9.1~rc1-1) unstable; urgency=low

  * New upstream release candidate 1.9.1rc1.

 -- Petter Urkedal  Thu, 15 Jun 2017 17:07:26 +0200

nordugrid-arc-nagios-plugins (1.9.0-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Wed, 31 May 2017 09:50:19 +0200

nordugrid-arc-nagios-plugins (1.9.0~rc1-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Tue, 25 Apr 2017 21:19:12 +0200

nordugrid-arc-nagios-plugins (1.8.4-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Fri, 11 Sep 2015 10:38:38 +0200

nordugrid-arc-nagios-plugins (1.8.3-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Thu, 02 Jul 2015 15:01:23 +0200

nordugrid-arc-nagios-plugins (1.8.2-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Fri, 27 Mar 2015 15:05:18 +0100

nordugrid-arc-nagios-plugins (1.8.2~rc2-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Thu, 15 Jan 2015 12:28:01 +0100

nordugrid-arc-nagios-plugins (1.8.2~rc1-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Fri, 09 Jan 2015 13:50:12 +0100

nordugrid-arc-nagios-plugins (1.8.1-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Fri, 15 Aug 2014 22:44:44 +0200

nordugrid-arc-nagios-plugins (1.8.1~rc1-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Sun, 06 Jul 2014 22:37:39 +0200

nordugrid-arc-nagios-plugins (1.8.0-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Wed, 30 Apr 2014 21:18:03 +0200

nordugrid-arc-nagios-plugins (1.7.0-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Mon, 25 Nov 2013 21:25:48 +0100

nordugrid-arc-nagios-plugins (1.6.0-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Sat, 20 Apr 2013 00:55:28 +0200

nordugrid-arc-nagios-plugins (1.6.0~rc1-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Wed, 10 Apr 2013 19:10:25 +0200

nordugrid-arc-nagios-plugins (1.5.0-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Wed, 20 Feb 2013 01:22:27 +0100

nordugrid-arc-nagios-plugins (1.4.0~rc1-1) unstable; urgency=low

  * 1.4.0 Release candidate 1

 -- Anders Waananen  Wed, 28 Nov 2012 12:51:47 +0100

nordugrid-arc-nagios-plugins (1.3.11-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Mon, 29 Oct 2012 18:22:25 +0100

nordugrid-arc-nagios-plugins (1.3.10-1) unstable; urgency=low

  * New upstream release.

 -- Anders Waananen  Wed, 26 Sep 2012 17:08:36 +0200

nordugrid-arc-nagios-plugins-3.1.1/debian/nordugrid-arc-nagios-plugins-egi.install
debian/tmp/etc/arc/nagios/60-egi.ini
debian/tmp/etc/arc/nagios/60-egi.d/arcce_igtf.py*

nordugrid-arc-nagios-plugins-3.1.1/nordugrid-arc-nagios-plugins.spec.in
# Disable debuginfo since there are no binaries
%global debug_package %{nil}

%{!?enable_doc: %global enable_doc 1}

%global site org.nordugrid
%global nagios_bindir %{_libdir}/nagios/plugins
%global arc_spooldir %{_localstatedir}/spool/arc
%global pkg_spooldir %{arc_spooldir}/nagios
%global pkg_sysconfdir %{_sysconfdir}/arc/nagios

%if 0%{?rhel} == 8
%global __python3 /usr/bin/python3.8
%endif
%{!?__python3: %global __python3 python3}
%{!?python3_sitelib: %global python3_sitelib %(%{__python3} -c 'from distutils import sysconfig; print(sysconfig.get_python_lib())')}

Name: nordugrid-arc-nagios-plugins
Version: @BASEVERSION@
Release: @RPMRELEASE@%{?dist}
Summary: Nagios plugins for ARC
Group: System Environment/Daemons
License: ASL 2.0
URL: http://www.nordugrid.org
Source0: http://download.nordugrid.org/packages/%{name}/releases/%{version}/src/%{name}-%{version}@PREVERSION@.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)

Requires: (nordugrid-arc-client >= 6.5.0 or nordugrid-arc6-client >= 6.5.0 or nordugrid-arc7-client)
Requires: nagios-plugins
BuildRequires: make
%if 0%{?rhel} == 8
Requires: python38-cryptography
Requires: python38-jinja2
Requires: python38-ldap
BuildRequires: python38-setuptools
%else
Requires: python3-cryptography
Requires: python3-jinja2
Requires: python3-ldap
BuildRequires: python3-setuptools
%endif
%if %{enable_doc}
BuildRequires: python3-sphinx
%endif
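# Note (an assumed workflow, not part of the upstream build instructions):
# once the @BASEVERSION@, @PREVERSION@ and @RPMRELEASE@ placeholders have
# been substituted, the packages would typically be built with
#
#   rpmbuild -ba nordugrid-arc-nagios-plugins.spec
#
# and the HTML documentation subpackage can be skipped by overriding the
# enable_doc macro defined above:
#
#   rpmbuild -ba --define 'enable_doc 0' nordugrid-arc-nagios-plugins.spec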
%description
This package provides the Nagios plugins for testing ARC CE, using the
ARC-1 API.

%if %{enable_doc}
%package doc
Summary: HTML documentation for ARC 1 Nagios plugins
Group: Documentation
BuildArch: noarch
%endif

%if %{enable_doc}
%description doc
HTML documentation for %{name}.
%endif

%package egi
Summary: EGI configuration and dependencies for the ARC Nagios plugins
Group: System Environment/Daemons
BuildArch: noarch
Requires: (nordugrid-arc-plugins-arcrest >= 6.5.0 or nordugrid-arc6-plugins-arcrest >= 6.5.0 or nordugrid-arc-plugins-needed >= 7.0.0 or nordugrid-arc7-plugins-needed)

%description egi
EGI configuration and dependencies for the ARC Nagios plugins

%prep
%setup -q -n %{name}-%{version}@PREVERSION@

%build
%{__python3} setup.py build
%if %{enable_doc}
mkdir -p doc/_build/html doc/_static
make -C doc html
rm -f doc/_build/html/.buildinfo
%endif

%install
test %{buildroot} != / && rm -rf %{buildroot}
%{__python3} setup.py install --root=%{buildroot} --skip-build
install -m755 -d %{buildroot}%{_sysconfdir}/nagios/plugins
install -m755 -d %{buildroot}%{pkg_spooldir}

%clean
test %{buildroot} != / && rm -rf %{buildroot}

%files
%dir %{pkg_sysconfdir}
%dir %{pkg_sysconfdir}/20-dist.d
%config(noreplace) %{pkg_sysconfdir}/20-dist.ini
%config(noreplace) %{pkg_sysconfdir}/20-dist.d/default.xrsl.j2
%{nagios_bindir}/check_arcce_clean
%{nagios_bindir}/check_arcce_monitor
%{nagios_bindir}/check_arcce_submit
%{nagios_bindir}/check_arcrest_info
%{nagios_bindir}/check_arcservice
%{nagios_bindir}/check_gridstorage
%{python3_sitelib}/arcnagios
%{python3_sitelib}/nordugrid_arc_nagios_plugins-*.egg-info
%dir %{arc_spooldir}
%attr(-,nagios,nagios) %{pkg_spooldir}
%doc AUTHORS README.rst LICENSE NOTICE
%doc doc/arcnagios.ini.example
%doc doc/services.cfg.example

%if %{enable_doc}
%files doc
%doc AUTHORS README.rst LICENSE NOTICE
%doc doc/_build/html
%endif

%files egi
%doc AUTHORS README.rst LICENSE NOTICE
%dir %{pkg_sysconfdir}/60-egi.d
%config(noreplace) %{pkg_sysconfdir}/60-egi.ini
# FIXME: Prevent rpmbuild from generating these compiled objects:
%config(noreplace) %{pkg_sysconfdir}/60-egi.d/arcce_igtf.py*

%changelog
* Thu Apr 24 2025 Petter Urkedal - 3.1.1-1
- New upstream release 3.1.1.

* Mon Apr 07 2025 Petter Urkedal - 3.1.0-1
- New upstream release 3.1.0.

* Tue Mar 25 2025 Petter Urkedal - 3.0.0-1
- New upstream release 3.0.0.

* Wed Jan 15 2025 Petter Urkedal - 3.0.0-0.rc8
- New upstream release candidate 3.0.0rc8.

* Wed Nov 13 2024 Petter Urkedal - 3.0.0-0.rc7
- New upstream release candidate 3.0.0rc7.

* Tue Oct 22 2024 Petter Urkedal - 3.0.0-0.rc6
- New upstream release candidate 3.0.0rc6.

* Wed Oct 09 2024 Anders Waananen - 3.0.0-0.rc5
- New upstream release candidate 3.0.0rc5.

* Wed Oct 09 2024 Petter Urkedal - 3.0.0-0.rc4
- New upstream release candidate 3.0.0rc4.

* Fri Nov 03 2023 Petter Urkedal - 2.0.1-1
- New upstream release 2.0.1.

* Thu Nov 02 2023 Anders Waananen - 2.0.1-0.rc2
- New upstream release candidate 2.0.1rc2.

* Wed Oct 25 2023 Petter Urkedal - 2.0.1-0.rc1
- New upstream release candidate 2.0.1rc1.

* Thu Nov 14 2019 Petter Urkedal - 2.0.0-1
- New upstream release 2.0.0.

* Mon Oct 14 2019 Petter Urkedal - 2.0.0-0.rc3
- New upstream release candidate 2.0.0rc3.

* Wed Oct 02 2019 Petter Urkedal - 2.0.0-0.rc2
- New upstream release candidate 2.0.0rc2.

* Thu Apr 25 2019 Petter Urkedal - 2.0.0-0.rc1
- New upstream release candidate 2.0.0rc1.

* Thu Jun 15 2017 Petter Urkedal - 1.9.1-0.rc1
- New upstream release candidate 1.9.1rc1.

* Wed May 31 2017 Anders Waananen - 1.9.0-1
- Updated to release 1.9.0.

* Tue Apr 25 2017 Petter Urkedal - 1.9.0-0.rc1
- Updated to release candidate 1.9.0rc1.

* Fri Sep 11 2015 Petter Urkedal - 1.8.4-1
- Updated to release 1.8.4.

* Mon Jul 06 2015 Anders Waananen - 1.8.3-2
- Drop doc subpackage for el5 due to missing dependencies

* Thu Jul 02 2015 Petter Urkedal - 1.8.3-1
- Updated to release 1.8.3.

* Fri Mar 27 2015 Petter Urkedal - 1.8.2-1
- Updated to release 1.8.2.

* Thu Jan 15 2015 Petter Urkedal - 1.8.2-0.rc2
- Updated to release candidate 1.8.2rc2.

* Fri Jan 09 2015 Petter Urkedal - 1.8.2-0.rc1
- Updated to release candidate 1.8.2rc1.

* Fri Aug 15 2014 Anders Waananen - 1.8.1-1
- Updated to release 1.8.1.

* Fri Jun 27 2014 Petter Urkedal - 1.8.1-0.rc1
- Updated to release candidate 1.8.1rc1.

* Wed Apr 30 2014 Petter Urkedal - 1.8.0-1
- Updated to release 1.8.0.

* Tue Oct 22 2013 Petter Urkedal - 1.7.1-1
- Updated to release 1.7.1.

* Fri Aug 16 2013 Petter Urkedal - 1.7.0-1
- Updated to release 1.7.0.

* Fri Jul 05 2013 Petter Urkedal - 1.6.1-0.rc1
- Updated to release candidate 1.6.1rc1.

* Fri Apr 19 2013 Petter Urkedal - 1.6.0-1
- Updated to release 1.6.0.

* Sat Apr 06 2013 Petter Urkedal - 1.6.0-0.rc1
- Updated to release candidate 1.6.0rc1.

* Mon Feb 18 2013 Petter Urkedal - 1.5.0-1
- Updated to release 1.5.0.

* Fri Feb 01 2013 Petter Urkedal - 1.5.0-0.rc3
- Updated to release candidate 1.5.0rc3.

* Mon Jan 28 2013 Petter Urkedal - 1.5.0-0.rc2
- Updated to release candidate 1.5.0rc2.

* Fri Jan 11 2013 Petter Urkedal - 1.5.0-0.rc1
- Updated to release candidate 1.5.0rc1.

* Thu Dec 20 2012 Petter Urkedal - 1.4.0-0.rc4
- Updated to release candidate 1.4.0rc4.

* Tue Nov 27 2012 Petter Urkedal - 1.4.0-0.rc1
- Updated to release candidate 1.4.0rc1.

* Mon Oct 29 2012 Petter Urkedal - 1.3.11-1
- Updated to release 1.3.11.

* Wed Sep 26 2012 Petter Urkedal - 1.3.10-1
- Updated to release 1.3.10.

* Fri Sep 07 2012 Petter Urkedal - 1.3.9-1
- Updated to release 1.3.9.

* Mon Apr 23 2012 Petter Urkedal - 1.3.8-1
- Updated to release 1.3.8.

* Tue Apr 03 2012 Petter Urkedal - 1.3.7-1
- Updated to release 1.3.7.

* Mon Apr 02 2012 Petter Urkedal - 1.3.6-1
- Updated to release 1.3.6.

* Thu Feb 02 2012 Petter Urkedal - 1.3.5-1
- Updated to release 1.3.5.

* Thu Feb 02 2012 Petter Urkedal - 1.3.4-1
- Updated to release 1.3.4.

* Thu Feb 02 2012 Petter Urkedal - 1.3.3-1
- Updated to release 1.3.3.

* Wed Dec 21 2011 Petter Urkedal - 1.3.2-1
- Updated to release 1.3.2.

* Mon Dec 19 2011 Petter Urkedal - 1.3.1-1
- Updated to release 1.3.1.

* Thu Dec 08 2011 Petter Urkedal - 1.3.0-1
- Updated to release 1.3.0.

* Wed Nov 23 2011 Petter Urkedal - 1.2.0-1
- Updated to release 1.2.0.

* Mon Nov 14 2011 Petter Urkedal
- Change README to README.rst.
- Add documentation subpackage.

* Fri Nov 04 2011 Petter Urkedal - 1.1.0-1
- Updated to release 1.1.0.

* Thu Nov 03 2011 Petter Urkedal
- Install default configuration file.

* Wed Oct 26 2011 Petter Urkedal - 1.0.2-1
- Updated to release 1.0.2.

* Thu Oct 20 2011 Petter Urkedal - 1.0.1-1
- Updated to release 1.0.1.

* Tue Oct 18 2011 Petter Urkedal
- Add argparse and nordugrid-arc-python dependencies.
- Install README and LICENSE.

* Fri Oct 14 2011 Petter Urkedal - 1.0-1
- Updated to release 1.0.
- Almost complete rewrite for the new probes.

* Fri Sep 30 2011 Anders Waananen - 0.9-1
- New package name and ownership

* Thu Jun 30 2011 Mattias Ellert - 0.4-1
- Fix flags to stat

* Thu Nov 18 2010 Mattias Ellert - 0.3-1
- Implement changes proposed by Emir

* Mon Oct 11 2010 Mattias Ellert - 0.2-1
- Remove Requires (per WLCG practice)

* Thu Sep 23 2010 Mattias Ellert - 0.1-1
- Initial packaging

nordugrid-arc-nagios-plugins-3.1.1/setup.cfg
[egg_info]
tag_build =
tag_date = 0

nordugrid-arc-nagios-plugins-3.1.1/AUTHORS
Individual contributors
-----------------------

Zsombor Nagy
Gábor Rőczei
Ulf Tigerstedt
Petter Urkedal
Anders Wäänänen

Organizations employing contributors
------------------------------------

University of Copenhagen (Denmark)
CSC - IT Center for Science Ltd (Finland)
NIIFI - Nemzeti Információs Infrastruktúra Fejlesztési Intézet (Hungary)
NordForsk (Norway)

nordugrid-arc-nagios-plugins-3.1.1/LICENSE
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner.
      For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.
   END OF TERMS AND CONDITIONS

nordugrid-arc-nagios-plugins-3.1.1/PKG-INFO
Metadata-Version: 1.1
Name: nordugrid-arc-nagios-plugins
Version: 3.1.1
Summary: Nagios Probes for Arc CEs
Home-page: http://www.nordugrid.org/
Author: Petter Urkedal
Author-email: urkedal@nbi.dk
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Requires: cryptography
Requires: ldap

nordugrid-arc-nagios-plugins-3.1.1/NOTICE
ARC Nagios Plugins
------------------

This product includes Nagios plugins for ARC services.  The software is
developed by the NorduGrid collaboration (http://www.nordugrid.org) with
financial support from the European Commission.

Unless stated otherwise, the Copyright is collectively owned by individual
contributors and contributing organisations as listed in the AUTHORS file.

The software is licensed under the Apache License, Version 2.0 (the
"License"); you may not use files from this software distribution except
in compliance with the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
License for the specific language governing permissions and limitations
under the License.

nordugrid-arc-nagios-plugins-3.1.1/nordugrid_arc_nagios_plugins.egg-info/

nordugrid-arc-nagios-plugins-3.1.1/nordugrid_arc_nagios_plugins.egg-info/top_level.txt
arcnagios

nordugrid-arc-nagios-plugins-3.1.1/nordugrid_arc_nagios_plugins.egg-info/SOURCES.txt
AUTHORS
LICENSE
MANIFEST.in
Makefile
NOTICE
README.rst
VERSION
nordugrid-arc-nagios-plugins.spec
nordugrid-arc-nagios-plugins.spec.in
setup.py
arcnagios/__init__.py
arcnagios/arcclients.py
arcnagios/arcutils.py
arcnagios/confargparse.py
arcnagios/nagutils.py
arcnagios/persistence.py
arcnagios/reputation.py
arcnagios/rescheduler.py
arcnagios/substitution.py
arcnagios/utils.py
arcnagios/vomsutils.py
arcnagios/ce/__init__.py
arcnagios/ce/check_arcce_clean.py
arcnagios/ce/check_arcce_monitor.py
arcnagios/ce/check_arcce_submit.py
arcnagios/ce/jobplugin.py
arcnagios/ce/jobutils.py
arcnagios/ce/jobplugins/__init__.py
arcnagios/ce/jobplugins/scripted.py
arcnagios/ce/jobplugins/staging.py
arcnagios/se/__init__.py
arcnagios/se/check_gridstorage.py
config/20-dist.ini
config/60-egi.ini
config/20-dist.d/default.xrsl.j2
config/60-egi.d/arcce_igtf.py
debian/changelog
debian/compat
debian/control
debian/copyright
debian/nordugrid-arc-nagios-plugins-doc.docs
debian/nordugrid-arc-nagios-plugins-egi.dirs
debian/nordugrid-arc-nagios-plugins-egi.install
debian/nordugrid-arc-nagios-plugins.dirs
debian/nordugrid-arc-nagios-plugins.docs
debian/nordugrid-arc-nagios-plugins.install
debian/rules
debian/source/format
doc/Makefile
doc/arcce.rst
doc/arcnagios.ini.example
doc/conf.py
doc/gridstorage.rst
doc/index.rst
doc/infosys.rst
doc/intro.rst
doc/sample_config.rst
doc/services.cfg.example
doc/media/ng-logo.png
nordugrid_arc_nagios_plugins.egg-info/PKG-INFO
nordugrid_arc_nagios_plugins.egg-info/SOURCES.txt
nordugrid_arc_nagios_plugins.egg-info/dependency_links.txt
nordugrid_arc_nagios_plugins.egg-info/top_level.txt
plugins/check_arcce_clean
plugins/check_arcce_monitor
plugins/check_arcce_submit
plugins/check_arcrest_info
plugins/check_arcservice
plugins/check_gridstorage

nordugrid-arc-nagios-plugins-3.1.1/nordugrid_arc_nagios_plugins.egg-info/PKG-INFO
Metadata-Version: 1.1
Name: nordugrid-arc-nagios-plugins
Version: 3.1.1
Summary: Nagios Probes for Arc CEs
Home-page: http://www.nordugrid.org/
Author: Petter Urkedal
Author-email: urkedal@nbi.dk
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Requires: cryptography
Requires: ldap

nordugrid-arc-nagios-plugins-3.1.1/nordugrid_arc_nagios_plugins.egg-info/dependency_links.txt

nordugrid-arc-nagios-plugins-3.1.1/VERSION
3.1.1