#!/usr/bin/env python
#
# Copyright (c) 2014 Catalyst.net Ltd
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#

"""
Check various aspects of a running BIRD daemon (interface states, several
routing protocols) by interrogating the daemon via its control socket.

Intended to be run from nagios.

Michael Fincham .
"""

import argparse
import re
import socket
import sys

BIRD_CONTROL_SOCKET = "/run/bird/bird.ctl"

NAGIOS_OK = 0
NAGIOS_WARNING = 1
NAGIOS_CRITICAL = 2
NAGIOS_UNKNOWN = 3


class BirdChecker(object):
    """
    Run health checks against a BIRD daemon through its control socket.

    control_socket is the filesystem path of the UNIX control socket.
    ignore is either None or a compiled regular expression; protocol names
    it matches (via search()) are dropped from the parsed protocol data.
    """

    # A reply is considered complete once one of these codes starts a line.
    _TERMINATORS = (b"\n0000", b"\n8003", b"\n0013", b"\n9001", b"\n8001")

    def __init__(self, control_socket, ignore):
        self.control_socket = control_socket
        self.ignore = ignore

    # mostly from pybird.py
    def _send_query(self, query):
        """
        Open a socket to the BIRD control socket, send the query and return
        the raw response as text.

        Raises socket.error if the socket cannot be reached and ValueError
        if the peer stops sending before a terminating reply code is seen.
        """
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.connect(self.control_socket)
            # sendall() retries partial writes, plain send() does not
            sock.sendall(("%s\n" % query).encode("utf-8"))

            data = b""
            while not any(marker in data for marker in self._TERMINATORS):
                chunk = sock.recv(1024)
                if not chunk:
                    # an empty read means the peer closed the connection
                    raise ValueError("Could not read additional data from BIRD")
                data += chunk
        finally:
            # always release the descriptor, including on the error paths
            sock.close()

        if isinstance(data, str):
            # Python 2: bytes is already the native string type
            return data
        return data.decode("utf-8", "replace")

    def _interface_flags(self):
        """
        Parse the output of 'show interfaces' and return a dict containing
        all interfaces with their set flags.

        The raw data looks like:

            1001-br-nat up (index=19)
            1004-   MultiAccess Broadcast Multicast AdminUp LinkUp MTU=1500
            1003-   192.0.2.1/32 (Primary, scope univ)

        Will be turned in to:

            {'br-nat': ['MultiAccess', 'Broadcast', 'Multicast', 'AdminUp',
                        'LinkUp', 'MTU=1500']}
        """
        interfaces = {}
        current_interface = ''

        for line in self._send_query('show interfaces').splitlines():
            if line.startswith('1001-'):
                # line describes the interface; its name is the first word
                words = line[5:].split()
                current_interface = words[0] if words else ''
            elif line.startswith('1004-'):
                # line describes the interface flags; drop the '1004-' token
                interfaces[current_interface] = line.strip().split()[1:]

        return interfaces

    def _generic_protocol_properties(self, command, protocol_id, columns, index_column):
        """
        Parse the output of an arbitrary command that returns output similar
        to 'show bfd sessions' and return a dict containing all protocols'
        sessions with their properties.

        command is the query to send, protocol_id the numeric reply code
        that starts each protocol section, columns the ordered column names
        of a record line and index_column the column whose value keys each
        record. Columns missing from a short record are set to ''.

        The raw data looks like:

            1020-bfd1:
             IP address   Interface  State  Since       Interval  Timeout
             192.0.2.1    eth0       Up     2014-10-06  0.100     0.500

        Will be turned in to:

            {'bfd1': {'192.0.2.1': {'interface': 'eth0', 'state': 'Up',
                                    'since': '2014-10-06',
                                    'interval': '0.100',
                                    'timeout': '0.500'}}}

        When called as:

            _generic_protocol_properties(
                'show bfd sessions', 1020,
                ('ip_address', 'interface', 'state', 'since', 'interval',
                 'timeout'),
                'ip_address')

        Protocols whose name matches self.ignore are left out of the result.
        """
        prefix = '%i-' % protocol_id
        protocols = {}
        current_protocol = ''
        skip_line = False

        for line in self._send_query(command).splitlines():
            if skip_line:
                skip_line = False
                continue

            if line.startswith(prefix):
                # line marking the start of a protocol, e.g. '1020-bfd1:'
                words = line[len(prefix):].split()
                current_protocol = words[0][:-1] if words else ''
                protocols[current_protocol] = {}
                skip_line = True  # skip next line (the column headers)
            elif line.startswith(' '):
                # hopefully a protocol record
                fields = line.strip().split(None, len(columns))
                if not fields:
                    continue  # blank continuation line, nothing to record
                # pad short records so every column name has a value
                fields += [''] * (len(columns) - len(fields))
                properties = dict(zip(columns, fields))
                index = properties.pop(index_column)
                protocols.setdefault(current_protocol, {})[index] = properties

        if self.ignore is not None:
            return {k: v for k, v in protocols.items() if not self.ignore.search(k)}
        return protocols

    def _protocol_properties(self):
        """
        Parse the output of 'show protocols' and return a dict containing
        all protocol instances with their properties.

        The raw data looks like:

            2002-name     proto    table    state  since       info
            1002-bogons   Static   master   up     2014-10-29
             core         OSPF     master   up     2014-10-29  Running
            0000

        Will be turned in to:

            {'bogons': {'info': '', 'proto': 'Static', 'since': '2014-10-29',
                        'state': 'up', 'table': 'master'},
             (and so on...)

        Everything after the 'since' column is kept as 'info', so a
        multi-word info text is preserved. Instances whose name matches
        self.ignore are left out of the result.
        """
        columns = ('name', 'proto', 'table', 'state', 'since', 'info')
        instances = {}

        for line in self._send_query('show protocols').splitlines():
            if line.startswith('1002-'):
                # first record carries the reply code in front of the name
                line = line[5:]
            elif not line.startswith(' '):
                # column names ('2002-'), greeting, terminator, ...
                continue

            # split into at most len(columns) pieces so the last column
            # ('info') receives the whole remainder of the row
            fields = line.strip().split(None, len(columns) - 1)
            if not fields:
                continue
            fields += [''] * (len(columns) - len(fields))
            properties = dict(zip(columns, fields))
            instances[properties.pop('name')] = properties

        if self.ignore is not None:
            return {k: v for k, v in instances.items() if not self.ignore.search(k)}
        return instances

    def check_interfaces(self):
        """
        Check that all interfaces are either up or disabled, returns a tuple
        of (nagios_code, reason).

        Any interface with LinkDown (and without AdminDown) is critical; an
        interface with none of AdminDown/LinkDown/LinkUp is unknown.
        """
        disabled = []
        down = []
        up = []
        unknown = []

        for interface, flags in self._interface_flags().items():
            if 'AdminDown' in flags:
                disabled.append(interface)
            elif 'LinkDown' in flags:
                down.append(interface)
            elif 'LinkUp' in flags:
                up.append(interface)
            else:
                unknown.append(interface)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))
        if unknown:
            return (NAGIOS_UNKNOWN, '%i unknown (%s)' % (len(unknown), ', '.join(unknown)))
        if up and not disabled:
            return (NAGIOS_OK, '%i up' % len(up))
        if up and disabled:
            return (NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled)))
        return (NAGIOS_OK, 'no interfaces')

    def check_bfd(self):
        """
        Check that all configured BFD sessions are 'Up', returns a tuple of
        (nagios_code, reason).
        """
        down = []
        up = []

        bfd_sessions = self._generic_protocol_properties(
            'show bfd sessions', 1020,
            ('ip_address', 'interface', 'state', 'since', 'interval', 'timeout'),
            'ip_address')

        for sessions in bfd_sessions.values():
            for neighbor, properties in sessions.items():
                if properties['state'] != 'Up':
                    down.append(neighbor)
                else:
                    up.append(neighbor)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))
        if up:
            return (NAGIOS_OK, '%i up' % len(up))
        return (NAGIOS_OK, 'BFD not running')

    def check_ospf(self):
        """
        Check that all configured OSPF neighbors are 'Full', returns a tuple
        of (nagios_code, reason).
        """
        down = []
        up = []

        ospf_neighbors = self._generic_protocol_properties(
            'show ospf neighbors', 1013,
            ('router_id', 'pri', 'state', 'dtime', 'interface', 'router_ip'),
            'router_id')

        for sessions in ospf_neighbors.values():
            for neighbor, properties in sessions.items():
                # the state column looks like 'Full/<role>'; compare the prefix
                if not properties['state'].lower().startswith('full/'):
                    down.append(neighbor)
                else:
                    up.append(neighbor)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))
        if up:
            return (NAGIOS_OK, '%i up' % len(up))
        return (NAGIOS_OK, 'OSPF not running')

    def check_bgp(self):
        """
        Check that all configured BGP neighbors are 'Established', returns a
        tuple of (nagios_code, reason).

        BGP instances in state 'down' are counted as disabled rather than
        as failures.
        """
        down = []
        up = []
        disabled = []

        for instance, properties in self._protocol_properties().items():
            if properties['proto'] != 'BGP':
                continue
            if properties['state'] == 'down':
                disabled.append(instance)
            elif properties['info'] != 'Established':
                down.append(instance)
            else:
                up.append(instance)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))
        if up and not disabled:
            return (NAGIOS_OK, '%i up' % len(up))
        if up and disabled:
            return (NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled)))
        return (NAGIOS_OK, 'BGP not running')

    def check_proto(self, protocol):
        """
        Check that the queried protocol is 'up', returns a tuple of
        (nagios_code, reason).

        A protocol that is missing (or filtered out by self.ignore) is
        critical; one in state 'down' is reported as disabled and OK.
        """
        protocol_instances = self._protocol_properties()

        if protocol not in protocol_instances:
            return (NAGIOS_CRITICAL, "%s not found" % protocol)

        properties = protocol_instances[protocol]
        if properties['state'] == 'down':
            return (NAGIOS_OK, "%s is disabled" % (protocol))
        if properties['state'] != 'up':
            return (NAGIOS_CRITICAL, "%s is not up (%s)\nInfo: %s" % (protocol, properties["state"], properties["info"]))
        return (NAGIOS_OK, "%s is up\nInfo: %s" % (protocol, properties["info"]))


def main():
    """
    Parse the command line, run the selected check, print its description
    and exit with the matching nagios status code.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('check', choices=('interfaces', 'bfd', 'ospf', 'bgp', 'single_protocol'), help='which check to run')
    parser.add_argument('--control-socket', default=BIRD_CONTROL_SOCKET, help='location of BIRD control socket, defaults to %s' % BIRD_CONTROL_SOCKET)
    parser.add_argument('--ignore', default=None, help='if supplied, a regular expression of protocol names which should be ignored')
    parser.add_argument("--protocol", default=None, help="the protocol to check if used with 'single_protocol'")
    args = parser.parse_args()

    ignore = None
    if args.ignore is not None:
        try:
            ignore = re.compile(args.ignore)
        except re.error as error:
            # keep running without a filter, but say so instead of hiding it
            sys.stderr.write("warning: invalid --ignore pattern, not ignoring anything: %s\n" % error)

    checker = BirdChecker(args.control_socket, ignore)
    checks = {
        'interfaces': checker.check_interfaces,
        'bfd': checker.check_bfd,
        'ospf': checker.check_ospf,
        'bgp': checker.check_bgp,
        'single_protocol': lambda: checker.check_proto(args.protocol),
    }

    if args.check not in checks:
        # only reachable if argparse offers a choice that has no check here
        raise NotImplementedError("no check matching '%s'" % args.check)

    try:
        return_code, description = checks[args.check]()
    except socket.error:
        sys.stdout.write("error: could not connect to bird\n")
        sys.exit(NAGIOS_UNKNOWN)
    except ValueError as error:
        # truncated reply from the daemon: report it as UNKNOWN rather than
        # dying with a traceback and an arbitrary exit status
        sys.stdout.write("error: %s\n" % error)
        sys.exit(NAGIOS_UNKNOWN)

    sys.stdout.write("%s\n" % description)
    sys.exit(return_code)


if __name__ == "__main__":
    main()