389 lines
14 KiB
Python
Executable File
389 lines
14 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Copyright (c) 2014 Catalyst.net Ltd
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
|
|
"""
|
|
Check various aspects of a running BIRD daemon (interface states, several routing protocols) by interrogating the
|
|
daemon via its control socket. Intended to be run from nagios.
|
|
|
|
Michael Fincham <michael.fincham@catalyst.net.nz>.
|
|
"""
|
|
|
|
import argparse
|
|
import re
|
|
import socket
|
|
import sys
|
|
|
|
# Default path of the BIRD control socket; used as the default for --control-socket.
BIRD_CONTROL_SOCKET = "/run/bird/bird.ctl"

# Status codes returned by the checks and used as the process exit code.
NAGIOS_OK, NAGIOS_WARNING, NAGIOS_CRITICAL, NAGIOS_UNKNOWN = range(4)
|
|
|
|
class BirdChecker(object):
    """
    Run health checks against a BIRD daemon by querying its control socket.

    control_socket is the filesystem path of the BIRD control socket and
    ignore is either None or a compiled regular expression; protocol names
    matched by ignore.search() are left out of the protocol based checks.

    Every check_* method returns a tuple of (nagios_code, reason).
    """

    # A reply is considered complete once one of these codes starts a line.
    _END_OF_REPLY_MARKERS = (b"\n0000", b"\n8003", b"\n0013", b"\n9001", b"\n8001")

    def __init__(self, control_socket, ignore):
        self.control_socket = control_socket
        self.ignore = ignore

    # mostly from pybird.py
    def _send_query(self, query):
        """
        Open a socket to the BIRD control socket, send the query and get
        the raw response as a native string.

        Raises socket.error if the socket cannot be used and ValueError if
        BIRD stops sending data before an end-of-reply code was seen.
        """

        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.connect(self.control_socket)
            # Sockets carry bytes; encode explicitly so this works on Python 2 and 3.
            sock.sendall(("%s\n" % query).encode("utf-8"))

            data = b""
            while not any(marker in data for marker in self._END_OF_REPLY_MARKERS):
                chunk = sock.recv(1024)
                if not chunk:  # nothing more to read and still no end marker
                    raise ValueError("Could not read additional data from BIRD")
                data += chunk
        finally:
            # Always release the socket, also when connect/send/recv fails.
            sock.close()

        if not isinstance(data, str):  # Python 3: turn bytes into text
            data = data.decode("utf-8", "replace")
        return data

    @staticmethod
    def _parse_record(line, columns):
        """
        Split one whitespace separated record into a dict keyed by columns.

        Missing trailing fields are filled with '' and anything beyond the
        last column is dropped. Returns None for a line without any fields.
        """

        fields = line.strip().split(None, len(columns))
        if not fields:
            return None
        fields.extend([''] * (len(columns) - len(fields)))
        return dict(zip(columns, fields))

    def _without_ignored(self, entries):
        """
        Return entries without the keys matched by self.ignore (if one is set).
        """

        if self.ignore is None:
            return entries
        return {name: value for name, value in entries.items() if not self.ignore.search(name)}

    def _interface_flags(self):
        """
        Parse the output of 'show interfaces' and return a dict containing all
        interfaces with their set flags.

        The raw data looks like:

        1001-br-nat up (index=19)
        1004-   MultiAccess Broadcast Multicast AdminUp LinkUp MTU=1500
        1003-   192.0.2.1/32 (Primary, scope univ)
                192.0.2.2/32 (Unselected, scope univ)

        Will be turned in to:

        {'br-nat': ['MultiAccess', 'Broadcast', 'Multicast', 'AdminUp', 'LinkUp', 'MTU=1500']}
        """

        interfaces = {}
        current_interface = ''

        for line in self._send_query('show interfaces').splitlines():
            if line.startswith('1001-'):  # line describes the interface
                current_interface = line[5:].split()[0]
            elif line.startswith('1004-'):  # line describes the interface flags
                # the first token is the '1004-' code itself, the rest are flags
                interfaces[current_interface] = line.strip().split()[1:]

        return interfaces

    def _generic_protocol_properties(self, command, protocol_id, columns, index_column):
        """
        Parse the output of an arbitrary command that returns output similar to 'show bfd sessions'
        and return a dict containing all protocols' sessions with their properties.

        command is the query sent to BIRD, protocol_id the numeric reply code
        that starts a protocol section, columns the names given to the fields
        of each record and index_column the column used as key for a record.

        The raw data looks like:

        1020-bfd1:
         IP address                Interface  State      Since       Interval  Timeout
         192.0.2.1                 eth0       Up         2014-10-06    0.100    0.500

        Will be turned in to:

        {'bfd1': {'192.0.2.1': {'interface': 'eth0',
                                'interval': '0.100',
                                'since': '2014-10-06',
                                'state': 'Up',
                                'timeout': '0.500'}}}

        When called as:

        _generic_protocol_properties('show bfd sessions', 1020, ('ip_address', 'interface', 'state', 'since', 'interval', 'timeout'), 'ip_address')

        Records with fewer fields than columns get '' for the missing ones.
        Protocols matched by the ignore expression are not returned.
        """

        protocols = {}
        current_protocol = ''
        section_prefix = '%i-' % protocol_id
        skip_line = False

        for line in self._send_query(command).splitlines():
            if skip_line:
                skip_line = False
                continue

            if line.startswith(section_prefix):  # line marking the start of a protocol
                # drop the reply code and the trailing ':' from the name
                current_protocol = line[5:].split()[0][:-1]
                protocols[current_protocol] = {}
                skip_line = True  # skip next line (the column headers)
            elif line.startswith(' '):  # hopefully a protocol record
                properties = self._parse_record(line, columns)
                if properties is None:
                    continue
                record_key = properties.pop(index_column)
                protocols[current_protocol][record_key] = properties

        return self._without_ignored(protocols)

    def _protocol_properties(self):
        """
        Parse the output of 'show protocols' and return a dict containing all protocols' sessions with their properties.

        The raw data looks like:

        2002-name     proto    table    state  since       info
        1002-bogons   Static   master   up     2014-10-29
         kernel1  Kernel   master   up     2014-10-29
         core     OSPF     master   up     2014-10-29  Running
        0000

        Will be turned in to:

        {'bogons': {'info': '',
                    'proto': 'Static',
                    'since': '2014-10-29',
                    'state': 'up',
                    'table': 'master'},
         (and so on...)

        Columns missing from a record are set to ''. Protocols matched by
        the ignore expression are not returned.
        """

        columns = ('name', 'proto', 'table', 'state', 'since', 'info')
        index_column = 'name'

        instances = {}

        for line in self._send_query('show protocols').splitlines():
            if line.startswith('1002-'):
                line = line[5:]  # first record carries the reply code, strip it
            elif not line.startswith(' '):
                continue  # column names ('2002-'), greeting, end marker, ...

            properties = self._parse_record(line, columns)
            if properties is None:
                continue
            instances[properties.pop(index_column)] = properties

        return self._without_ignored(instances)

    def check_interfaces(self):
        """
        Check that all interfaces are either up or disabled, returns a tuple of (nagios_code, reason).

        Any interface with LinkDown is critical; an interface with none of
        the AdminDown, LinkDown or LinkUp flags makes the result unknown.
        """

        disabled = []
        down = []
        up = []
        unknown = []

        for interface, flags in self._interface_flags().items():
            if 'AdminDown' in flags:
                disabled.append(interface)
            elif 'LinkDown' in flags:
                down.append(interface)
            elif 'LinkUp' in flags:
                up.append(interface)
            else:
                unknown.append(interface)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))

        if unknown:
            return (NAGIOS_UNKNOWN, '%i unknown (%s)' % (len(unknown), ', '.join(unknown)))

        if up and not disabled:
            return (NAGIOS_OK, '%i up' % len(up))
        elif up and disabled:
            return (NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled)))
        else:
            return (NAGIOS_OK, 'no interfaces')

    def check_bfd(self):
        """
        Check that all configured BFD sessions are 'Up', returns a tuple of (nagios_code, reason).
        """

        down = []
        up = []

        bfd_sessions = self._generic_protocol_properties(
            'show bfd sessions',
            1020,
            ('ip_address', 'interface', 'state', 'since', 'interval', 'timeout'),
            'ip_address',
        )

        for protocol, sessions in bfd_sessions.items():
            for neighbor, properties in sessions.items():
                if properties['state'] != 'Up':
                    down.append(neighbor)
                else:
                    up.append(neighbor)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))

        if up:
            return (NAGIOS_OK, '%i up' % len(up))
        else:
            return (NAGIOS_OK, 'BFD not running')

    def check_ospf(self):
        """
        Check that all configured OSPF neighbors are 'Full', returns a tuple of (nagios_code, reason).
        """

        down = []
        up = []

        ospf_neighbors = self._generic_protocol_properties(
            'show ospf neighbors',
            1013,
            ('router_id', 'pri', 'state', 'dtime', 'interface', 'router_ip'),
            'router_id',
        )

        for protocol, sessions in ospf_neighbors.items():
            for neighbor, properties in sessions.items():
                # the state column looks like 'Full/<role>', only the first part matters
                if not properties['state'].lower().startswith('full/'):
                    down.append(neighbor)
                else:
                    up.append(neighbor)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))

        if up:
            return (NAGIOS_OK, '%i up' % len(up))
        else:
            return (NAGIOS_OK, 'OSPF not running')

    def check_bgp(self):
        """
        Check that all configured BGP neighbors are 'Established', returns a tuple of (nagios_code, reason).

        BGP protocols whose state is 'down' are counted as disabled and do
        not cause a critical result.
        """

        down = []
        up = []
        disabled = []

        for instance, properties in self._protocol_properties().items():
            if properties['proto'] != 'BGP':
                continue

            if properties['state'] == 'down':
                disabled.append(instance)
            elif properties['info'] != 'Established':
                down.append(instance)
            else:
                up.append(instance)

        if down:
            return (NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down)))

        if up and not disabled:
            return (NAGIOS_OK, '%i up' % len(up))
        elif up and disabled:
            return (NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled)))
        else:
            return (NAGIOS_OK, 'BGP not running')

    def check_proto(self, protocol):
        """
        Check that the queried protocol is 'up', returns a tuple of (nagios_code, reason).

        A protocol that is not listed by 'show protocols' (or is hidden by
        the ignore expression) is critical; one in state 'down' is reported
        as disabled and OK.
        """

        protocol_instances = self._protocol_properties()

        if protocol not in protocol_instances:
            return (NAGIOS_CRITICAL, "%s not found" % protocol)

        properties = protocol_instances[protocol]

        if properties['state'] == 'down':
            return (NAGIOS_OK, "%s is disabled" % (protocol))

        if properties['state'] != 'up':
            return (NAGIOS_CRITICAL, "%s is not up (%s)\nInfo: %s" % (protocol, properties["state"], properties["info"]))

        return (NAGIOS_OK, "%s is up\nInfo: %s" % (protocol, properties["info"]))
|
|
|
|
|
|
if __name__ == "__main__":

    # Command line entry point: run one check, print its description and
    # exit with the matching Nagios status code.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('check', choices=('interfaces', 'bfd', 'ospf', 'bgp', 'single_protocol'), help='which check to run')
    parser.add_argument('--control-socket', default=BIRD_CONTROL_SOCKET, help='location of BIRD control socket, defaults to %s' % BIRD_CONTROL_SOCKET)
    parser.add_argument('--ignore', default=None, help='if supplied, a regular expression of protocol names which should be ignored')
    parser.add_argument("--protocol", default=None, help="the protocol to check if used with 'single_protocol'")
    args = parser.parse_args()

    if args.ignore is None:
        ignore = None
    else:
        try:
            ignore = re.compile(args.ignore)
        except re.error as error:
            # A broken pattern must not silently turn into "ignore nothing".
            sys.stdout.write("error: invalid --ignore regular expression: %s\n" % error)
            sys.exit(NAGIOS_UNKNOWN)

    if args.check == 'single_protocol' and not args.protocol:
        sys.stdout.write("error: --protocol is required with 'single_protocol'\n")
        sys.exit(NAGIOS_UNKNOWN)

    checker = BirdChecker(args.control_socket, ignore)

    # Maps every value accepted for 'check' to the call that implements it.
    checks = {
        'interfaces': checker.check_interfaces,
        'bfd': checker.check_bfd,
        'ospf': checker.check_ospf,
        'bgp': checker.check_bgp,
        'single_protocol': lambda: checker.check_proto(args.protocol),
    }

    try:
        run_check = checks.get(args.check)
        if run_check is None:
            raise NotImplementedError("no check matching '%s'" % args.check)  # only happens if a choice in argparse has no implementation
        return_code, description = run_check()
    except socket.error:
        sys.stdout.write("error: could not connect to bird\n")
        sys.exit(NAGIOS_UNKNOWN)
    except ValueError as error:
        # e.g. BIRD stopped sending data before the reply was complete
        sys.stdout.write("error: %s\n" % error)
        sys.exit(NAGIOS_UNKNOWN)

    sys.stdout.write("%s\n" % description)
    sys.exit(return_code)
|