diff --git a/check_bird b/check_bird
new file mode 100755
index 0000000..5cc2f3c
--- /dev/null
+++ b/check_bird
@@ -0,0 +1,388 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2014 Catalyst.net Ltd
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+#
+
+"""
+Check various aspects of a running BIRD daemon (interface states, several routing protocols) by interrogating the
+daemon via its control socket. Intended to be run from nagios.
+
+Michael Fincham .
+"""
+
+import argparse
+import re
+import socket
+import sys
+
+BIRD_CONTROL_SOCKET="/run/bird/bird.ctl"
+
+NAGIOS_OK = 0
+NAGIOS_WARNING = 1
+NAGIOS_CRITICAL = 2
+NAGIOS_UNKNOWN = 3
+
+class BirdChecker(object):
+
+ def __init__(self, control_socket, ignore):
+ self.control_socket = control_socket
+ self.ignore = ignore
+
+ # mostly from pybird.py
+ def _send_query(self, query):
+ """
+ Open a socket to the BIRD control socket, send the query and get
+ the raw response.
+ """
+
+ sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ sock.connect(self.control_socket)
+ sock.send("%s\n" % query)
+
+ data = ''
+ prev_data = None
+
+ while (data.find("\n0000") == -1) and (data.find("\n8003") == -1) and (data.find("\n0013") == -1) and (data.find("\n9001") == -1) and (data.find("\n8001") == -1):
+ data += sock.recv(1024)
+ if data == prev_data:
+ raise ValueError("Could not read additional data from BIRD")
+ prev_data = data
+
+ sock.close()
+ return str(data)
+
+ def _interface_flags(self):
+ """
+ Parse the output of 'show interfaces' and return a dict containing all
+ interfaces with their set flags.
+
+ The raw data looks like:
+
+ 1001-br-nat up (index=19)
+ 1004- MultiAccess Broadcast Multicast AdminUp LinkUp MTU=1500
+ 1003- 192.0.2.X/32 (Primary, scope univ)
+ 192.0.2.X/32 (Unselected, scope univ)
+ 192.0.2.X/32 (Unselected, scope univ)
+ 192.0.2.X/32 (Unselected, scope univ)
+
+ Will be turned in to:
+
+ {'br-nat': ['MultiAccess', 'Broadcast', 'Multicast', 'AdminUp', 'LinkUp', 'MTU=1500']}
+ """
+
+ interface_data = self._send_query('show interfaces')
+
+ interfaces = {}
+ current_interface = ''
+
+ for line in iter(interface_data.splitlines(False)):
+ if line.startswith('1001-'): # line describes the interface
+ current_interface = line[5:].split()[0]
+ elif line.startswith('1004-'): # line describes the interface flags
+ interfaces[current_interface] = line.strip().split()[1:]
+
+ return interfaces
+
+ def _generic_protocol_properties(self, command, protocol_id, columns, index_column):
+ """
+ Parse the output of an arbitrary command that returns output similar to 'show bfd sessions'
+ and return a dict containing all protocols' sessions with their properties.
+
+ The raw data looks like:
+
+ 1020-bfd1:
+ IP address Interface State Since Interval Timeout
+ 192.0.2.X
+ 192.0.2.X
+
+ Will be turned in to:
+
+ {'bfd1': {'192.0.2.X
+ 'interval': '0.100',
+ 'since': '2014-10-06',
+ 'state': 'Up',
+ 'timeout': '0.500'},
+ '192.0.2.X
+ 'interval': '0.100',
+ 'since': '2014-09-24',
+ 'state': 'Up',
+ 'timeout': '0.500'}}}
+
+ When called as:
+
+ _generic_protocol_properties('show bfd sessions', 1020, ('ip_address', 'interface', 'state', 'since', 'interval', 'timeout'), 'ip_address')
+ """
+
+ protocol_data = self._send_query(command)
+ protocols = {}
+ current_protocol = ''
+
+ skip_line = False
+
+ for line in iter(protocol_data.splitlines(False)):
+ if skip_line:
+ skip_line = False
+ continue
+ elif line.startswith('%i-' % protocol_id): # line marking the start of a protocol
+ current_protocol = line[5:].split()[0][:-1]
+ protocols[current_protocol] = {}
+ skip_line = True # skip next line (the column headers)
+ elif line.startswith(' '): # hopefully a protocol record
+ properties = {}
+
+ line = line.strip().split(None, len(columns))
+ for column_number, column_name in enumerate(columns):
+ properties[column_name] = line[column_number]
+ protocols.pop(index_column, None)
+ protocols[current_protocol][properties[index_column]] = dict((k, v) for (k, v) in properties.iteritems() if k != index_column)
+
+ if self.ignore is not None:
+ return({k:v for k,v in protocols.iteritems() if not self.ignore.search(k)})
+ else:
+ return(protocols)
+
+ def _protocol_properties(self):
+ # XXX this is a very cargo cult function... tidy it
+ """
+ Parse the output of 'show protocols' and return a dict containing all protocols' sessions with their properties.
+
+ The raw data looks like:
+
+ 2002-name proto table state since info
+ 1002-bogons Static master up 2014-10-29
+ kernel1 Kernel master up 2014-10-29
+ device1 Device master up 2014-10-29
+ edge Direct master up 2014-10-29
+ bfd1 BFD master up 2014-10-29
+ aggregates Static master up 2014-10-29
+ blackholes Static master up 2014-10-29
+ core OSPF master up 2014-10-29 Running
+ 0000
+
+ Will be turned in to:
+
+ {'aggregates': {'info': '',
+ 'proto': 'Static',
+ 'since': '2014-10-29',
+ 'state': 'up',
+ 'table': 'master'},
+ (and so on...)
+
+ When called as:
+
+ _protocol_properties()
+ """
+
+ columns = ('name', 'proto', 'table', 'state', 'since', 'info')
+ index_column = 'name'
+
+ protocol_data = self._send_query('show protocols')
+
+ instances = {}
+
+ for line in iter(protocol_data.splitlines(False)):
+ if line.startswith('2002-'): # column names
+ continue
+ elif line.startswith('1002-') or line.startswith(' '): # hopefully a protocol record
+ properties = {}
+
+ if line.startswith('1002-'):
+ line = line[5:]
+
+ line = line.strip().split(None, len(columns))
+ for column_number, column_name in enumerate(columns):
+ try:
+ properties[column_name] = line[column_number]
+ except:
+ properties[column_name] = ''
+
+ instance_name = properties['name']
+ properties.pop('name', None)
+ instances[instance_name] = properties
+
+ if self.ignore is not None:
+ return({k:v for k,v in instances.iteritems() if not self.ignore.search(k)})
+ else:
+ return(instances)
+
+ def check_interfaces(self):
+ """
+ Check that all interfaces are either up or disabled, returns a tuple of (nagios_code, reason).
+ """
+
+ disabled = []
+ down = []
+ up = []
+ unknown = []
+
+ for interface, flags in self._interface_flags().iteritems():
+ if 'AdminDown' in flags:
+ disabled.append(interface)
+ elif 'LinkDown' in flags:
+ down.append(interface)
+ elif 'LinkUp' in flags:
+ up.append(interface)
+ else:
+ unknown.append(interface)
+
+ if down:
+ return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
+
+ if unknown:
+ return((NAGIOS_UNKNOWN, '%i unknown (%s)' % (len(unknown), ', '.join(unknown))))
+
+ if up and not disabled:
+ return((NAGIOS_OK, '%i up' % len(up)))
+ elif up and disabled:
+ return((NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled))))
+ else:
+ return((NAGIOS_OK, 'no interfaces'))
+
+ def check_bfd(self):
+ """
+ Check that all configured BFD sessions are 'up'.
+ """
+
+ down = []
+ up = []
+
+ bfd_sessions = self._generic_protocol_properties('show bfd sessions', 1020, ('ip_address', 'interface', 'state', 'since', 'interval', 'timeout'), 'ip_address')
+
+ for protocol, sessions in bfd_sessions.iteritems():
+ for neighbor, properties in sessions.iteritems():
+ if properties['state'] != 'Up':
+ down.append(neighbor)
+ else:
+ up.append(neighbor)
+
+ if down:
+ return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
+
+ if up:
+ return((NAGIOS_OK, '%i up' % len(up)))
+ else:
+ return((NAGIOS_OK, 'BFD not running'))
+
+ def check_ospf(self):
+ """
+ Check that all configured OSPF neighbors are 'Full'.
+ """
+
+ down = []
+ up = []
+
+ for protocol, sessions in self._generic_protocol_properties('show ospf neighbors', 1013, ('router_id', 'pri', 'state', 'dtime', 'interface', 'router_ip'), 'router_id').iteritems():
+ for neighbor, properties in sessions.iteritems():
+ if not properties['state'].lower().startswith('full/'):
+ down.append(neighbor)
+ else:
+ up.append(neighbor)
+
+ if down:
+ return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
+
+ if up:
+ return((NAGIOS_OK, '%i up' % len(up)))
+ else:
+ return((NAGIOS_OK, 'OSPF not running'))
+
+ def check_bgp(self):
+ """
+ Check that all configured BGP neighbors are 'Established'.
+ """
+
+ down = []
+ up = []
+ disabled = []
+
+ protocol_instances = self._protocol_properties()
+
+ for instance, properties in protocol_instances.iteritems():
+
+ if properties['proto'] == 'BGP' and properties['state'] == 'down':
+ disabled.append(instance)
+ continue
+
+ if properties['proto'] == 'BGP' and properties['info'] != 'Established':
+ down.append(instance)
+ elif properties['proto'] == 'BGP':
+ up.append(instance)
+
+ if down:
+ return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
+
+ if up and not disabled:
+ return((NAGIOS_OK, '%i up' % len(up)))
+ elif up and disabled:
+ return((NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled))))
+ else:
+ return((NAGIOS_OK, 'BGP not running'))
+
+ def check_proto(self, protocol):
+ """
+ Check that the queried protocol is 'up'.
+ """
+
+ protocol_instances = self._protocol_properties()
+
+ if protocol not in protocol_instances:
+ return (NAGIOS_CRITICAL, "%s not found"%protocol)
+
+ properties = protocol_instances[protocol]
+
+ if properties['state'] == 'down':
+ return (NAGIOS_OK, "%s is disabled"%(protocol))
+
+ if properties['state'] != 'up':
+ return (NAGIOS_CRITICAL, "%s is not up (%s)\nInfo: %s"%(protocol, properties["state"], properties["info"]))
+
+ return (NAGIOS_OK, "%s is up\nInfo: %s"%(protocol, properties["info"]))
+
+
+if __name__ == "__main__":
+
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('check', choices=('interfaces', 'bfd', 'ospf', 'bgp', 'single_protocol'), help='which check to run')
+ parser.add_argument('--control-socket', default=BIRD_CONTROL_SOCKET, help='location of BIRD control socket, defaults to %s' % BIRD_CONTROL_SOCKET)
+ parser.add_argument('--ignore', default=None, help='if supplied, a regular expression of protocol names which should be ignored')
+ parser.add_argument("--protocol", default=None, help="the protocol to check if used with 'single_protocol'")
+ args = parser.parse_args()
+
+ try:
+ ignore = re.compile(args.ignore)
+ except:
+ ignore = None
+
+ checker = BirdChecker(args.control_socket, ignore)
+
+ try:
+ if args.check == 'interfaces':
+ return_code, description = checker.check_interfaces()
+ elif args.check == 'bfd':
+ return_code, description = checker.check_bfd()
+ elif args.check == 'ospf':
+ return_code, description = checker.check_ospf()
+ elif args.check == 'bgp':
+ return_code, description = checker.check_bgp()
+ elif args.check == 'single_protocol':
+ return_code, description = checker.check_proto(args.protocol)
+ else:
+ raise NotImplementedError("no check matching '%s'" % args.check) # this will only happen if not all possible choices in argparse are not implemented
+ except socket.error:
+ sys.stdout.write("error: could not connect to bird\n")
+ sys.exit(NAGIOS_UNKNOWN)
+
+ sys.stdout.write("%s\n" % description)
+ sys.exit(return_code)