add chekc_bird
This commit is contained in:
388
check_bird
Executable file
388
check_bird
Executable file
@@ -0,0 +1,388 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (c) 2014 Catalyst.net Ltd
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
"""
|
||||
Check various aspects of a running BIRD daemon (interface states, several routing protocols) by interrogating the
|
||||
daemon via its control socket. Intended to be run from nagios.
|
||||
|
||||
Michael Fincham <michael.fincham@catalyst.net.nz>.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
|
||||
BIRD_CONTROL_SOCKET="/run/bird/bird.ctl"
|
||||
|
||||
NAGIOS_OK = 0
|
||||
NAGIOS_WARNING = 1
|
||||
NAGIOS_CRITICAL = 2
|
||||
NAGIOS_UNKNOWN = 3
|
||||
|
||||
class BirdChecker(object):
|
||||
|
||||
def __init__(self, control_socket, ignore):
|
||||
self.control_socket = control_socket
|
||||
self.ignore = ignore
|
||||
|
||||
# mostly from pybird.py
|
||||
def _send_query(self, query):
|
||||
"""
|
||||
Open a socket to the BIRD control socket, send the query and get
|
||||
the raw response.
|
||||
"""
|
||||
|
||||
sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||
sock.connect(self.control_socket)
|
||||
sock.send("%s\n" % query)
|
||||
|
||||
data = ''
|
||||
prev_data = None
|
||||
|
||||
while (data.find("\n0000") == -1) and (data.find("\n8003") == -1) and (data.find("\n0013") == -1) and (data.find("\n9001") == -1) and (data.find("\n8001") == -1):
|
||||
data += sock.recv(1024)
|
||||
if data == prev_data:
|
||||
raise ValueError("Could not read additional data from BIRD")
|
||||
prev_data = data
|
||||
|
||||
sock.close()
|
||||
return str(data)
|
||||
|
||||
def _interface_flags(self):
|
||||
"""
|
||||
Parse the output of 'show interfaces' and return a dict containing all
|
||||
interfaces with their set flags.
|
||||
|
||||
The raw data looks like:
|
||||
|
||||
1001-br-nat up (index=19)
|
||||
1004- MultiAccess Broadcast Multicast AdminUp LinkUp MTU=1500
|
||||
1003- 192.0.2.X/32 (Primary, scope univ)
|
||||
192.0.2.X/32 (Unselected, scope univ)
|
||||
192.0.2.X/32 (Unselected, scope univ)
|
||||
192.0.2.X/32 (Unselected, scope univ)
|
||||
|
||||
Will be turned in to:
|
||||
|
||||
{'br-nat': ['MultiAccess', 'Broadcast', 'Multicast', 'AdminUp', 'LinkUp', 'MTU=1500']}
|
||||
"""
|
||||
|
||||
interface_data = self._send_query('show interfaces')
|
||||
|
||||
interfaces = {}
|
||||
current_interface = ''
|
||||
|
||||
for line in iter(interface_data.splitlines(False)):
|
||||
if line.startswith('1001-'): # line describes the interface
|
||||
current_interface = line[5:].split()[0]
|
||||
elif line.startswith('1004-'): # line describes the interface flags
|
||||
interfaces[current_interface] = line.strip().split()[1:]
|
||||
|
||||
return interfaces
|
||||
|
||||
def _generic_protocol_properties(self, command, protocol_id, columns, index_column):
|
||||
"""
|
||||
Parse the output of an arbitrary command that returns output similar to 'show bfd sessions'
|
||||
and return a dict containing all protocols' sessions with their properties.
|
||||
|
||||
The raw data looks like:
|
||||
|
||||
1020-bfd1:
|
||||
IP address Interface State Since Interval Timeout
|
||||
192.0.2.X
|
||||
192.0.2.X
|
||||
|
||||
Will be turned in to:
|
||||
|
||||
{'bfd1': {'192.0.2.X
|
||||
'interval': '0.100',
|
||||
'since': '2014-10-06',
|
||||
'state': 'Up',
|
||||
'timeout': '0.500'},
|
||||
'192.0.2.X
|
||||
'interval': '0.100',
|
||||
'since': '2014-09-24',
|
||||
'state': 'Up',
|
||||
'timeout': '0.500'}}}
|
||||
|
||||
When called as:
|
||||
|
||||
_generic_protocol_properties('show bfd sessions', 1020, ('ip_address', 'interface', 'state', 'since', 'interval', 'timeout'), 'ip_address')
|
||||
"""
|
||||
|
||||
protocol_data = self._send_query(command)
|
||||
protocols = {}
|
||||
current_protocol = ''
|
||||
|
||||
skip_line = False
|
||||
|
||||
for line in iter(protocol_data.splitlines(False)):
|
||||
if skip_line:
|
||||
skip_line = False
|
||||
continue
|
||||
elif line.startswith('%i-' % protocol_id): # line marking the start of a protocol
|
||||
current_protocol = line[5:].split()[0][:-1]
|
||||
protocols[current_protocol] = {}
|
||||
skip_line = True # skip next line (the column headers)
|
||||
elif line.startswith(' '): # hopefully a protocol record
|
||||
properties = {}
|
||||
|
||||
line = line.strip().split(None, len(columns))
|
||||
for column_number, column_name in enumerate(columns):
|
||||
properties[column_name] = line[column_number]
|
||||
protocols.pop(index_column, None)
|
||||
protocols[current_protocol][properties[index_column]] = dict((k, v) for (k, v) in properties.iteritems() if k != index_column)
|
||||
|
||||
if self.ignore is not None:
|
||||
return({k:v for k,v in protocols.iteritems() if not self.ignore.search(k)})
|
||||
else:
|
||||
return(protocols)
|
||||
|
||||
def _protocol_properties(self):
|
||||
# XXX this is a very cargo cult function... tidy it
|
||||
"""
|
||||
Parse the output of 'show protocols' and return a dict containing all protocols' sessions with their properties.
|
||||
|
||||
The raw data looks like:
|
||||
|
||||
2002-name proto table state since info
|
||||
1002-bogons Static master up 2014-10-29
|
||||
kernel1 Kernel master up 2014-10-29
|
||||
device1 Device master up 2014-10-29
|
||||
edge Direct master up 2014-10-29
|
||||
bfd1 BFD master up 2014-10-29
|
||||
aggregates Static master up 2014-10-29
|
||||
blackholes Static master up 2014-10-29
|
||||
core OSPF master up 2014-10-29 Running
|
||||
0000
|
||||
|
||||
Will be turned in to:
|
||||
|
||||
{'aggregates': {'info': '',
|
||||
'proto': 'Static',
|
||||
'since': '2014-10-29',
|
||||
'state': 'up',
|
||||
'table': 'master'},
|
||||
(and so on...)
|
||||
|
||||
When called as:
|
||||
|
||||
_protocol_properties()
|
||||
"""
|
||||
|
||||
columns = ('name', 'proto', 'table', 'state', 'since', 'info')
|
||||
index_column = 'name'
|
||||
|
||||
protocol_data = self._send_query('show protocols')
|
||||
|
||||
instances = {}
|
||||
|
||||
for line in iter(protocol_data.splitlines(False)):
|
||||
if line.startswith('2002-'): # column names
|
||||
continue
|
||||
elif line.startswith('1002-') or line.startswith(' '): # hopefully a protocol record
|
||||
properties = {}
|
||||
|
||||
if line.startswith('1002-'):
|
||||
line = line[5:]
|
||||
|
||||
line = line.strip().split(None, len(columns))
|
||||
for column_number, column_name in enumerate(columns):
|
||||
try:
|
||||
properties[column_name] = line[column_number]
|
||||
except:
|
||||
properties[column_name] = ''
|
||||
|
||||
instance_name = properties['name']
|
||||
properties.pop('name', None)
|
||||
instances[instance_name] = properties
|
||||
|
||||
if self.ignore is not None:
|
||||
return({k:v for k,v in instances.iteritems() if not self.ignore.search(k)})
|
||||
else:
|
||||
return(instances)
|
||||
|
||||
def check_interfaces(self):
|
||||
"""
|
||||
Check that all interfaces are either up or disabled, returns a tuple of (nagios_code, reason).
|
||||
"""
|
||||
|
||||
disabled = []
|
||||
down = []
|
||||
up = []
|
||||
unknown = []
|
||||
|
||||
for interface, flags in self._interface_flags().iteritems():
|
||||
if 'AdminDown' in flags:
|
||||
disabled.append(interface)
|
||||
elif 'LinkDown' in flags:
|
||||
down.append(interface)
|
||||
elif 'LinkUp' in flags:
|
||||
up.append(interface)
|
||||
else:
|
||||
unknown.append(interface)
|
||||
|
||||
if down:
|
||||
return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
|
||||
|
||||
if unknown:
|
||||
return((NAGIOS_UNKNOWN, '%i unknown (%s)' % (len(unknown), ', '.join(unknown))))
|
||||
|
||||
if up and not disabled:
|
||||
return((NAGIOS_OK, '%i up' % len(up)))
|
||||
elif up and disabled:
|
||||
return((NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled))))
|
||||
else:
|
||||
return((NAGIOS_OK, 'no interfaces'))
|
||||
|
||||
def check_bfd(self):
|
||||
"""
|
||||
Check that all configured BFD sessions are 'up'.
|
||||
"""
|
||||
|
||||
down = []
|
||||
up = []
|
||||
|
||||
bfd_sessions = self._generic_protocol_properties('show bfd sessions', 1020, ('ip_address', 'interface', 'state', 'since', 'interval', 'timeout'), 'ip_address')
|
||||
|
||||
for protocol, sessions in bfd_sessions.iteritems():
|
||||
for neighbor, properties in sessions.iteritems():
|
||||
if properties['state'] != 'Up':
|
||||
down.append(neighbor)
|
||||
else:
|
||||
up.append(neighbor)
|
||||
|
||||
if down:
|
||||
return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
|
||||
|
||||
if up:
|
||||
return((NAGIOS_OK, '%i up' % len(up)))
|
||||
else:
|
||||
return((NAGIOS_OK, 'BFD not running'))
|
||||
|
||||
def check_ospf(self):
|
||||
"""
|
||||
Check that all configured OSPF neighbors are 'Full'.
|
||||
"""
|
||||
|
||||
down = []
|
||||
up = []
|
||||
|
||||
for protocol, sessions in self._generic_protocol_properties('show ospf neighbors', 1013, ('router_id', 'pri', 'state', 'dtime', 'interface', 'router_ip'), 'router_id').iteritems():
|
||||
for neighbor, properties in sessions.iteritems():
|
||||
if not properties['state'].lower().startswith('full/'):
|
||||
down.append(neighbor)
|
||||
else:
|
||||
up.append(neighbor)
|
||||
|
||||
if down:
|
||||
return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
|
||||
|
||||
if up:
|
||||
return((NAGIOS_OK, '%i up' % len(up)))
|
||||
else:
|
||||
return((NAGIOS_OK, 'OSPF not running'))
|
||||
|
||||
def check_bgp(self):
|
||||
"""
|
||||
Check that all configured BGP neighbors are 'Established'.
|
||||
"""
|
||||
|
||||
down = []
|
||||
up = []
|
||||
disabled = []
|
||||
|
||||
protocol_instances = self._protocol_properties()
|
||||
|
||||
for instance, properties in protocol_instances.iteritems():
|
||||
|
||||
if properties['proto'] == 'BGP' and properties['state'] == 'down':
|
||||
disabled.append(instance)
|
||||
continue
|
||||
|
||||
if properties['proto'] == 'BGP' and properties['info'] != 'Established':
|
||||
down.append(instance)
|
||||
elif properties['proto'] == 'BGP':
|
||||
up.append(instance)
|
||||
|
||||
if down:
|
||||
return((NAGIOS_CRITICAL, '%i down (%s)' % (len(down), ', '.join(down))))
|
||||
|
||||
if up and not disabled:
|
||||
return((NAGIOS_OK, '%i up' % len(up)))
|
||||
elif up and disabled:
|
||||
return((NAGIOS_OK, '%i up, %i disabled' % (len(up), len(disabled))))
|
||||
else:
|
||||
return((NAGIOS_OK, 'BGP not running'))
|
||||
|
||||
def check_proto(self, protocol):
|
||||
"""
|
||||
Check that the queried protocol is 'up'.
|
||||
"""
|
||||
|
||||
protocol_instances = self._protocol_properties()
|
||||
|
||||
if protocol not in protocol_instances:
|
||||
return (NAGIOS_CRITICAL, "%s not found"%protocol)
|
||||
|
||||
properties = protocol_instances[protocol]
|
||||
|
||||
if properties['state'] == 'down':
|
||||
return (NAGIOS_OK, "%s is disabled"%(protocol))
|
||||
|
||||
if properties['state'] != 'up':
|
||||
return (NAGIOS_CRITICAL, "%s is not up (%s)\nInfo: %s"%(protocol, properties["state"], properties["info"]))
|
||||
|
||||
return (NAGIOS_OK, "%s is up\nInfo: %s"%(protocol, properties["info"]))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument('check', choices=('interfaces', 'bfd', 'ospf', 'bgp', 'single_protocol'), help='which check to run')
|
||||
parser.add_argument('--control-socket', default=BIRD_CONTROL_SOCKET, help='location of BIRD control socket, defaults to %s' % BIRD_CONTROL_SOCKET)
|
||||
parser.add_argument('--ignore', default=None, help='if supplied, a regular expression of protocol names which should be ignored')
|
||||
parser.add_argument("--protocol", default=None, help="the protocol to check if used with 'single_protocol'")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
ignore = re.compile(args.ignore)
|
||||
except:
|
||||
ignore = None
|
||||
|
||||
checker = BirdChecker(args.control_socket, ignore)
|
||||
|
||||
try:
|
||||
if args.check == 'interfaces':
|
||||
return_code, description = checker.check_interfaces()
|
||||
elif args.check == 'bfd':
|
||||
return_code, description = checker.check_bfd()
|
||||
elif args.check == 'ospf':
|
||||
return_code, description = checker.check_ospf()
|
||||
elif args.check == 'bgp':
|
||||
return_code, description = checker.check_bgp()
|
||||
elif args.check == 'single_protocol':
|
||||
return_code, description = checker.check_proto(args.protocol)
|
||||
else:
|
||||
raise NotImplementedError("no check matching '%s'" % args.check) # this will only happen if not all possible choices in argparse are not implemented
|
||||
except socket.error:
|
||||
sys.stdout.write("error: could not connect to bird\n")
|
||||
sys.exit(NAGIOS_UNKNOWN)
|
||||
|
||||
sys.stdout.write("%s\n" % description)
|
||||
sys.exit(return_code)
|
||||
Reference in New Issue
Block a user