#!/bin/bash
#
# plugin to monitor difference between st_last_event_ts and st_last_received_ts
# in sl_status table (based on slony_ and slony_lag_)
#
# http://blog.endpoint.com/2009/07/slony-slstatus-and-diagnosing.html
#
#  st_origin: the local slony system
#  st_received: the slony instance that sent an event
#  st_last_event: the sequence number of the last event received from that origin/received pair
#  st_last_event_ts: the timestamp on the last event received
#  st_last_received: the sequence number of the last sl_event + sl_confirm pair received
#  st_last_received_ts: the timestamp on the sl_confirm in that pair
#  st_last_received_event_ts: the timestamp on the sl_event in that pair
#  st_lag_num_events: difference between st_last_event and st_last_received
#  st_lag_time: difference between st_last_event_ts and st_last_received_ts
#
#
# Configuration variables:
#
#  PGHOST     - Database server to use.
#  PGUSER     - User to connect as.
#  PGPASSWORD - Password to use.
#  PGSCHEMA   - Replication schema.
#
# Configuration example:
#
# munin-node:
#
#  [slony*]
#  user slony
#  env.PGHOST localhost
#  env.PGUSER slony
#  env.PGPASSWORD password
#  env.PGSCHEMA _slony
#
# postgresql.conf:
#
#  standard_conforming_strings = on
#
# Installation (the suffix of the link name selects the database to monitor):
#
#  ln -s /usr/share/munin/plugins/slony_lag_time_ /etc/munin/plugins/slony_lag_time_PGDATABASE
#
#
# Magic markers (optional - only used by munin-config and some installation scripts):
#%# family=contrib

# The database name is whatever follows the "slony_lag_time_" prefix in the
# name this plugin was invoked under. Parameter expansion is used instead of
# `basename $0 | sed ...` so that paths containing whitespace are handled.
plugin_name=${0##*/}
PGDATABASE=${plugin_name#slony_lag_time_}

#######################################
# Print the Munin graph configuration: the graph-level settings followed by
# one data series per row returned by the sl_node/sl_path query.
# Globals:   PGHOST, PGUSER, PGDATABASE, PGSCHEMA (read)
# Arguments: none
# Outputs:   Munin config lines on stdout
#######################################
print_config() {
  printf '%s\n' \
    "graph_args --base 1000 -l 0" \
    "graph_category db" \
    "graph_info Slony st_lag_time for ${PGDATABASE}" \
    "graph_title Slony lag time for ${PGDATABASE}"
  # Emitted literally (single quotes are intentional): the ${graph_period}
  # placeholder must reach the output unexpanded by the shell.
  printf '%s\n' 'graph_vlabel ${graph_period}'

  # -h/-U values are quoted so that an empty PGHOST or PGUSER is passed as an
  # empty argument instead of letting the option swallow the following flag.
  # psql -t prints rows as "no_id | host"; the "|" lands in _sep.
  psql -h "${PGHOST}" -d "${PGDATABASE}" -U "${PGUSER}" -tc "SELECT no_id,regexp_replace(pa_conninfo, '.*host=(.*?) .*$', '\\\\1') FROM ${PGSCHEMA}.sl_node JOIN ${PGSCHEMA}.sl_path ON (pa_server=no_id) WHERE pa_client=${PGSCHEMA}.getlocalnodeid('${PGSCHEMA}'::name);" \
    | while read -r node_id _sep host; do
        # Skip the blank line(s) in the psql output.
        [[ -n "${node_id}" ]] || continue
        printf '%s.label %s\n' "${node_id}" "${host}"
        printf '%s.type GAUGE\n' "${node_id}"
        printf '%s.draw LINE2\n' "${node_id}"
        printf '%s.info difference between st_last_event_ts and st_last_received_ts\n' "${node_id}"
        printf '%s.warning 300\n' "${node_id}"
        printf '%s.critical 600\n' "${node_id}"
      done
}

#######################################
# Print one "<st_received>.value <lag>" line per row of sl_status, where the
# lag is st_lag_time converted to integer seconds by the query.
# Globals:   PGHOST, PGUSER, PGDATABASE, PGSCHEMA (read)
# Arguments: none
# Outputs:   Munin value lines on stdout
#######################################
print_values() {
  psql -h "${PGHOST}" -d "${PGDATABASE}" -U "${PGUSER}" -tc "SELECT st_received, extract(epoch FROM st_lag_time)::integer FROM ${PGSCHEMA}.sl_status ORDER BY 1;" \
    | while read -r node_id _sep lag_seconds; do
        # Skip the blank line(s) in the psql output.
        [[ -n "${node_id}" ]] || continue
        printf '%s.value %s\n' "${node_id}" "${lag_seconds}"
      done
}

if [[ "${1:-}" == "config" ]]; then
  print_config
  exit 0
fi

print_values