#!/bin/bash
#
#
# MySQL
#
# Description:  Manages a MySQL database as Linux-HA resource
#
# Authors:  Alan Robertson:               DB2 Script
#           Jakub Janczak:                rewrite as MySQL
#           Andrew Beekhof:               cleanup and import
#           Sebastian Reitenbach:         add OpenBSD defaults, more cleanup
#           Narayan Newton:               add Gentoo/Debian defaults
#           Marian Marinov, Florian Haas: add replication capability
#           Yves Trudeau, Baron Schwartz: add VIP support and improve replication
#           Jervin Real, Kenny Gryp:      Booth Compatibility Improvements
#
# Support:  linux-ha@lists.linux-ha.org
# License:  GNU General Public License (GPL)
#
# (c) 2002-2005 International Business Machines, Inc.
#     2005-2010 Linux-HA contributors
#
# An example usage in /etc/ha.d/haresources:
#       node1  10.0.0.170 mysql
#
# Version: 20141112131457
#
# See usage() function below for more details...
#
# OCF instance parameters:
#   OCF_RESKEY_binary
#   OCF_RESKEY_binary_prefix
#   OCF_RESKEY_client_binary
#   OCF_RESKEY_config
#   OCF_RESKEY_datadir
#   OCF_RESKEY_user
#   OCF_RESKEY_group
#   OCF_RESKEY_test_table
#   OCF_RESKEY_test_user
#   OCF_RESKEY_test_passwd
#   OCF_RESKEY_enable_creation
#   OCF_RESKEY_additional_parameters
#   OCF_RESKEY_log
#   OCF_RESKEY_pid
#   OCF_RESKEY_socket
#   OCF_RESKEY_replication_user
#   OCF_RESKEY_replication_passwd
#   OCF_RESKEY_replication_port
#   OCF_RESKEY_replication_options
#   OCF_RESKEY_max_slave_lag
#   OCF_RESKEY_evict_outdated_slaves
#   OCF_RESKEY_reader_attribute
#   OCF_RESKEY_reader_failcount
#   OCF_RESKEY_backup_lockfile
#   OCF_RESKEY_geo_remote_IP
#   OCF_RESKEY_geo_remote_user
#   OCF_RESKEY_geo_remote_opts
#   OCF_RESKEY_booth_master_ticket
#   OCF_RESKEY_post_promote_script
#   OCF_RESKEY_prm_binlog_parser_path
#   OCF_RESKEY_async_stop
#   OCF_RESKEY_try_restart_crashed_master

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################################################

# Fill in some defaults if no values are specified.
# Only the filesystem layout and the service account differ per platform.
HOSTOS=`uname`
if [ "X${HOSTOS}" = "XOpenBSD" ];then
    OCF_RESKEY_binary_default="/usr/local/bin/mysqld"
    OCF_RESKEY_config_default="/etc/my.cnf"
    OCF_RESKEY_datadir_default="/var/mysql"
    OCF_RESKEY_user_default="_mysql"
    OCF_RESKEY_group_default="_mysql"
    OCF_RESKEY_log_default="/var/log/mysqld.log"
    OCF_RESKEY_pid_default="/var/mysql/mysqld.pid"
    OCF_RESKEY_socket_default="/var/run/mysql/mysql.sock"
else
    OCF_RESKEY_binary_default="/usr/sbin/mysqld"
    OCF_RESKEY_config_default="/etc/my.cnf"
    OCF_RESKEY_datadir_default="/var/lib/mysql"
    OCF_RESKEY_user_default="mysql"
    OCF_RESKEY_group_default="mysql"
    OCF_RESKEY_log_default="/var/log/mysqld.log"
    OCF_RESKEY_pid_default="/var/run/mysql/mysqld.pid"
    OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock"
fi
OCF_RESKEY_client_binary_default="mysql"
OCF_RESKEY_binary_prefix_default=""
OCF_RESKEY_test_user_default="root"
OCF_RESKEY_test_table_default="mysql.user"
OCF_RESKEY_test_passwd_default=""
OCF_RESKEY_enable_creation_default=0
OCF_RESKEY_additional_parameters_default=""
OCF_RESKEY_replication_port_default="3306"
OCF_RESKEY_max_slave_lag_default="3600"
OCF_RESKEY_evict_outdated_slaves_default="false"
OCF_RESKEY_reader_attribute_default="readable"
OCF_RESKEY_reader_failcount_default="1"
OCF_RESKEY_backup_lockfile_default="/var/lock/innobackupex"
OCF_RESKEY_booth_master_ticket_default="ticketMaster"
OCF_RESKEY_async_stop_default=0
OCF_RESKEY_try_restart_crashed_master_default=1

# Apply the defaults. ": ${VAR=value}" assigns only when VAR is unset, so
# anything supplied by the cluster manager (even an empty string) wins.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
MYSQL_SBINDIR=`dirname ${OCF_RESKEY_binary}`

: ${OCF_RESKEY_binary_prefix=${OCF_RESKEY_binary_prefix_default}}
: ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}}

: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}}

: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}

: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}}

: ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}}
: ${OCF_RESKEY_test_table=${OCF_RESKEY_test_table_default}}
: ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}}

: ${OCF_RESKEY_enable_creation=${OCF_RESKEY_enable_creation_default}}
: ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}}

# No *_default is defined above for replication_user, replication_passwd and
# replication_options, so they fall back to the empty string.
: ${OCF_RESKEY_replication_user=${OCF_RESKEY_replication_user_default}}
: ${OCF_RESKEY_replication_passwd=${OCF_RESKEY_replication_passwd_default}}
: ${OCF_RESKEY_replication_port=${OCF_RESKEY_replication_port_default}}
: ${OCF_RESKEY_replication_options=${OCF_RESKEY_replication_options_default}}

: ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
: ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}

: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
: ${OCF_RESKEY_reader_failcount=${OCF_RESKEY_reader_failcount_default}}

: ${OCF_RESKEY_backup_lockfile=${OCF_RESKEY_backup_lockfile_default}}

# The default must sit INSIDE the braces. The previous form,
# ": ${VAR}=value", only expanded VAR and handed "<value-of-VAR>=value" to
# the ":" builtin as an argument, so none of these defaults was ever applied.
: ${OCF_RESKEY_geo_remote_IP=""}
: ${OCF_RESKEY_geo_remote_user="root"}
: ${OCF_RESKEY_geo_remote_opts=""}
: ${OCF_RESKEY_booth_master_ticket=${OCF_RESKEY_booth_master_ticket_default}}
: ${OCF_RESKEY_post_promote_script=""}

# No prm_binlog_parser_path_default is defined above; falls back to empty.
: ${OCF_RESKEY_prm_binlog_parser_path=${OCF_RESKEY_prm_binlog_parser_path_default}}
: ${OCF_RESKEY_async_stop=${OCF_RESKEY_async_stop_default}}
: ${OCF_RESKEY_try_restart_crashed_master=${OCF_RESKEY_try_restart_crashed_master_default}}

#######################################################################
# Convenience variables

MYSQL=$OCF_RESKEY_client_binary

# Add a timeout to the mysql client, no command should take more than 5s.
# When /usr/bin/timeout is not available the client runs without a limit.
if [ -x "/usr/bin/timeout" ]; then
    TIMEOUT="/usr/bin/timeout 5 "
else
    TIMEOUT=""
fi

MYSQL="$TIMEOUT $MYSQL"
MYSQLBINLOG=`which mysqlbinlog 2> /dev/null`
MYSQL_BINDIR=`dirname ${OCF_RESKEY_client_binary}` MYSQL_OPTIONS_LOCAL="-A -S $OCF_RESKEY_socket --connect_timeout=10" MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd" MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd" MYSQL_LAST_ERR=0 MYSQL_TOO_MANY_CONN_ERR=1040 CRM_MASTER="$TIMEOUT ${HA_SBIN_DIR}/crm_master -l reboot " HOSTNAME=`uname -n` CRM_ATTR="$TIMEOUT ${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME -q" INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'` CRM_ATTR_REPL_INFO="$TIMEOUT ${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication -q " CRM_ATTR_REPL_STATUS="$TIMEOUT ${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_STATUS -s mysql_replication -q " CRM_ATTR_LAST_TRX="$TIMEOUT ${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_NEW_MASTER_LAST_TRX -s mysql_replication -q " CRM_ATTR_MASTER_CRASHED_TS="$CRM_ATTR -l reboot --name ${INSTANCE_ATTR_NAME}_master_crashed" CRM_ATTR_NODE_LAST_TRX_MD5="$CRM_ATTR -l reboot --name ${INSTANCE_ATTR_NAME}_last_trx_md5" CRM_RES="$TIMEOUT ${HA_SBIN_DIR}/crm_resource" CRM_TICKET="$TIMEOUT ${HA_SBIN_DIR}/crm_ticket" SSH="$TIMEOUT /usr/bin/ssh " MAX_BINLOG_SIZE_CACHE="${HA_RSCTMP}/max_binlog_size_cache" ASYNC_STOP_WITNESS_FILE="${HA_RSCTMP}/stop_${INSTANCE_ATTR_NAME}" OCF_STOPPING=100 #custom error code for async_stop ####################################################################### usage() { cat < 1.0 Resource script for MySQL. May manage a standalone MySQL database, a clone set with externally managed replication, or a complete master/slave replication setup. While managing replication, the default behavior is to use uname -n values in the change master to command. 
Other IPs can be specified manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP giving the IP to use for replication. For example, if the mysql primitive you are using is p_mysql, the attribute to set will be p_mysql_mysql_master_IP. Manages a MySQL database instance Location of the MySQL server binary MySQL server binary A prefix to the MySQL server binary. I could be for example a LD_PRELOAD or a call to numactl. MySQL server binary prefix Location of the MySQL client binary MySQL client binary Configuration file MySQL config Directory containing databases MySQL datadir User running MySQL daemon MySQL user Group running MySQL daemon (for logfile and directory permissions) MySQL group The logfile to be used for mysqld. MySQL log file The pidfile to be used for mysqld. MySQL pid file The socket to be used for mysqld. MySQL socket Table to be tested in monitor statement (in database.table notation) MySQL test table MySQL test user, must have select privilege on test_table MySQL test user MySQL test user password MySQL test user password If the MySQL database does not exist, it will be created Create the database if it does not exist Additional parameters which are passed to the mysqld on startup. (e.g. --skip-external-locking or --skip-grant-tables) Additional parameters to pass to mysqld MySQL replication user. This user is used for starting and stopping MySQL replication, for setting and resetting the master host, and for setting and unsetting read-only mode. Because of that, this user must have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, and PROCESS privileges on all nodes within the cluster. Mandatory if you define a master-slave resource. MySQL replication user MySQL replication password. Used for replication client and slave. Mandatory if you define a master-slave resource. MySQL replication user password The port on which the Master MySQL instance is listening. 
MySQL replication port Extra options to pass to CHANGE MASTER, be sure to pass a preceeding comma. Handy for SSL, for example: replication_options=", MASTER_SSL=1, MASTER_SSL_CA='/path/to/ca.crt'" MySQL replication options The maximum number of seconds a replication slave is allowed to lag behind its master. Do not set this to zero. What the cluster manager does in case a slave exceeds this maximum lag is determined by the evict_outdated_slaves parameter. Maximum time (seconds) a MySQL slave is allowed to lag behind a master If set to true, any slave which is more than max_slave_lag seconds behind the master has its MySQL instance shut down. If this parameter is set to false in a primitive or clone resource, it is simply ignored. If set to false in a master/slave resource, then exceeding the maximum slave lag will merely push down the master preference so the lagging slave is never promoted to the new master. Determines whether to shut down badly lagging slaves An attribute that the RA can manage to specify whether a node can be read from. This node attribute will be 1 if it's fine to read from the node, and 0 otherwise (for example, when a slave has lagged too far behind the master). A typical example for the use of this attribute would be to tie a set of IP addresses to MySQL slaves that can be read from. This parameter is only meaningful in master/slave set configurations. Sets the node attribute that determines whether a node is usable for clients to read from. The number of times a monitor operation can find the slave to be unsuitable for reader VIP before failing. Useful if there are short intermittent issues like clock adjustments in VMs. Allowed failcount for reader The path to a file that will be exclusively locked by any backup process. The lockfile serves to provide a reliable way of determining whether to restart the slave process or not. If a thirdparty process locks this file, the agent will fail to lock the file and will not start the slave. 
When this agent is able to lock the file, it is assumed that backups are finished and the slave thread should be running and will start it. A typical cron command example would be like: flock -xn /var/lock/innobackupex innobackupex --safe-slave-backup /tmp/mysqlbackup This example will use innobackupex's ability to stop the slave when necessary to ensure backup consistency. During this time the agent will not start the slave. Once the backup is complete, the lock will automatically expire and the agent can start the slave if it isn't already. Path to backup lockfile In case multiple Geo redundant sites are connected with the booth protocol this is the IP to use to connect to the remote cluster to query replication info. Normally this would be the writer VIP on the remote cluster. Also, ssh is used for communication so make sure keys are exchanged and that ssh options are set in a way that connection doesn't take many seconds. If empty, the booth type behavior is not triggered. IP of the remote cluster When geo_remote_IP is not empty, this will be the custom SSH user that can be used to connect to remote sites. SSH user for remote-enabled sites When geo_remote_IP is not empty, this will be the custom SSH options that can be used to connect to remote sites. SSH options for remote-enabled sites In case multiple Geo redundant sites are connected with the booth protocol this is the name of the ticket used to identify the master side. Booth ticket name Allows to run custom code following a promotion. An application of this is to prevent fail-back of the master role after an initial failover. Post promote script Path to the prm_binlog_parser tool that is used to publish the last trx of a new master after a hard crash of the previous master. The tool can be downloaded from https://github.com/percona/percona-pacemaker-agents/tree/master/tools/ybinlogp Path to the prm_binlog_parser tool If set to true, PRM will not wait for MySQL to stop after sending the SIGTERM signal. 
This can be useful to speed up failover when a server has a large number of dirty pages and takes a long time to shutdown, or worse, receives a SIGKILL after the stop timeout. The main drawback is that if PRM wants to restart MySQL before it completed its shutdown, the operation will error out. Asynchronous stop of MySQL If set to true, PRM will try to restart a failed master in place instead of promoting another node. This can help recover untransmitted binary logs. However, if you have a large database that takes a long time to recovery, this may not be a good option for you. Try restarting a crashed master END } # Convenience functions # get the binlog references and md5 of the payload. Currently unable to report # the 1st trx. get_last_binlog() { local binlog_files local maxEntries local lastNSec local Ts Year Month Day Time local startAt nlines local bltempfile binlog_files=$1 maxEntries=$2 #lastNSec=$3 # for debugging cp "$binlog_files" /tmp let nlines=maxEntries*3+6 # below return 3 lines per Xid + need to skip last 2 startAt=`$MYSQLBINLOG -vvv --base64-output=DECODE-ROWS $1 | \ grep 'Xid =' -A2 | grep -v '\-\-' | tail -n $nlines | \ egrep 'Xid|\# at [0-9]{2,10}' | tac | grep -A2 Xid | \ grep '# at ' | tail -n 1 | rev | cut -d' ' -f1 | rev` md5='' queryok=0 bltempfile=`mktemp ${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.XXXXXX` ( for blf in $1; do blf_no_path=`echo "$blf" | rev | cut -d'/' -f1 | rev` echo "$blf_no_path@" $MYSQLBINLOG --base64-output=DECODE-ROWS -vvv --start-position="$startAt" $blf | \ ( while read line; do if [ "$queryok" -eq 1 ]; then if [[ $line =~ ^\# ]]; then if [[ $line =~ .*end_log_pos\ ([0-9]*).*Xid\ =\ [0-9]*$ ]]; then echo "${BASH_REMATCH[1]},`md5sum $bltempfile | cut -d' ' -f1`" echo -n '' > $bltempfile else if [[ $line =~ ^\#\#\# ]]; then echo "${line}" >> $bltempfile fi fi else echo "${line}" >> $bltempfile fi else if [[ $line =~ ^BEGIN ]]; then queryok=1 fi fi done ) done ) | tail -n $maxEntries echo "@" rm -f $bltempfile } # Set the 
# master score. The presence of the master_crashed_ts attribute
# prevents the update.
#
# Arguments: $1 - score handed to "$CRM_MASTER -v"
# The score is only written while the master_crashed attribute reads 0
# (0 is also what the query returns by default when the attribute is absent).
set_master_score() {
    local master_crashed_ts

    master_crashed_ts=`$CRM_ATTR_MASTER_CRASHED_TS --query --default=0`
    if [ "$master_crashed_ts" -eq "0" ]; then
        $CRM_MASTER -v $1
    fi
}

set_read_only() {
    # Sets or unsets read-only mode.  Accepts one boolean as its
    # optional argument. If invoked without any arguments, defaults to
    # enabling read only mode. Should only be set in master/slave
    # setups.
    # Returns the exit status of the mysql_run call that issues
    # "SET GLOBAL read_only".
    local ro_val

    # ${1-on}: a missing argument means "on", as documented above. The
    # previous "ocf_is_true $1" evaluated an absent argument as false and
    # therefore switched read-only OFF when called without arguments.
    if ocf_is_true "${1-on}"; then
        ro_val="on"
    else
        ro_val="off"
    fi
    mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
        -e "SET GLOBAL read_only=${ro_val}"
}

get_read_only() {
    # Check if read-only is set.
    # Returns 0 when the server reports read_only = ON, 1 otherwise
    # (including when the query produces no output).
    local read_only_state

    # -N suppresses the column header; the value is the second field.
    read_only_state=`mysql_run -Q -sw -O $MYSQL -N $MYSQL_OPTIONS_REPL \
        -e "SHOW VARIABLES like 'read_only'" | awk '{print $2}'`

    if [ "$read_only_state" = "ON" ]; then
        return 0
    else
        return 1
    fi
}

is_slave() {
    # Determine whether the machine is currently running as a MySQL
    # slave, as determined per SHOW SLAVE STATUS. Returns 1 if SHOW
    # SLAVE STATUS creates an empty result set, 0 otherwise.
    local rc

    # Check whether this machine should be slave: it must be part of a
    # master/slave resource and be read-only.
    if ! ocf_is_ms || ! get_read_only; then
        return 1
    fi

    get_slave_info
    rc=$?

    if [ $rc -eq 0 ]; then
        # show slave status is not empty
        # Is there a master_log_file defined? (master_log_file is deleted
        # by reset slave)
        if [ "$master_log_file" ]; then
            return 0
        else
            return 1
        fi
    else
        # "SHOW SLAVE STATUS" returns an empty set if instance is not a
        # replication slave
        return 1
    fi
}

parse_slave_info() {
    # Extracts field $1 from result of "SHOW SLAVE STATUS\G" from file $2.
    # The field name must be preceded by a space in the file, so e.g.
    # "Last_Errno" does not match the "Last_IO_Errno" line.
    sed -ne "s/^.* $1: \(.*\)$/\1/p" < "$2"
}

# get the current max_binlog_size.
# Since this value rarely changes, we cache
# it for 1h
#
# Prints max_binlog_size on stdout.
# - Cache file present: print it; if its status-change time (stat %Z) is
#   more than 3600s old, delete it so the NEXT call queries MySQL again.
# - Cache file absent: query MySQL, store the second output column in the
#   cache file and print it.
# NOTE(review): the cache file is created by the redirection even when the
# query yields nothing, so an empty value would be served until it expires.
get_max_binlog_size() {
    if [ -e $MAX_BINLOG_SIZE_CACHE ]; then
        cat $MAX_BINLOG_SIZE_CACHE

        if [ `date +%s` -gt "$((`stat -c %Z $MAX_BINLOG_SIZE_CACHE`+3600))" ]; then
            rm $MAX_BINLOG_SIZE_CACHE
        fi
    else
        mysql_run -Q -sw -O $MYSQL -N $MYSQL_OPTIONS_REPL \
            -e "Show global variables like 'max_binlog_size';" | \
            awk '{ print $2 }' > $MAX_BINLOG_SIZE_CACHE
        cat $MAX_BINLOG_SIZE_CACHE
    fi
}

# Loads the output of "SHOW SLAVE STATUS\G" into shell variables.
# Sets (not declared local here): master_host, master_user, master_port,
#   master_log_file, relay_log_file, master_log_pos, slave_sql, slave_io,
#   slave_io_state, last_errno, secs_behind.
# Returns: $OCF_SUCCESS when the variables are (already) populated,
#   $OCF_ERR_GENERIC when the statement produced no output.
# The query is skipped while both master_log_file and master_host are
# non-empty; a caller that needs fresh data has to clear one of them first
# (unset_master below does "unset master_host" for that reason).
get_slave_info() {
    local mysql_options tmpfile

    if [ "$master_log_file" -a "$master_host" ]; then
        # variables are already defined, get_slave_info has been run before
        return $OCF_SUCCESS
    else
        tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`

        mysql_run -Q -sw -O $MYSQL $MYSQL_OPTIONS_REPL \
            -e 'SHOW SLAVE STATUS\G' > $tmpfile

        if [ -s $tmpfile ]; then
            master_host=`parse_slave_info Master_Host $tmpfile`
            master_user=`parse_slave_info Master_User $tmpfile`
            master_port=`parse_slave_info Master_Port $tmpfile`
            master_log_file=`parse_slave_info Master_Log_File $tmpfile`
            relay_log_file=`parse_slave_info Relay_Log_File $tmpfile`
            master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
            slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
            slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
            slave_io_state=`parse_slave_info Slave_IO_State $tmpfile`
            last_errno=`parse_slave_info Last_Errno $tmpfile`
            secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
            ocf_log debug "MySQL instance has a non empty slave status"
        else
            # Instance produced an empty "SHOW SLAVE STATUS" output --
            # instance is not a slave
            ocf_log err "check_slave invoked on an instance that is not a replication slave."
            rm -f $tmpfile
            return $OCF_ERR_GENERIC
        fi
        rm -f $tmpfile
        return $OCF_SUCCESS
    fi
}

# Evaluates replication health from the variables filled by get_slave_info
# and adjusts the reader attribute and the master score accordingly.
# Most failure paths terminate the whole script with "exit" (usually with
# $OCF_SUCCESS, so that the resource is not restarted); only the
# "empty slave status" branch at the end uses "return".
# Reads globals defined outside the visible part of the file:
#   glb_cib_master, glb_remote_info, glb_master_exists, MYSQL_LAST_ERR.
check_slave() {
    # Checks slave status
    local rc new_master

    get_slave_info
    rc=$?

    if [ $rc -eq 0 ]; then
        # Did we receive an error other than max_connections?
        if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
            # Whoa. Replication ran into an error. This slave has
            # diverged from its master. Make sure this resource
            # doesn't restart in place.
            ocf_log err "MySQL instance configured for replication, but replication has failed."

            # Just pull the reader VIP away, killing MySQL here would be pretty evil
            # on a loaded server
            set_reader_attr 0

            # Since replication is broken, not suitable to be a master
            set_master_score -INF

            exit $OCF_SUCCESS
        fi

        # If we got max_connections, let's only remove the vip
        if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
            set_reader_attr 0
            exit $OCF_SUCCESS
        fi

        if [ "$slave_io" != 'Yes' ]; then
            # Not necessarily a bad thing. The master may have
            # temporarily shut down, and the slave may just be
            # reconnecting. A warning can't hurt, though.
            ocf_log warn "MySQL Slave IO threads currently not running."

            # Sanity check, are we at least on the right master
            if [ "$master_host" != "$glb_cib_master" ]; then
                # Not pointing to the right master

                # Is this a recent master failover on the remote side
                if [ "${#glb_remote_info}" -gt "0" -a "$slave_sql" = 'Yes' ]; then
                    # looks like, the sql thread is still running, no need
                    # to remove the vip, doing nothing
                    :
                else
                    set_reader_attr 0
                fi

                # try setting up the slave with the new master
                set_master
                exit $OCF_SUCCESS
            elif [ "$slave_sql" == 'Yes' ]; then
                # If the sql thread is running, it is an issue with the io thread
                # let's try to restart it
                if [ "$slave_io_state" != "" ]; then
                    # The io thread is running but is not connected, let's restart it.
                    mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
                        -e "STOP SLAVE IO_THREAD"
                fi

                # At this point, the io_thread should be stopped.
                # let's try to start it again.
                mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
                    -e "START SLAVE IO_THREAD"

                # We give some time to connect
                sleep 2
                # NOTE(review): master_log_file/master_host are normally still
                # set here, in which case get_slave_info returns its cached
                # values and slave_io is not refreshed -- confirm.
                get_slave_info
                rc=$?

                if [ $rc -eq 0 -a "$slave_io" == 'Yes' ]; then
                    ocf_log info "MySQL Slave IO thread started succesfully."
                else
                    ocf_log warn "We could not start the MySQL Slave IO thread."
                fi
            fi
        fi

        if [ "$slave_sql" != 'Yes' ]; then
            # We don't have a replication SQL thread running. Not a
            # good thing. Try to recover by restarting the SQL thread
            # and remove reader vip. Prevent MySQL restart.
            ocf_log err "MySQL Slave SQL threads currently not running."

            # Remove reader vip
            set_reader_attr 0

            # If sql is not running, can't be a master
            set_master_score -INF

            # Check that the flock tool exists first
            if type flock &>/dev/null; then
                # The subshell holds fd 8 on the backup lockfile; the slave is
                # only started when an exclusive, non-blocking lock is obtained.
                (
                    flock -xn 8
                    if [ $? -eq 0 ]; then
                        mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
                            -e "START SLAVE"
                    else
                        ocf_log info "Unable to lock $OCF_RESKEY_backup_lockfile. Not starting slave."
                    fi
                ) 8>$OCF_RESKEY_backup_lockfile
            else
                # try to restart slave
                mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
                    -e "START SLAVE"
            fi

            # Return success to prevent a restart
            exit $OCF_SUCCESS
        fi

        if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then
            # We're supposed to bail out if we lag too far
            # behind. Let's check our lag.
            if [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
                ocf_log err "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)."
                # Remove reader vip
                set_reader_attr 0

                exit $OCF_ERR_INSTALLED
            fi
        elif ocf_is_ms; then
            # Even if we're not set to evict lagging slaves, we can
            # still use the seconds behind master value to set our
            # master preference.
            local master_pref

            # "test" exits with 2 when its operand is not an integer.
            test $secs_behind -eq 0 2>/dev/null
            if [ $? -eq 2 ]; then
                # SBM is undefined or not an integer
                master_pref=0
                set_reader_attr 0
            else
                master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
                if [ $master_pref -lt 0 ]; then
                    # Sanitize a below-zero preference to just zero
                    master_pref=0
                fi

                if [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
                    set_reader_attr 0
                else
                    set_reader_attr 1
                fi

                # Edge case verification, check if on the right master
                set_master nologging
            fi

            # Is the datadir almost full
            if check_datadir_state; then
                set_master_score $master_pref
            else
                # full so not good for a master
                set_master_score -2147483640
            fi
        fi

        ocf_log debug "MySQL instance running as a replication slave"
    else
        # Instance produced an empty "SHOW SLAVE STATUS" output --
        # instance is not a slave
        # TODO: Needs to handle when get_slave_info will return too many connections error
        if [ $MYSQL_LAST_ERR -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
            # Remove the vip
            set_reader_attr 0
            return $OCF_SUCCESS
        fi

        # An empty status could happen when a master is demoted in a
        # geo DR setup, let's check
        if [ $MYSQL_LAST_ERR -eq 0 -a $glb_master_exists -eq 1 ]; then
            # This is not the master side, let's try to setup the slave
            # No need to unset the master since slave status is empty
            set_reader_attr 0
            set_master
            return $OCF_SUCCESS
        fi

        ocf_log err "check_slave invoked on an instance that is not a replication slave."
        exit $OCF_ERR_GENERIC
    fi
}

# Points this instance at the current master with CHANGE MASTER TO.
# Arguments: $1 - optional; the literal "nologging" silences the
#                 "Kept master pos" message.
# The target is the first '|'-separated field of glb_remote_info (first
# whitespace-separated word) when that is set and glb_master_side is
# non-zero, otherwise of glb_local_info; fields 2 and 3 are the log file and
# position. Nothing is changed when the slave already replicates from that
# host. The glb_* variables are defined outside the visible part of the file.
# NOTE(review): master_log_file and master_log_pos are declared local here,
# so the get_slave_info call below appears to see an empty master_log_file
# and re-query the server -- confirm that this is intended.
# NOTE(review): the outer test only checks that glb_master_exists is
# non-empty, while check_slave compares it with 1 -- confirm.
set_master() {
    local new_master master_log_file master_log_pos new_master_info
    local master_params new_master_log_file new_master_log_pos

    if [ "$glb_master_exists" ]; then
        if [ "${#glb_remote_info}" -gt "0" ]; then
            # geo_remote_IP is defined, let's do the booth part
            if [ $glb_master_side -ne 0 ]; then
                # this is _not_ the side with the token
                new_master_info=`echo $glb_remote_info | awk '{ print $1 }'`
                new_master=`echo $new_master_info | cut -d'|' -f1`
                new_master_log_file=`echo $new_master_info | cut -d'|' -f2`
                new_master_log_pos=`echo $new_master_info | cut -d'|' -f3`
            fi
        fi

        if [ "${#new_master_info}" -eq "0" ]; then
            new_master=`echo $glb_local_info | cut -d'|' -f1`
            new_master_log_file=`echo $glb_local_info | cut -d'|' -f2`
            new_master_log_pos=`echo $glb_local_info | cut -d'|' -f3`
        fi

        # Keep replication position
        get_slave_info

        if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
            # master_params=", MASTER_LOG_FILE='$master_log_file', \
            #     MASTER_LOG_POS=$master_log_pos"
            if [ "$1" = "nologging" ]; then
                :
            else
                ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
            fi
            return
        else
            if [ -n "$new_master_log_file" -a -n "$new_master_log_pos" ]; then
                master_params=", MASTER_LOG_FILE='$new_master_log_file', \
                    MASTER_LOG_POS=$new_master_log_pos"
                ocf_log info "Restored master pos for $new_master : $new_master_log_file:$new_master_log_pos"
            fi
        fi

        # Informs the MySQL server of the master to replicate
        # from. Accepts one mandatory argument which must contain the host
        # name of the new master host. The master must either be unchanged
        # from the last master the slave replicated from, or freshly
        # reset with RESET MASTER.
        mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
            -e "STOP SLAVE;CHANGE MASTER TO MASTER_HOST='$new_master', \
            MASTER_PORT=$OCF_RESKEY_replication_port, \
            MASTER_USER='$OCF_RESKEY_replication_user', \
            MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' \
            $OCF_RESKEY_replication_options $master_params;START SLAVE;"
    fi
}

# Stops replication on this instance and clears its master configuration.
# Steps: wait for the IO thread to drain, stop it, wait for the relay log to
# be applied, stop the slave, publish the md5 of the last transaction and a
# position-based master score, then RESET SLAVE.
# Returns $OCF_SUCCESS early when the instance is not a slave; exits the
# script with $OCF_ERR_GENERIC when a STOP/RESET statement fails.
# Sets (not local): rc, relaylog_path, last_relaylog_file, last_trx_md5.
unset_master(){
    # Instructs the MySQL server to stop replicating from a master
    # host.

    # If we're currently not configured to be replicating from any
    # host, then there's nothing to do. But we do log a warning as
    # no-one but the CRM should be touching the MySQL master/slave
    # configuration.
    is_slave
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log warn "Attempted to unset the replication master on an instance that is not configured as a replication slave"
        return $OCF_SUCCESS
    fi

    local tmpfile
    tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX`

    # At this point, the master is read only so there should not be much binlogs to transfer
    # Let's wait for the last bits
    while true; do
        get_slave_info
        rc=$?

        # Is the slave_io thread running?
        if [ "$slave_io" != 'Yes' ]; then
            ocf_log info "Slave IO thread not running, master likely dead or stopped"
            break;
        fi

        # Each grep below looks for one processlist state that means
        # "nothing more will arrive from the master".
        mysql_run -Q -sw -O $MYSQL $MYSQL_OPTIONS_REPL \
            -e 'SHOW PROCESSLIST\G' > $tmpfile
        if grep -i 'Master has sent all binlog to slave' $tmpfile >/dev/null; then
            ocf_log info "MySQL slave has finished reading master binary log"
            break
        fi
        if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then
            ocf_log info "MySQL slave has finished reading master binary log"
            break
        fi
        if grep -i 'Waiting to reconnect after' $tmpfile >/dev/null; then
            ocf_log info "Master is down, no more binary logs to come"
            break
        fi
        if grep -i 'Reconnecting after a failed' $tmpfile >/dev/null; then
            ocf_log info "Master is down, no more binary logs to come"
            break
        fi
        if grep -i 'Connecting to master' $tmpfile >/dev/null; then
            ocf_log info "Master is down, no more binary logs to come"
            break
        fi
        if ! grep 'system user' $tmpfile >/dev/null; then
            ocf_log info "Slave is not running - not waiting to finish"
            break
        fi

        sleep 1
    done

    # Now, stop the slave I/O thread and wait for relay log
    # processing to complete
    mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
        -e "STOP SLAVE IO_THREAD"
    if [ $? -gt 0 ]; then
        ocf_log err "Error stopping slave IO thread"
        rm -f $tmpfile
        exit $OCF_ERR_GENERIC
    fi

    while true; do
        mysql_run -Q -sw -O $MYSQL $MYSQL_OPTIONS_REPL \
            -e 'SHOW PROCESSLIST\G' > $tmpfile
        # Of course, slave messages changed over MySQL versions...
        if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
            ocf_log info "MySQL slave has finished processing relay log"
            break
        fi
        if ! grep -q 'system user' $tmpfile; then
            ocf_log info "Slave not runnig - not waiting to finish"
            break
        fi
        ocf_log info "Waiting for MySQL slave to finish processing relay log"
        sleep 1
    done
    rm -f $tmpfile

    # Now, stop all slave activity and unset the master host
    mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
        -e "STOP SLAVE"
    if [ $? -gt 0 ]; then
        ocf_log err "Error stopping rest slave threads"
        exit $OCF_ERR_GENERIC
    fi

    # a last get_slave_info to save the status in variables may be needed
    # by pre-promote notification
    unset master_host  # need to unset for get_slave_info to run
    get_slave_info

    # First, where are the relay logs? That will be easier when the using_multi_config
    # branch will be merged. If the path is not defined, the output will be "."
    relaylog_path=`${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config --verbose --help --user=$OCF_RESKEY_user | grep -e '^relay-log ' | awk '{ print $2 }'`
    relaylog_path=`dirname $relaylog_path`
    if [ "$relaylog_path" == "." ]; then
        relaylog_path=$OCF_RESKEY_datadir
    fi

    # The last relay log is copied to /tmp and parsed from there.
    last_relaylog_file="$relay_log_file"
    cp ${relaylog_path}/${last_relaylog_file} /tmp/${last_relaylog_file}

    # ok now we need to find the md5 of the last trx
    last_trx_md5=$(get_last_binlog /tmp/${last_relaylog_file} 1 60)
    last_trx_md5=`echo $last_trx_md5 | cut -d',' -f2`
    if [ ! -z "$last_trx_md5" ]; then
        $CRM_ATTR_NODE_LAST_TRX_MD5 -v $last_trx_md5
    fi

    # Let's establish the master score based on the following
    # rule.
    #   score = ((file number diff)*master_max_binlog_size
    #            + fileposdiff/10 + constant
    # (NOTE(review): the expression below divides by 100, not 10.)
    #
    # All events are at least 10 bytes so dividing by 10 doesn't reduce
    # the resolution and increases the span.
    #
    # Since the master publishes its status only once per few
    # seconds, the fileposdiff is likely positive.
    # We'll cap all values to the signed int range and target
    # 1B as the value if a slave is fully in sync with the master

    # Master-score is normally calculated from slave-lag but it is better to use
    # binlog offset position to pick the most up to date slave
    local master_status_attr
    local Iter=5
    # Up to five attempts, one second apart, to read the published status.
    while [ "$Iter" -gt 0 ]; do
        master_status_attr=`$CRM_ATTR_REPL_STATUS --query`
        if [ "$?" -eq 0 ]; then
            break;
        else
            sleep 1
            let Iter-=1
        fi
    done

    # $? here is the exit status of the while loop, i.e. of the last command
    # run in its body: 0 after "break", non-zero when "let Iter-=1"
    # evaluated to 0 on the final attempt. Do not insert commands between
    # "done" and this test.
    if [ $? -eq 0 ]; then
        # There's a master status entry although we don't know if it is
        # a valid one
        local last_reported_master_file last_reported_master_pos master_max_binlog_size
        local last_reported_master_file_number master_log_file_number master_score

        # master_status_attr is "file|position|max_binlog_size"
        last_reported_master_file=`echo $master_status_attr | cut -d'|' -f1`
        last_reported_master_pos=`echo $master_status_attr | cut -d'|' -f2`
        master_max_binlog_size=`echo $master_status_attr | cut -d'|' -f3`
        # numeric suffix of the binlog file name with leading zeros removed
        last_reported_master_file_number=`echo $last_reported_master_file | cut -d'.' -f2 | sed -ne "s/^0*\([1-9][0-9]*\)$/\1/p"`
        master_log_file_number=`echo $master_log_file | cut -d'.' -f2 | sed -ne "s/^0*\([1-9][0-9]*\)$/\1/p"`
        master_score=$((1000000000+(\
            ($master_log_file_number-$last_reported_master_file_number)*\
            $master_max_binlog_size+$master_log_pos-$last_reported_master_pos)/100))

        # now, the caps, the upper cap is unlikely
        if [ $master_score -gt 2147483647 ]; then
            master_score=2147483647
        fi

        # the lower cap could happen if a slave lags behind by
        # more than 30GB of binlog. In that case... do we really care
        # if we floor the value
        if [ $master_score -lt -2147483647 ]; then
            master_score=-2147483647
        fi

        set_master_score $master_score

        # Next, we need a reminder that the master crashed and when, that will be
        # used to publish the last trx in the promote event if we are picked
        # as the new master.
        $CRM_ATTR_MASTER_CRASHED_TS -v `date +%s`

        # Let's give pacemaker some time to realize there are new scores
        sleep 1
    fi

    mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
        -e "RESET SLAVE /*!50516 ALL */;"
    if [ $? -gt 0 ]; then
        ocf_log err "Failed to reset slave"
        exit $OCF_ERR_GENERIC
    fi
}

# Start replication as slave
# Returns the exit status of the mysql_run call.
start_slave() {
    mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \
        -e "START SLAVE"
}

# Set the attribute controlling the readers VIP
# Arguments: $1 - 0 to count the attribute down by one (not below 0),
#                 any other integer to reset it to
#                 $OCF_RESKEY_reader_failcount.
# With $1 = 0 and no readable current value (get_reader_attr prints -1)
# the attribute is written as 0.
set_reader_attr() {
    local curr_attr_value

    curr_attr_value=$(get_reader_attr)

    if [ "$1" -eq "0" ]; then
        if [ "$curr_attr_value" -gt "0" ]; then
            curr_attr_value=$((${curr_attr_value}-1))
            $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $curr_attr_value
        else
            $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v 0
        fi
    else
        # only write when the value actually changes
        if [ "$curr_attr_value" -ne "$OCF_RESKEY_reader_failcount" ]; then
            $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $OCF_RESKEY_reader_failcount
        fi
    fi
}

# Returns 0 when OCF_RESKEY_geo_remote_IP is empty (no booth setup) or when
# exactly one line of the ticket query reports the ticket as granted;
# returns 1 otherwise.
is_master_side() {
    # Returns true (0) if this cluster has a grant for the booth ticket OCF_RESKEY_booth_master_ticket
    local ticket crmTicketRet

    if [ "${#OCF_RESKEY_geo_remote_IP}" -gt "0" ]; then
        # Try the new format
        # The output of "file" on $CRM_TICKET selects which command syntax
        # is used below.
        crmTicketRet=`file $CRM_TICKET | grep -c 'Bourne-Again shell script'`
        if [ "$crmTicketRet" -eq "1" ]; then
            # got an error, we assume the old format
            ticket=`$CRM_TICKET -t $OCF_RESKEY_booth_master_ticket -Q | grep -c 'true'`
        else
            ticket=`$CRM_TICKET --info | grep $OCF_RESKEY_booth_master_ticket | awk '{ print $2 }' | grep -c granted`
        fi

        if [ "$ticket" -eq "1" ]; then
            return 0
        else
            return 1
        fi
    else
        return 0
    fi
}

# get the attribute controlling the readers VIP
# Prints the attribute value, or -1 when the query command fails.
get_reader_attr() {
    local attr_value
    local rc

    attr_value=`$CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q`
    rc=$?
    if [ "$rc" -eq "0" ]; then
        echo $attr_value
    else
        echo -1
    fi
}

# Stores data for MASTER STATUS from MySQL
# Sets the global master_status_file and overwrites that file with the
# output of "SHOW MASTER STATUS\G".
update_data_master_status() {
    master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}"

    mysql_run -Q -sw -O $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW MASTER STATUS\G" > $master_status_file
}

# Returns the specified value from the stored copy of SHOW MASTER STATUS.
# should be called after update_data_master_status, which sets
# master_status_file
# Arguments:
#   $1 The value to get.
# Prints everything after the first ": " of the line whose first field is
# "<name>:".
get_master_status() {
    awk -v var="$1" '$1 == var ":" {print substr($0, index($0, ":") + 2)}' "$master_status_file"
}

# Determines what IP address is attached to the current host.  The output of the
# crm_attribute command looks like this:
# scope=nodes  name=IP value=10.2.2.1611
# If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n
# The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the
# change master to command.
# Prints the attribute value, or the output of "uname -n" when the
# attribute query fails.
get_local_ip() {
    local IP
    IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G`
    if [ ! $? -eq 0 ]; then
        uname -n
    else
        echo $IP
    fi
}

# Determine if the datadir is full or almost full, the threshold is 97%
# Returns 1 (and logs a warning) at 97% or above, 0 otherwise.
# Sets the global FREE_SPC_PCT from field 5 of "df -P" with the '%' removed;
# NOTE(review): despite its name this looks like the usage percentage
# (the warning text also says "usage") -- confirm.
check_datadir_state() {
    # Get the free space of the binlogdir
    FREE_SPC_PCT=`/bin/df -P $OCF_RESKEY_datadir | /bin/grep -v Filesystem \
        | /bin/sed -e 's/ \+/ /g' | /usr/bin/cut -d' ' -f 5 \
        | /usr/bin/tr -d '%'`

    if [ "$FREE_SPC_PCT" -ge "97" ]; then
        ocf_log warn "Partition $OCF_RESKEY_datadir usage is at " \
            "or more than 97, " \
            "unsuitable for master..."
        return 1
    fi

    return 0
}

#######################################################################

# Functions invoked by resource manager actions

# Checks the static prerequisites of the resource: both binaries (through
# check_binary), the config file, the data directory and the configured
# user and group (through getent).
# Returns $OCF_ERR_INSTALLED on the first failed file/user/group check,
# 0 otherwise.
mysql_validate() {
    check_binary $OCF_RESKEY_binary
    check_binary $OCF_RESKEY_client_binary

    if [ ! -f $OCF_RESKEY_config ]; then
        ocf_log err "Config $OCF_RESKEY_config doesn't exist";
        return $OCF_ERR_INSTALLED;
    fi

    if [ ! -d $OCF_RESKEY_datadir ]; then
        ocf_log err "Datadir $OCF_RESKEY_datadir doesn't exist";
        return $OCF_ERR_INSTALLED;
    fi

    getent passwd $OCF_RESKEY_user >/dev/null 2>&1
    if [ ! $? -eq 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exit";
        return $OCF_ERR_INSTALLED;
    fi

    getent group $OCF_RESKEY_group >/dev/null 2>&1
    if [ ! $? -eq 0 ]; then
        ocf_log err "Group $OCF_RESKEY_group doesn't exist";
        return $OCF_ERR_INSTALLED;
    fi

    true
}

# Return the status of mysqld
# $1 the loglevel to use (mandatory)
# $2 Override async_stop if 1, default to 0
mysql_status() {
    local last_restart_ts
    local kill_exit_code
    local witness_pid
    local override_async_stop

    override_async_stop=0
    test $2 -eq 0 2>/dev/null
    if [ $? -ne 2 ]; then
        override_async_stop=$2
    fi

    if [ ! -e $OCF_RESKEY_pid ]; then
        ocf_log $1 "MySQL is not running"
        if [ -e $ASYNC_STOP_WITNESS_FILE ]; then
            # MySQL is stopped and there's a witness file, cleanup
            rm -f $ASYNC_STOP_WITNESS_FILE
        fi
        return $OCF_NOT_RUNNING;
    fi

    pid=`cat $OCF_RESKEY_pid`;
    if [ -d /proc -a -d /proc/1 ]; then
        [ "u$pid" != "u" -a -d /proc/$pid ]
    else
        kill -s 0 $pid >/dev/null 2>&1
    fi
    kill_exit_code=$?

    if [ "$OCF_RESKEY_async_stop" -eq "1" -a \
        -e $ASYNC_STOP_WITNESS_FILE ]; then
        # Async stop seems to be in progress
        witness_pid=`cat $ASYNC_STOP_WITNESS_FILE | grep pid | cut -d':' -f2`

        if [ $kill_exit_code -eq 0 -a $witness_pid -eq $pid ]; then
            # Should we lie or tell the truth
            if [ "$override_async_stop" -eq "0" ]; then
                # we lie
                # still running but because of async, we report stopped
                return $OCF_NOT_RUNNING
            else
                # we tell the truth
                return $OCF_STOPPING  #custom error code
            fi
        else
            # That shouldn't happen execpt if SIGKILL, cleanup
            rm -f $ASYNC_STOP_WITNESS_FILE
        fi
    fi

    if [ $kill_exit_code -eq 0 ]; then
        return $OCF_SUCCESS
    else
        ocf_log $1 "MySQL not running: removing old PID file"
        rm -f $OCF_RESKEY_pid

        # This is abnormal, is this host the master defined in the cib?
# Also confirm it succeed in starting with the socket file if [ "$glb_master_exists" -eq "1" -a "$glb_cib_master" = $(get_local_ip) \ -a -e "$OCF_RESKEY_socket" ]; then #This is a crashed master if [ "$OCF_RESKEY_try_restart_crashed_master" -eq "1" ]; then # This is the master, let's give it a change to restart # that will allow the slaves a better chance to sync but we # need to avoid letting it restart forever. Has it tried to # restart within the last hour last_crash_ts=`$CRM_ATTR -l reboot --name ${INSTANCE_ATTR_NAME}_last_crash --query -q --default=0` if [ "$last_crash_ts" -ne "0" ]; then if [ `date +%s` -lt "$((${last_crash_ts}+3600))" ]; then # too soon, multiple crash, let's error out set_master_score 0 return $OCF_NOT_RUNNING fi fi $CRM_ATTR -l reboot --name ${INSTANCE_ATTR_NAME}_last_crash -v `date +%s` mysql_start_low rc=$? if [ "$rc" -eq "0" ]; then set_read_only OFF fi return $rc else $CRM_ATTR -l reboot --name ${INSTANCE_ATTR_NAME}_last_crash -v `date +%s` # OCF_ERR_ARGS is a hard error, won't wait for restart set_master_score 0 return $OCF_ERR_ARGS fi fi return $OCF_NOT_RUNNING fi } mysql_monitor() { local rc local status_loglevel="err" local master_resource local master_status_attr local new_master_status_attr : ${OCF_RESKEY_CRM_meta_interval=0} # Set loglevel to info during probe if ocf_is_probe; then status_loglevel="info" fi mysql_status $status_loglevel rc=$? # TODO: check max connections error # If status returned an error, return that immediately if [ $rc -ne $OCF_SUCCESS ]; then return $rc fi if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then # Check if this instance is configured as a slave, and if so # check slave status # Are we currently having a master? if [ "$glb_master_exists" -ne "0" ]; then is_slave rc=$? 
if [ $rc -eq 0 -o "$OCF_RESKEY_CRM_meta_role" = "Slave" ]; then check_slave else update_data_master_status master_status_attr=`$CRM_ATTR_REPL_STATUS --query -q` new_master_status_attr="$(get_master_status File)|$(get_master_status Position)|$(get_max_binlog_size)" rm -f $master_status_file if [ "$master_status_attr" != "$new_master_status_attr" ]; then # Doing in bg, no need to wait and that can hang if a node is lost at the same time $CRM_ATTR_REPL_STATUS -v "$new_master_status_attr" & fi # Is this following a recent master crash? master_crashed_ts=`$CRM_ATTR_MASTER_CRASHED_TS --query --default=0` if [ "$master_crashed_ts" -gt "0" ]; then if [ `date +%s` -gt "$((${master_crashed_ts}+3600))" ]; then #Let's cleanup the cib $CRM_ATTR_MASTER_CRASHED_TS -D $CRM_ATTR_LAST_TRX -D fi fi fi else is_slave rc=$? # Need to cover for crashed master... if so, unset_master will set # the last trx md5 in the cib ... if [ $rc -eq 0 -o "$OCF_RESKEY_CRM_meta_role" = "Slave" ]; then # If there no quorum, we will not reset master # This is optimistic setting as the original master could be part of # majority. If there is no quorum, this node will restart in any case if [ "$cluster_has_quorum" -ne "0" ]; then ocf_log info "Cluster has quorum, resetting replication" unset_master fi set_reader_attr 0 fi fi # Check for test table mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_TEST \ -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table" if [ $MYSQL_LAST_ERR -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then if [ $MYSQL_LAST_ERR -ne 0 ]; then ocf_log err "Failed to select from $OCF_RESKEY_test_table"; return $OCF_ERR_GENERIC; fi else ocf_log info "Master hit max_connections" fi fi if ocf_is_ms && ! get_read_only; then ocf_log debug "MySQL monitor succeeded (master)"; if [ "$OCF_RESKEY_CRM_meta_interval" -eq "0" ]; then # this is a probe and this server is a master so need to set master_score set_master_score 2147483647 set_reader_attr 1 fi if ! 
check_datadir_state; then set_master_score -2147483640 fi return $OCF_RUNNING_MASTER else ocf_log debug "MySQL monitor succeeded"; return $OCF_SUCCESS fi } # Start MySQL in the master-slave context mysql_start() { local current_status if ocf_is_ms; then # Initialize the ReaderVIP attribute, monitor will enable it set_reader_attr 0 # set master_score to 0 in case mysql crashes on startup set_master_score 0 fi mysql_status info 1 # Adding 2nd param here to get the true state in case # async_stop is used current_status=$? if [ "$current_status" = "$OCF_SUCCESS" ]; then ocf_log info "MySQL already running" return $OCF_SUCCESS fi # Is MySQL still stopping, OCF_STOPPING is a custom error code if [ "$current_status" = "$OCF_STOPPING" ]; then # Waiting 5s less than the op timeout shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) count=0 while [ $count -lt $shutdown_timeout ] do sleep 1 mysql_status info 1 if [ "$?" != "$OCF_STOPPING" ]; then break fi count=$(($count+1)) ocf_log debug "MySQL hasn't stopped yet (async_stop). Waiting before trying to start." done # Has MySQL stopped in time if [ $count -eq $shutdown_timeout ]; then # No, the start request was too soon ocf_log err "MySQL asked to start while still stopping" return $OCF_ERR_GENERIC fi fi mysql_start_low rc=$? if [ $rc != $OCF_SUCCESS ]; then ocf_log err "Wasn't able to start MySQL, stopping 'start'." return $rc fi if ocf_is_ms; then # We're configured as a stateful resource. We must start as # slave by default. At this point we don't know if the CRM has # already promoted a master. So, we simply start in read only # mode. Should already be from command line. set_read_only on # Now, let's see whether there is a master. We might be a new # node that is just joining the cluster, and the CRM may have # promoted a master before. 
if [ "$glb_master_exists" -ne 0 -a "$glb_cib_master" != $(get_local_ip) ]; then # In case of network issues, we need to make sure not to reset replication # because REPL_INFO stored on CIBADMIN can be outdated, if that happens # we will break replication. # First, since this is a slave, let's check for current replication # info, if the master host matches current master IP, we should just # resume replication, otherwise we should reset. get_slave_info rc=$? if [ $rc -eq 0 -a "$glb_cib_master" == "$master_host" ]; then ocf_log info "Current Master_Host matches current cluster master, starting slave" else ocf_log info "Changing MySQL configuration to replicate from $master_host." set_master fi start_slave if [ $? -ne 0 ]; then ocf_log err "Failed to start slave" return $OCF_ERR_GENERIC fi else ocf_log info "No MySQL master present - clearing replication state" unset_master fi # We also need to set a master preference, otherwise Pacemaker # won't ever promote us in the absence of any explicit # preference set by the administrator. We choose a low # greater-than-zero preference. set_master_score 1 fi # Initial monitor action if [ -n "$OCF_RESKEY_test_table" -a -n "$OCF_RESKEY_test_user" \ -a -n "$OCF_RESKEY_test_passwd" ]; then OCF_CHECK_LEVEL=10 fi mysql_monitor rc=$? if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then ocf_log err "Failed initial monitor action" return $rc fi ocf_log info "MySQL started" return $OCF_SUCCESS } # low level MySQL start mysql_start_low() { touch $OCF_RESKEY_log chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log chmod 0640 $OCF_RESKEY_log [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log if ocf_is_true "$OCF_RESKEY_enable_creation" && [ ! -d $OCF_RESKEY_datadir/mysql ] ; then ocf_log info "Initializing MySQL database: " $MYSQL_SBINDIR/mysql_install_db --datadir=$OCF_RESKEY_datadir rc=$? 
if [ $rc -ne 0 ] ; then ocf_log err "Initialization failed: $rc"; exit $OCF_ERR_GENERIC fi chown -R $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_datadir fi pid_dir=`dirname $OCF_RESKEY_pid` if [ ! -d $pid_dir ] ; then ocf_log info "Creating PID dir: $pid_dir" mkdir -p $pid_dir chown $OCF_RESKEY_user:$OCF_RESKEY_group $pid_dir fi socket_dir=`dirname $OCF_RESKEY_socket` if [ ! -d $socket_dir ] ; then ocf_log info "Creating socket dir: $socket_dir" mkdir -p $socket_dir chown $OCF_RESKEY_user:$OCF_RESKEY_group $socket_dir fi # Regardless of whether we just created the directory or it # already existed, check whether it is writable by the configured # user for dir in $pid_dir $socket_dir; do # needed to wrap around su a bit, sssd causing issue if [ `su - $OCF_RESKEY_user -s /bin/bash -c "if test -w $dir; then echo yes; else echo no; fi" 2> /dev/null` != "yes" ]; then ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user" exit $OCF_ERR_PERM; fi done # Uncomment to perform permission clensing # - not convinced this should be enabled by default # #chmod 0755 $OCF_RESKEY_datadir #chown -R $OCF_RESKEY_user $OCF_RESKEY_datadir #chgrp -R $OCF_RESKEY_group $OCF_RESKEY_datadir mysql_extra_params= if ocf_is_ms; then mysql_extra_params="$mysql_extra_params --skip-slave-start --read-only" fi ${OCF_RESKEY_binary_prefix} ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \ --pid-file=$OCF_RESKEY_pid \ --socket=$OCF_RESKEY_socket \ --datadir=$OCF_RESKEY_datadir \ --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \ $mysql_extra_params >/dev/null 2>&1 & rc=$? # we also get the process id from $! because the PID file is only # created by mysql as soon as mysql is fully up and running # for example, when recovery is busy, the pid file does not exist yet # this part already creates the PID file as the mysql user # so that other PRM checks know # When recovery happens, the PID file does not exist yet. process_pid=$! 
# mysql_status expects that if the pid is there and it's running # that mysql is completely active #su $OCF_RESKEY_user -c "echo '$process_pid' > $OCF_RESKEY_pid" echo "$process_pid" > ${OCF_RESKEY_pid}.starting if [ $rc != 0 ]; then ocf_log err "MySQL start command failed: $rc" return $rc fi # Spin waiting for the server to come up. # Let the CRM/LRM time us out if required. start_wait=1 while [ $start_wait = 1 ]; do mysql_status info rc=$? if [ $rc = $OCF_SUCCESS ]; then start_wait=0 elif [ $rc != $OCF_NOT_RUNNING ]; then ocf_log info "MySQL start failed: $rc" return $rc fi # if mysql died in the meantime, we shall not wait # until the timeout is reached. kill -s 0 $process_pid > /dev/null mysqld_pid_status=$? if [ "$mysqld_pid_status" -ne "0" ]; then ocf_log err "MySQL daemon died during start, giving up." return $OCF_ERR_GENERIC fi sleep 2 done return $OCF_SUCCESS } mysql_stop() { if ocf_is_ms; then # clear preference for becoming master $CRM_MASTER -D # Remove VIP capability set_reader_attr 0 fi # we rely only on ${OCF_RESKEY_pid}.starting # as this certainly contains the file we need with the PID if [ ! -f ${OCF_RESKEY_pid}.starting ]; then ocf_log info "MySQL is not running" return $OCF_SUCCESS fi pid=`cat ${OCF_RESKEY_pid}.starting 2> /dev/null` /bin/kill $pid > /dev/null rc=$? 
if [ $rc != 0 ]; then ocf_log err "MySQL couldn't be stopped" return $OCF_ERR_GENERIC fi if [ "$OCF_RESKEY_async_stop" -eq "1" ]; then #Ok, MySQL is stopping and the async_stop option is set, just put the #pid and a timestamp in the witness file and return echo "pid:$pid" > $ASYNC_STOP_WITNESS_FILE echo "ts:`date +%s`" >> $ASYNC_STOP_WITNESS_FILE #Don't know yet why the ts, just seems useful for debugging for now ocf_log info "MySQL async stopped"; return $OCF_SUCCESS fi # stop waiting shutdown_timeout=15 if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) fi #Normally, pacemaker handles timeout but here we want to be able to do #a SIGKILL (-9) before the timeout occurs. count=0 while [ $count -lt $shutdown_timeout ] do kill -s 0 $pid rc=$? if [ $rc -ne 0 ]; then break fi count=`expr $count + 1` sleep 1 ocf_log debug "MySQL still hasn't stopped yet. Waiting..." done kill -s 0 $pid if [ $? -eq 0 ]; then ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..." /bin/kill -KILL $pid > /dev/null fi rm ${OCF_RESKEY_pid}.starting ocf_log info "MySQL stopped"; rm -f /var/lock/subsys/mysqld rm -f $OCF_RESKEY_socket return $OCF_SUCCESS } mysql_promote() { local master_info local master_crashed_ts local log_bin_path local tmpfiletrx local nb_trx local last_binlog_number if ( ! mysql_status err ); then return $OCF_NOT_RUNNING fi unset_master # Set Master Info in CIB, cluster level attribute update_data_master_status master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)" ${CRM_ATTR_REPL_INFO} -v "$master_info" rm -f $master_status_file master_crashed_ts=`$CRM_ATTR_MASTER_CRASHED_TS --query` if [ ! -z "$master_crashed_ts" ]; then if [ `date +%s` -lt "$((${master_crashed_ts}+3600))" ]; then # Master crashed less than 1h ago, let's publish the last trx # First, where are the binlogs? That will be easier when the using_multi_config # branch will be merged. 
If the path is not defined, the output will be "." log_bin_path=`${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config --verbose --help --user=$OCF_RESKEY_user | grep -e '^log-bin ' | awk '{ print $2 }'` log_bin_path=`dirname $log_bin_path` if [ "$log_bin_path" == "." ]; then log_bin_path=$OCF_RESKEY_datadir fi # Let's find the last binlog file update_data_master_status last_binlog_file="$(get_master_status File)" tmpfiletrx=`mktemp ${HA_RSCTMP}/trx.${OCF_RESOURCE_INSTANCE}.XXXXXX` # Are we at the beginning of a binlog file, if so mtime of the previous will # be less than 1min last_binlog_number=`echo $last_binlog_file | cut -d'.' -f2 | sed -ne "s/^0*\([1-9][0-9]*\)$/\1/p"` prev_binlog_number=$(($last_binlog_number-1)) last_binlog_file_prefix="`echo $last_binlog_file | cut -d'.' -f1`" if [ "$prev_binlog_number" -gt 0 ]; then # re-adding the 0 padding while [ "${#prev_binlog_number}" -lt "6" ]; do prev_binlog_number="0${prev_binlog_number}" done prev_binlog_mtime=`stat -c %Y ${log_bin_path}/${last_binlog_file_prefix}.${prev_binlog_number}` now=`date +%s` tDiff=$(($now-$prev_binlog_mtime)) if [ "$tDiff" -lt 60 ]; then get_last_binlog '${log_bin_path}/${last_binlog_file_prefix}.${prev_binlog_number} ${log_bin_path}/${last_binlog_file}' 3000 60 > $tmpfiletrx else get_last_binlog ${log_bin_path}/${last_binlog_file} 3000 60 > $tmpfiletrx fi else get_last_binlog ${log_bin_path}/${last_binlog_file} 3000 60 > $tmpfiletrx fi #now we load all that to the cib so that it reaches the other nodes $CRM_ATTR_LAST_TRX -v "`cat $tmpfiletrx | tr '\n' '|'`" rm -f $tmpfiletrx fi fi set_read_only off || return $OCF_ERR_GENERIC # Existing master gets a higher-than-default master preference, so # the cluster manager does not shuffle the master role around # unnecessarily $CRM_ATTR_MASTER_CRASHED_TS -D 2> /dev/null $CRM_MASTER -v 2147483647 # A master can accept reads set_reader_attr 1 if [ "${#OCF_RESKEY_post_promote_script}" -gt "0" -a \ -x "${OCF_RESKEY_post_promote_script}" -a \ ! 
-L "${OCF_RESKEY_post_promote_script}" ]; then ${OCF_RESKEY_post_promote_script} fi return $OCF_SUCCESS } mysql_demote() { if ! mysql_status err; then set_master_score 0 exit $OCF_SUCCESS else # Return master preference to default, so the cluster manager gets # a chance to select a new master set_master_score 1 exit $OCF_SUCCESS fi } mysql_notify() { local master_crashed_ts # If not configured as a Stateful resource, we make no sense of # notifications. if ! ocf_is_ms; then ocf_log info "This agent makes no use of notifications unless running in master/slave mode." return $OCF_SUCCESS fi local type_op type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ocf_log debug "Received $type_op notification." case "$type_op" in 'pre-promote') get_slave_info if [ $? -eq "$OCF_SUCCESS" ]; then # We'll be here only if the master crashed and there has been no # monitor event since. In the event of # a graceful demote, a post-demote notification event would have occurred. # The post-demote include an unset-master that # resets the slave after the completion of the IO and SQL # threads. The post-demote doesn't run if the master host # crashed. # Unset master will publish the updated master_score and the md5 of the # last trx. The update of the master_score will trigger another # pre-promote notification unset_master else # Apparently we sometimes need to help Pacemaker choose the best host. # We'll get here after running pre-promote a first time because # the previous will have run unset_master master_crashed_ts=`$CRM_ATTR_MASTER_CRASHED_TS --query` if [ ! 
-z "$master_crashed_ts" ]; then if [ `date +%s` -lt "$((${master_crashed_ts}+3600))" ]; then highestScore=-2147483647 thisNodeScore=0 for node in $OCF_RESKEY_CRM_meta_notify_slave_uname; do score=`$CRM_MASTER -N $node -G -q` if [ "$node" == "$HOSTNAME" ]; then thisNodeScore=$score fi if [ "$highestScore" -lt "$score" ]; then highestScore=$score fi done if [ "$thisNodeScore" -eq "$highestScore" ]; then if [ "$thisNodeScore" -ne "2147483647" ]; then # This node should be the next master $CRM_MASTER -v 2147483647 fi else # This node shouldn't be the next master if [ "$thisNodeScore" -ne "0" ]; then $CRM_MASTER -v 0 fi fi fi fi fi ;; 'post-promote') # The master has completed its promotion. Now is a good # time to check whether our replication slave is working # correctly. # Is the notification for our set notify_resource=`echo $OCF_RESKEY_CRM_meta_notify_promote_resource|cut -d: -f1` my_resource=`echo $OCF_RESOURCE_INSTANCE|cut -d: -f1` if [ $notify_resource != ${my_resource} ]; then ocf_log debug "Notification is not for us" return $OCF_SUCCESS fi master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "` if [ "$master_host" = ${HOSTNAME} ]; then ocf_log info "This will be the new master, ignoring post-promote notification." else # Is this following a recent master crash? master_crashed_ts=`$CRM_ATTR_MASTER_CRASHED_TS --query` if [ ! -z "$master_crashed_ts" ]; then if [ `date +%s` -lt "$((${master_crashed_ts}+3600))" ]; then # Master crashed less than 1h ago, let's see what our last trx was. # Since the master crashed, we didn't have the post-demote notification # so the slave may still configured if no monitor ops has run # if a monitor ops occurred, it should be saved in the cib last_trx_md5=`$CRM_ATTR_NODE_LAST_TRX_MD5 --query` if [ -z "$last_trx_md5" ]; then # no last_trx_md5 set in the cib, let's try to find it # First, where are the relay logs? That will be easier when the using_multi_config # branch will be merged. 
If the path is not defined, the output will be "." relaylog_path=`${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config --verbose --help --user=$OCF_RESKEY_user | grep -e '^relay-log ' | awk '{ print $2 }'` relaylog_path=`dirname $relaylog_path` if [ "$log_bin_path" == "." ]; then relaylog_path=$OCF_RESKEY_datadir fi # Let's find the last binlog file get_slave_info last_relaylog_file="$relay_log_file" #ok now we need to find the md5 of the last trx last_trx_md5=$(get_last_binlog ${relaylog_path}/${last_relaylog_file} 1 60) last_trx_md5=`echo $last_trx_md5 | cut -d',' -f2` fi if [ ! -z "$last_trx_md5" ]; then # now, let's try to find this md5 in the NEW_MASTER_LAST_TRX attribute # There maybe up to 2 binlog files in the attribute #1st file binlog_file=`$CRM_ATTR_LAST_TRX --query | cut -d'@' -f1` binlog_pos=`$CRM_ATTR_LAST_TRX --query | cut -d'@' -f2 | tr '|' '\n' | grep $last_trx_md5 | cut -d',' -f1` #found? if [ -z "$binlog_pos" ]; then #no, let's try if there's a 2nd file binlog_file=`$CRM_ATTR_LAST_TRX --query | cut -d'@' -f3` if [ -z "$binlog_file" ]; then binlog_pos=`$CRM_ATTR_LAST_TRX --query | cut -d'@' -f4 | tr '|' '\n' | grep $last_trx_md5 | cut -d',' -f1` fi fi # TODO: we could be at the end of the 2nd file so we should point to the first entry of the first # file. Edge case, will deal with it later. # have we found something? if [ ! -z "$binlog_file" -a ! -z "$binlog_pos" ]; then # Let's overwrite the glb_local_info variable glb_local_info="`echo $glb_local_info | cut -d'|' -f1`|$binlog_file|$binlog_pos" fi $CRM_ATTR_NODE_LAST_TRX_MD5 -D fi fi fi ocf_log info "Resetting replication" unset_master #Should be unset already execpt if master crashed if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi ocf_log info "Changing MySQL configuration to replicate from $master_host" set_master if [ $? -ne 0 ]; then return $OCF_ERR_GENERIC fi start_slave if [ $? 
-ne 0 ]; then ocf_log err "Failed to start slave" return $OCF_ERR_GENERIC fi # Remove the master_crashed_ts attribute on this node since there has been a promotion $CRM_ATTR_MASTER_CRASHED_TS -D 2> /dev/null fi return $OCF_SUCCESS ;; 'pre-demote') # Is the notification for our set notify_resource=`echo $OCF_RESKEY_CRM_meta_notify_demote_resource|cut -d: -f1` my_resource=`echo $OCF_RESOURCE_INSTANCE|cut -d: -f1` if [ $notify_resource != ${my_resource} ]; then ocf_log debug "Notification is not for us" return $OCF_SUCCESS fi demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "` if [ $demote_host = ${HOSTNAME} ]; then ocf_log info "post-demote notification for $demote_host" while ! get_read_only do # This may timeout (5s) if there are long running queries (issue #44) set_read_only on # Must kill all existing user threads because they are still Read/write # in order for the slaves to complete the read of binlogs local tmpfile tmpfile=`mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX` mysql_run -Q -sw -O $MYSQL $MYSQL_OPTIONS_REPL \ -e "SHOW PROCESSLIST" > $tmpfile local killthreads="" for thread in `awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' $tmpfile` do killthreads="${killthreads} KILL ${thread};" done mysql_run -Q -sw $MYSQL $MYSQL_OPTIONS_REPL \ -e "${killthreads}" rm -f $tmpfile done else ocf_log info "Ignoring post-demote notification execpt for my own demotion." fi return $OCF_SUCCESS ;; 'post-demote') # Is the notification for our set notify_resource=`echo $OCF_RESKEY_CRM_meta_notify_demote_resource|cut -d: -f1` my_resource=`echo $OCF_RESOURCE_INSTANCE|cut -d: -f1` if [ $notify_resource != ${my_resource} ]; then ocf_log debug "Notification is not for us" return $OCF_SUCCESS fi demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "` if [ $demote_host = ${HOSTNAME} ]; then ocf_log info "Ignoring post-demote notification for my own demotion." 
return $OCF_SUCCESS fi ocf_log info "post-demote notification for $demote_host." # The former master has just been gracefully demoted. unset_master ;; *) return $OCF_SUCCESS ;; esac } # # mysql_run: Run a mysql command, log its output and return the proper error code. # Usage: mysql_run [-Q] [-info|-warn|-err] [-O] [-sw] # -Q: don't log the output of the command if it succeeds # -info|-warn|-err: log the output of the command at given # severity if it fails (defaults to err) # -O: echo the output of the command # -sw: Suppress 5.6 client warning when password is used on the command line # Adapted from ocf_run. # mysql_run() { local rc local output outputfile local verbose=1 local returnoutput local loglevel=err local suppress_56_password_warning local var for var in 1 2 3 4 do case "$1" in "-Q") verbose="" shift 1;; "-info"|"-warn"|"-err") loglevel=`echo $1 | sed -e s/-//g` shift 1;; "-O") returnoutput=1 shift 1;; "-sw") suppress_56_password_warning=1 shift 1;; *) ;; esac done outputfile=`mktemp ${HA_RSCTMP}/mysql_run.${OCF_RESOURCE_INSTANCE}.XXXXXX` error=`"$@" 2>&1 1>$outputfile` rc=$? if [ "$suppress_56_password_warning" -eq 1 ]; then error=`echo "$error" | egrep -v '^Warning: Using a password on the command line'` fi output=`cat $outputfile` rm -f $outputfile if [ $rc -eq 0 ]; then if [ "$verbose" -a ! -z "$output" ]; then ocf_log info "$output" fi if [ "$returnoutput" -a ! -z "$output" ]; then echo "$output" fi MYSQL_LAST_ERR=$OCF_SUCCESS return $OCF_SUCCESS else if [ ! -z "$error" ]; then ocf_log $loglevel "$error" regex='^ERROR ([[:digit:]]{4}).*' if [[ $error =~ $regex ]]; then mysql_code=${BASH_REMATCH[1]} if [ -n "$mysql_code" ]; then MYSQL_LAST_ERR=$mysql_code return $rc fi fi else ocf_log $loglevel "command failed: $*" fi # No output to parse so return the standard exit code. 
MYSQL_LAST_ERR=$rc return $rc fi } ####################################################################### ########################################################################## # If DEBUG_LOG is set, make this resource agent easy to debug: set up the # debug log and direct all output to it. Otherwise, redirect to /dev/null. # The log directory must be a directory owned by root, with permissions 0700, # and the log must be writable and not a symlink. ########################################################################## DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log" if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then DEBUG_LOG_DIR="${DEBUG_LOG%/*}" if [ -d "${DEBUG_LOG_DIR}" ]; then exec 9>>"$DEBUG_LOG" exec 2>&9 date >&9 if [ "$OCF_RESKEY_CRM_meta_notify" == "true" ]; then echo "$OCF_RESKEY_CRM_meta_notify_key_type - $OCF_RESKEY_CRM_meta_notify_key_operation" >&9 fi echo "$*" >&9 env | grep OCF_ | sort >&9 set -x else exec 9>/dev/null fi fi case "$1" in meta-data) meta_data exit $OCF_SUCCESS;; usage|help) usage exit $OCF_SUCCESS;; esac if [ "$#" -lt "1" ]; then usage exit $OCF_SUCCESS fi mysql_validate rc=$? 
LSB_STATUS_STOPPED=3 if [ $rc -ne 0 ]; then case "$1" in stop) exit $OCF_SUCCESS;; monitor) exit $OCF_NOT_RUNNING;; status) exit $LSB_STATUS_STOPPED;; *) exit $rc;; esac fi #Global info missing from OCF_RESKEY resources=`$CRM_RES --list` # now we need the master-slave clone set name, need to walk around limitations # of older pacemaker if [[ "$OCF_RESKEY_crm_feature_set" > "3.0.1" ]]; then glb_master_resource=`echo "$resources" | egrep "\[$INSTANCE_ATTR_NAME\]" | awk '{print $3}' | head -n 1` else # older versions of Pacemaker don't write the primitive name in the resources list for msr in `echo "$resources" | grep 'Master/Slave' | awk '{print $3}'`; do isThere=`$CRM_RES -q -r $msr | grep primitive | grep -c $INSTANCE_ATTR_NAME` if [ "$isThere" -gt "0" ]; then glb_master_resource=$msr fi done fi # We check if there is a location constraint against this node # where $OCF_RESOURCE_INSTANCE should not be running here i.e. # -INFINITY score, if so we ignore monitor call for this node contrnt=$(timeout 5 cibadmin --query --xpath "/cib/configuration/constraints/rsc_location[@rsc='${glb_master_resource}']/rule[@score='-INFINITY']/expression[@attribute='#uname' and @value='${HOSTNAME}']" > /dev/null 2> /dev/null) if [ "$?" -eq "0" ]; then exit $OCF_SUCCESS fi is_master_side glb_master_side=$? 
if [ "${#OCF_RESKEY_geo_remote_IP}" -gt "0" -a $glb_master_side -ne 0 ]; then # geo_remote_IP is defined, let's query the remote side # the variable content will be like: pacemaker-1-1|binlog.000156|107 1 # The commands here are boolean tested so if any of them fails we will # not get a MASTER_OK which indicates we got a useful value at least ssh_cmd="$SSH $OCF_RESKEY_geo_remote_IP -l $OCF_RESKEY_geo_remote_user -o ConnectTimeout=3 -o PasswordAuthentication=no -q $OCF_RESKEY_geo_remote_opts" ssh_query="$CRM_ATTR_REPL_INFO --query -q 2> /dev/null" ssh_query="$ssh_query && $CRM_RES --list | egrep -A2 ' $glb_master_resource ' | egrep -c 'Master[^\/]'" ssh_query="$ssh_query && echo 'MASTER_OK' || echo 'MASTER_NOT_FOUND'" glb_remote_info=`$ssh_cmd "$ssh_query" | tr '\n' ' '` echo "$glb_remote_info" | grep MASTER_OK > /dev/null 2>&1 if [ "$?" -ne "0" ]; then glb_master_exists=0 else glb_master_exists=`echo $glb_remote_info | awk '{ print $(NF-1) }'` if [[ -z "$glb_master_exists" ]]; then glb_master_exists=0 fi fi else glb_master_exists=`echo "$resources" | grep -A2 " $glb_master_resource " | egrep -c 'Master[^\/]'` fi if [ "$glb_master_exists" -eq "1" ]; then if [ "${#glb_remote_info}" -gt "0" ]; then glb_cib_master=`echo $glb_remote_info | awk '{ print $1 }' | cut -d'|' -f1` else glb_local_info=`$CRM_ATTR_REPL_INFO --query -q` glb_cib_master=`echo $glb_local_info | cut -d'|' -f1` fi fi # Also check if the cluster has quorum, we may need to preserve some state # If there was split brain and the cluster recovers cluster_has_quorum=$(timeout 5 cibadmin --query|egrep -c 'have-quorum=\"1\"') # What kind of method was invoked? case "$1" in start) mysql_start;; stop) mysql_stop;; status) mysql_status err;; monitor) mysql_monitor;; promote) mysql_promote;; demote) mysql_demote;; notify) mysql_notify;; validate-all) exit $OCF_SUCCESS;; *) usage exit $OCF_ERR_UNIMPLEMENTED;; esac