From 9689ea270632f64b5367e8839cd7a3297575f5a9 Mon Sep 17 00:00:00 2001
From: Mike Bayer
Date: Thu, 19 Apr 2018 17:28:16 -0400
Subject: [PATCH] Enhance galera to interact over multiple clusters

This change adds a new resource agent "stretch_galera" which builds off
of the existing "galera" agent. To accommodate this, the "galera"
agent's shell script structure is modified slightly so that it can be
sourced for its functions.

The new resource agent adds a new parameter "remote_node_map" to the
Galera resource agent which allows it to consider galera node names
that are in other clusters as part of its Galera quorum. To achieve
this, it launches read-only pcs commands to the remote clusters in
order to view and modify remote state variables.

Additionally, the stretch agent honors an optional pcs attribute
<resource-name>-initial-bootstrap which when applied to the local pcs
nodes, will allow Galera to be bootstrapped with only that subset of
nodes, without the additional remote nodes being available yet. An
installer can set these attributes to allow the first pcs cluster to
come online before subsequent clusters, and then remove the attributes.
---
 heartbeat/galera         | 158 ++++++++++++----------
 heartbeat/stretch_galera | 279 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 366 insertions(+), 71 deletions(-)
 create mode 100755 heartbeat/stretch_galera

diff --git a/heartbeat/galera b/heartbeat/galera
index c8f47151ed..35134b77c2 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -25,7 +25,7 @@
 
 ##
 # README.
-# 
+#
 # This agent only supports being configured as a multistate Master
 # resource.
 #
@@ -49,7 +49,7 @@
 # pcs resource create db galera enable_creation=true \
 #  wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
 #
-# By setting the 'enable_creation' option, the database will be automatically 
+# By setting the 'enable_creation' option, the database will be automatically
 # generated at startup.
The meta attribute 'master-max=3' means that all 3
 # nodes listed in the wsrep_cluster_address list will be allowed to connect
 # to the galera cluster and perform replication.
@@ -57,7 +57,7 @@
 # NOTE: If you have more nodes in the pacemaker cluster then you wish
 # to have in the galera cluster, make sure to use location contraints to prevent
 # pacemaker from attempting to place a galera instance on a node that is
-# not in the 'wsrep_cluster_address" list. 
+# not in the 'wsrep_cluster_address" list.
 #
 ##
 
@@ -101,7 +101,9 @@ UEND
 }
 
 meta_data() {
-	cat <<END
+    local extra_parameters="$1"
+
+    cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="galera">
@@ -249,6 +251,8 @@ Cluster check user password
 <content type="string" default="" />
 </parameter>
 
+${extra_parameters}
+
 </parameters>
 
 <actions>
@@ -331,7 +335,7 @@ get_last_commit()
 
     if [ -z "$node" ]; then
        ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
-    else 
+    else
        ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
     fi
 }
@@ -420,7 +424,7 @@ clear_master_score()
     local node=$(ocf_attribute_target $1)
     if [ -z "$node" ]; then
         $CRM_MASTER -D
-    else 
+    else
         $CRM_MASTER -D -N $node
     fi
 }
@@ -431,7 +435,7 @@ set_master_score()
 
     if [ -z "$node" ]; then
         $CRM_MASTER -v 100
-    else 
+    else
         $CRM_MASTER -N $node -v 100
     fi
 }
@@ -480,6 +484,28 @@ pcmk_to_galera_name()
 }
 
 
+# Print the pacemaker node names (space separated) of all members listed in
+# wsrep_cluster_address.  Prints nothing and returns 1 if a galera name
+# cannot be mapped to a pacemaker node.
+all_bootstrap_candidates()
+{
+    local pcmk_nodes=""
+    local all_nodes node pcmk_node
+
+    all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
+
+    for node in $all_nodes; do
+        pcmk_node=$(galera_to_pcmk_name $node)
+        if [ -z "$pcmk_node" ]; then
+            ocf_log err "Could not determine pacemaker node from galera name <${node}>."
+            return 1
+        fi
+
+        pcmk_nodes="$pcmk_nodes $pcmk_node"
+    done
+    echo "$pcmk_nodes"
+}
+
 detect_first_master()
 {
     local best_commit=0
@@ -492,24 +518,17 @@ detect_first_master()
     local best_node
     local safe_to_bootstrap
 
-    all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
-    best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
-    best_node=$(galera_to_pcmk_name $best_node_gcomm)
-    if [ -z "$best_node" ]; then
-        ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>."
-        return
-    fi
+    all_nodes=$(all_bootstrap_candidates)
+    if [ -z "$all_nodes" ]; then
+        # A galera name could not be mapped (already logged by the helper).
+        # Bail out as the pre-refactoring code did instead of carrying on
+        # with an empty candidate list and an empty best_node.
+        return
+    fi
+    best_node=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
 
     # avoid selecting a recovered node as bootstrap if possible
     for node in $all_nodes; do
-        local pcmk_node=$(galera_to_pcmk_name $node)
-        if [ -z "$pcmk_node" ]; then
-            ocf_log err "Could not determine pacemaker node from galera name <${node}>."
-            return
-        else
-            node=$pcmk_node
-        fi
-
         if is_no_grastate $node; then
             nodes_recovered="$nodes_recovered $node"
         else
@@ -529,6 +548,8 @@ detect_first_master()
                 # We don't need to wait for the other nodes to report state in this case
                 missing_nodes=0
                 break
+            else
+                ocf_log info "Node <${node}> is not marked as safe to bootstrap, continuing to look."
             fi
 
             last_commit=$(get_last_commit $node)
@@ -914,64 +935,70 @@ galera_validate()
     mysql_common_validate
 }
 
-case "$1" in
-  meta-data) meta_data
-       exit $OCF_SUCCESS;;
-  usage|help) usage
-       exit $OCF_SUCCESS;;
-esac
-
-galera_validate
-rc=$?
-LSB_STATUS_STOPPED=3
-if [ $rc -ne 0 ]; then
+cmd_main() {
     case "$1" in
-        stop) exit $OCF_SUCCESS;;
-        monitor) exit $OCF_NOT_RUNNING;;
-        status) exit $LSB_STATUS_STOPPED;;
-        *) exit $rc;;
+        meta-data) meta_data
+            exit $OCF_SUCCESS;;
+        usage|help) usage
+            exit $OCF_SUCCESS;;
     esac
-fi
 
-if [ -z "${OCF_RESKEY_check_passwd}" ]; then
-    # This value is automatically sourced from /etc/sysconfig/checkcluster if available
-    OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
-fi
-if [ -z "${OCF_RESKEY_check_user}" ]; then
-    # This value is automatically sourced from /etc/sysconfig/checkcluster if available
-    OCF_RESKEY_check_user=${MYSQL_USERNAME}
-fi
-: ${OCF_RESKEY_check_user="root"}
+    galera_validate
+    rc=$?
+    LSB_STATUS_STOPPED=3
+    if [ $rc -ne 0 ]; then
+        case "$1" in
+            stop) exit $OCF_SUCCESS;;
+            monitor) exit $OCF_NOT_RUNNING;;
+            status) exit $LSB_STATUS_STOPPED;;
+            *) exit $rc;;
+        esac
+    fi
 
-MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
-if [ -n "${OCF_RESKEY_check_passwd}" ]; then
-    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
-fi
+    if [ -z "${OCF_RESKEY_check_passwd}" ]; then
+        # This value is automatically sourced from /etc/sysconfig/checkcluster if available
+        OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
+    fi
+    if [ -z "${OCF_RESKEY_check_user}" ]; then
+        # This value is automatically sourced from /etc/sysconfig/checkcluster if available
+        OCF_RESKEY_check_user=${MYSQL_USERNAME}
+    fi
+    : ${OCF_RESKEY_check_user="root"}
 
-# This value is automatically sourced from /etc/sysconfig/checkcluster if available
-if [ -n "${MYSQL_HOST}" ]; then
-    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
-fi
+    MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
+    if [ -n "${OCF_RESKEY_check_passwd}" ]; then
+        MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
+    fi
 
-# This value is automatically sourced from /etc/sysconfig/checkcluster if available
-if [ -n "${MYSQL_PORT}" ]; then
-    MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
-fi
+    # This value is automatically sourced from /etc/sysconfig/checkcluster if available
+    if [ -n "${MYSQL_HOST}" ]; then
+        MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
+    fi
 
+    # This value is automatically sourced from /etc/sysconfig/checkcluster if available
+    if [ -n "${MYSQL_PORT}" ]; then
+        MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
+    fi
 
+    # What kind of method was invoked?
+    case "$1" in
+        start) galera_start;;
+        stop) galera_stop;;
+        status) mysql_common_status err;;
+        monitor) galera_monitor;;
+        promote) galera_promote;;
+        demote) galera_demote;;
+        validate-all) exit $OCF_SUCCESS;;
+
+        *) usage
+            exit $OCF_ERR_UNIMPLEMENTED;;
+    esac
+}
 
-# What kind of method was invoked?
-case "$1" in
-  start) galera_start;;
-  stop) galera_stop;;
-  status) mysql_common_status err;;
-  monitor) galera_monitor;;
-  promote) galera_promote;;
-  demote) galera_demote;;
-  validate-all) exit $OCF_SUCCESS;;
+# run 'main' if we aren't "sourceonly"
+if [ "$1" != "sourceonly" ]; then
+    cmd_main "$@"
+fi
 
- *) usage
-    exit $OCF_ERR_UNIMPLEMENTED;;
-esac
 
 # vi:sw=4:ts=4:et:
diff --git a/heartbeat/stretch_galera b/heartbeat/stretch_galera
new file mode 100755
index 0000000000..d743878926
--- /dev/null
+++ b/heartbeat/stretch_galera
@@ -0,0 +1,314 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 David Vossel
+#                    All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+##
+# README.
+#
+# Extend the Galera resource agent to support joining into a galera cluster
+# that is managed by multiple pacemaker clusters.
+#
+# This agent needs bash: it uses "declare -f" and "read -d", which plain
+# POSIX sh does not provide.
+##
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+# "sourceonly" makes the galera agent skip its own cmd_main so that only its
+# function definitions are loaded; several of them are overridden below.
+. ${OCF_ROOT}/resource.d/heartbeat/galera sourceonly
+
+
+# Expanded unquoted on purpose so it splits into the command and its options.
+# NOTE(review): StrictHostKeyChecking=no skips host key verification of the
+# remote clusters -- confirm this is acceptable for the deployment.
+SSH_CMD="ssh -oConnectTimeout=5 -oStrictHostKeyChecking=no"
+
+# copy original meta_data to galera_meta_data
+eval "$(echo "galera_meta_data()"; declare -f meta_data | tail -n +2)"
+
+# Print the galera agent meta-data extended with the remote_node_map parameter.
+# NOTE(review): get_remote_node parses entries as "<pacemaker node>:<ssh target>";
+# the example in the description below should be checked against that.
+meta_data() {
+    local extraparams
+
+    IFS='' read -r -d '' extraparams <<END
+<parameter name="remote_node_map" unique="0" required="0">
+<longdesc lang="en">
+A mapping of pacemaker node names to remote hosts.
+
+Allows pacemaker nodes in remote pacemaker clusters to be part of this
+Galera cluster:
+
+root@pacemakerhost/pcmk1:node.1.galera;root@pacemakerhost/pcmk2:node.2.galera
+
+</longdesc>
+<shortdesc lang="en">Pacemaker to Galera name mapping</shortdesc>
+<content type="string" default=""/>
+</parameter>
+
+END
+
+    galera_meta_data "$extraparams"
+}
+
+# Print the value of the optional "<resource>-initial-bootstrap" node attribute
+# for node $1; prints nothing when it is unset or the node is a remote one.
+is_initial_bootstrap()
+{
+    # look for the initial-bootstrap attribute, which is an optional attribute that
+    # can be set externally as the resource is first run, to indicate a subset of nodes
+    # that are sufficient to do an initial bootstrap, without needing the additional
+    # nodes to be available yet. the flag is turned off as soon as this event proceeds.
+    local node=$(ocf_attribute_target $1)
+
+    # note there is no "-l reboot". because this value would have been set before the resource
+    # was run, reboot indicates it's only from the current run which means we'd never get it
+    local_crm_attribute "$node" --name "${INSTANCE_ATTR_NAME}-initial-bootstrap" --quiet 2>/dev/null
+}
+
+# Mark node $1 as the bootstrap node (nothing is done for nodes of a remote cluster).
+set_bootstrap_node()
+{
+    local node=$(ocf_attribute_target $1)
+
+    local_crm_attribute "$node" -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true"
+}
+
+# Query the no-grastate attribute of node $1, over ssh if the node is remote.
+is_no_grastate()
+{
+    local node=$(ocf_attribute_target $1)
+    remote_crm_attribute "$node" -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" --quiet 2>/dev/null
+}
+
+# Print the last-committed attribute of node $1 ($NODENAME if $1 resolves to
+# nothing), querying the owning cluster over ssh for remote nodes.
+get_last_commit()
+{
+    local node=$(ocf_attribute_target $1)
+
+    if [ -z "$node" ]; then
+        ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
+    else
+        remote_crm_attribute "$node" -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
+    fi
+}
+
+# Same as get_last_commit, for the safe-to-bootstrap attribute.
+get_safe_to_bootstrap()
+{
+    local node=$(ocf_attribute_target $1)
+
+    if [ -z "$node" ]; then
+        ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
+    else
+        remote_crm_attribute "$node" -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
+    fi
+}
+
+# Print the pacemaker node names to consider for bootstrap: the nodes carrying
+# the initial-bootstrap attribute if there are any, otherwise every member of
+# wsrep_cluster_address.  Prints nothing and returns 1 if a galera name cannot
+# be mapped to a pacemaker node.
+all_bootstrap_candidates()
+{
+    local initial_bootstrap_nodes=""
+    local is_initial_boot
+    local all_nodes_pcmk=""
+    local all_nodes gcomm_node pcmk_node
+
+    all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
+
+    ocf_log info "searching for initial bootstrap nodes in FQDN list: $all_nodes"
+    for gcomm_node in $all_nodes; do
+        pcmk_node=$(galera_to_pcmk_name $gcomm_node)
+        if [ -z "$pcmk_node" ]; then
+            ocf_log err "Could not determine pacemaker node from galera name <${gcomm_node}>."
+            return 1
+        fi
+
+        all_nodes_pcmk="$all_nodes_pcmk $pcmk_node"
+
+        is_initial_boot=$(is_initial_bootstrap $pcmk_node)
+        ocf_log info "for node $pcmk_node, got initial boot flag value: $is_initial_boot"
+
+        if [ -n "$is_initial_boot" ]; then
+            initial_bootstrap_nodes="$initial_bootstrap_nodes $pcmk_node"
+        fi
+    done
+
+    if [ -n "$initial_bootstrap_nodes" ]; then
+        ocf_log info "found initial bootstrap nodes, returning FQDN list $initial_bootstrap_nodes"
+        echo "$initial_bootstrap_nodes"
+    else
+        ocf_log info "past looking at initial bootstrap nodes"
+        # return normal list of all nodes
+        echo "$all_nodes_pcmk"
+    fi
+}
+
+# Return 0 if a healthy master instance of this resource is reported by the
+# local cluster or by any ssh target listed in remote_node_map, non-zero
+# otherwise (always 1 during demote).
+master_exists()
+{
+    local master_pattern remote_ssh master_exists_local
+
+    if [ "$__OCF_ACTION" = "demote" ]; then
+        # We don't want to detect master instances during demote.
+        # 1. we could be detecting ourselves as being master, which is no longer the case.
+        # 2. we could be detecting other master instances that are in the process of shutting down.
+        # by not detecting other master instances in "demote" we are deferring this check
+        # to the next recurring monitor operation which will be much more accurate
+        return 1
+    fi
+
+    master_pattern="resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\""
+
+    # determine if a master instance is already up and is healthy
+    crm_mon --as-xml | grep "$master_pattern" > /dev/null 2>&1
+    master_exists_local=$?
+
+    if [ $master_exists_local -eq 0 ]; then
+        ocf_log info "Detected that a master exists for the local cluster"
+    fi
+
+    # if not, and we have remote nodes, check those also
+    if [ $master_exists_local -ne 0 ] && [ -n "$OCF_RESKEY_remote_node_map" ]; then
+        for remote_ssh in $(echo "$OCF_RESKEY_remote_node_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '{print $2;}' | sort | uniq); do
+            if $SSH_CMD $remote_ssh crm_mon --as-xml | grep "$master_pattern" > /dev/null 2>&1; then
+                ocf_log info "Detected that a master exists for the remote cluster $remote_ssh"
+                # explicit 0: "$?" here would be the status of ocf_log, not of grep
+                return 0
+            fi
+        done
+    fi
+
+    return $master_exists_local
+}
+
+# Remove the master score of node $1; nodes mapped in remote_node_map are
+# skipped (see local_crm_master).
+clear_master_score()
+{
+    local node=$(ocf_attribute_target $1)
+    if [ -z "$node" ]; then
+        $CRM_MASTER -D
+    else
+        local_crm_master "$node" -D
+    fi
+}
+
+# Set the master score of node $1 to 100 (same node rules as clear_master_score).
+set_master_score()
+{
+    local node=$(ocf_attribute_target $1)
+
+    if [ -z "$node" ]; then
+        $CRM_MASTER -v 100
+    else
+        local_crm_master "$node" -v 100
+    fi
+}
+
+# Print the ssh target that remote_node_map associates with pacemaker node $1;
+# prints nothing if the map is unset or has no entry for the node.
+# The map is read as ';'-separated "<pacemaker node>:<ssh target>" entries.
+get_remote_node()
+{
+    local node=$1
+
+    if [ -z "$OCF_RESKEY_remote_node_map" ]; then
+        return 0
+    fi
+
+    # hand the node name to awk with -v instead of splicing it into the program
+    echo "$OCF_RESKEY_remote_node_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -v node="$node" '$1 == node {print $2; exit}'
+}
+
+# Run $CRM_MASTER for node $1 with the remaining arguments, unless the node
+# is mapped to a remote cluster.
+local_crm_master()
+{
+    local node=$1
+    shift
+
+    local remote_ssh=$(get_remote_node "$node")
+
+    if [ -z "$remote_ssh" ]; then
+        $CRM_MASTER -N "$node" "$@"
+    fi
+
+    # if this is a remote node, don't set master; this will be set up
+    # from that pacemaker cluster.
+}
+
+# Run crm_attribute for node $1 with the remaining arguments, unless the node
+# is mapped to a remote cluster (then nothing is run and nothing is printed).
+local_crm_attribute()
+{
+    local node=$1
+    shift
+
+    local remote_ssh=$(get_remote_node "$node")
+
+    if [ -z "$remote_ssh" ]; then
+        ${HA_SBIN_DIR}/crm_attribute -N "$node" "$@"
+    fi
+
+    # if this is a remote node, don't run any command
+}
+
+# Run crm_attribute for node $1 with the remaining arguments, through ssh on
+# the mapped host when the node is mapped to a remote cluster.
+remote_crm_attribute()
+{
+    local node=$1
+    shift
+
+    local remote_ssh=$(get_remote_node "$node")
+
+    if [ -z "$remote_ssh" ]; then
+        ${HA_SBIN_DIR}/crm_attribute -N "$node" "$@"
+    else
+        $SSH_CMD $remote_ssh ${HA_SBIN_DIR}/crm_attribute -N "$node" "$@"
+    fi
+}
+
+
+# No-op; presumably shadows promote_everyone of the sourced galera agent.
+promote_everyone()
+{
+    # turn into a no-op
+    :
+}
+
+cmd_main "$@"
+
+
+# vi:sw=4:ts=4:et: