diff --git a/.github/workflows/crmsh-ci.yml b/.github/workflows/crmsh-ci.yml
index 0d5727c1be..8e306fcdb6 100644
--- a/.github/workflows/crmsh-ci.yml
+++ b/.github/workflows/crmsh-ci.yml
@@ -127,6 +127,16 @@ jobs:
         $DOCKER_SCRIPT resource before_install
         $DOCKER_SCRIPT resource run
 
+  functional_test_operation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+    - uses: actions/checkout@v2
+    - name: functional test for operations
+      run: |
+        $DOCKER_SCRIPT operation before_install
+        $DOCKER_SCRIPT operation run
+
   functional_test_configure_sublevel:
     runs-on: ubuntu-latest
     timeout-minutes: 20
diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py
index 45326c3d35..3118616ce1 100644
--- a/crmsh/bootstrap.py
+++ b/crmsh/bootstrap.py
@@ -2201,7 +2201,7 @@ def bootstrap_remove(context):
         remove_self()
         return
 
-    if _context.cluster_node in xmlutil.listnodes():
+    if _context.cluster_node in xmlutil.NodeState().list_nodes():
         remove_node_from_cluster()
     else:
         error("Specified node {} is not configured in cluster! Unable to remove.".format(_context.cluster_node))
@@ -2210,7 +2210,7 @@ def bootstrap_remove(context):
 def remove_self():
     me = _context.cluster_node
     yes_to_all = _context.yes_to_all
-    nodes = xmlutil.listnodes(include_remote_nodes=False)
+    nodes = xmlutil.NodeState().list_nodes(include_remote_nodes=False)
     othernode = next((x for x in nodes if x != me), None)
     if othernode is not None:
         # remove from other node
diff --git a/crmsh/completers.py b/crmsh/completers.py
index 46e1474c96..3b79f7e04e 100644
--- a/crmsh/completers.py
+++ b/crmsh/completers.py
@@ -75,7 +75,7 @@ def primitives(args):
     return [x.get("id") for x in nodes if xmlutil.is_primitive(x)]
 
 
-nodes = call(xmlutil.listnodes)
+nodes = call(xmlutil.NodeState().list_nodes)
 
 shadows = call(xmlutil.listshadows)
 
diff --git a/crmsh/ui_node.py b/crmsh/ui_node.py
index d14f1b0354..58fc76e1d5 100644
--- a/crmsh/ui_node.py
+++ b/crmsh/ui_node.py
@@ -299,6 +299,8 @@ def do_standby(self, context, *args):
         else:
             syntax_err(args, context=context.get_command_name())
             return False
+        if xmlutil.NodeState().is_node_in_maintenance(node):
+            context.warning("Node \"{}\" is in maintenance".format(node))
         opts = ''
         if lifetime:
             opts = "--lifetime='%s'" % lifetime
diff --git a/crmsh/ui_resource.py b/crmsh/ui_resource.py
index dbc97a8069..01a4f3033b 100644
--- a/crmsh/ui_resource.py
+++ b/crmsh/ui_resource.py
@@ -293,6 +293,9 @@ def _commit_meta_attrs(self, context, resources, name, value):
 
         rc = True
         for rsc in resources:
+            rc_managed, reason = xmlutil.RscState().is_managed(rsc)
+            if not rc_managed:
+                context.warning("Resource {} is unmanaged ({})".format(rsc, reason))
             rc = rc and set_deep_meta_attr(rsc, name, value, commit=False)
         if commit and rc:
             ok = cib_factory.commit()
diff --git a/crmsh/xmlutil.py b/crmsh/xmlutil.py
index 7b35e8650c..ee005b8b21 100644
--- a/crmsh/xmlutil.py
+++ b/crmsh/xmlutil.py
@@ -17,7 +17,7 @@
 from . import userdir
 from .utils import add_sudo, str2file, str2tmp, get_boolean
 from .utils import get_stdout, stdout2list, crm_msec, crm_time_cmp
-from .utils import olist, get_cib_in_use, get_tempdir, to_ascii
+from .utils import olist, get_cib_in_use, get_tempdir, to_ascii, running_on
 
 
 def xmlparse(f):
@@ -173,7 +173,71 @@ def get_top_cib_nodes(node, nodes_l):
     return nodes_l
 
 
-class RscState(object):
+class CibConfiguration(object):
+    """
+    Base class to load current cib configuration
+    """
+    def __init__(self):
+        self.current_cib = None
+
+    def _init_cib(self):
+        self.current_cib = cibdump2elem()
+        if self.current_cib is None:
+            raise ValueError("Cannot dump cib configuration")
+
+
+class NodeState(CibConfiguration):
+    """
+    Class to get specific node state
+    """
+    def list_nodes(self, include_remote_nodes=True):
+        """
+        List current nodes in cib
+        """
+        if self.current_cib is None:
+            self._init_cib()
+        local_nodes = self.current_cib.xpath('configuration/nodes/node/@uname')
+        if include_remote_nodes:
+            remote_nodes = self.current_cib.xpath('status/node_state[@remote_node="true"]/@uname')
+        else:
+            remote_nodes = []
+        return list(set([n for n in local_nodes + remote_nodes if n]))
+
+    def get_specific_node(self, uname):
+        """
+        Get a node XML element given the uname.
+        """
+        if self.current_cib is None:
+            self._init_cib()
+        if uname not in self.list_nodes():
+            raise ValueError("Node \"{}\" does not exist".format(uname))
+        return self.current_cib.xpath("configuration//*[@uname=\"{}\"]".format(uname))[0]
+
+    def is_node_in_maintenance(self, uname):
+        """
+        Check if a node is in maintenance
+        """
+        node_entry = self.get_specific_node(uname)
+        attr_entry = get_child_nvset_node(node_entry, attr_set="instance_attributes")
+        if attr_entry is None:
+            return False
+        attr = get_attr_value(attr_entry, "maintenance")
+        return is_xs_boolean_true(attr) if attr else False
+
+    def are_all_nodes_in_maintenance(self):
+        """
+        Check if all nodes are in maintenance
+        """
+        return all([self.is_node_in_maintenance(node) for node in self.list_nodes()])
+
+    def is_node_in_maintenance_for_the_running_resource(self, rsc_id):
+        """
+        Check if the node running this resource is in maintenance
+        """
+        return any([self.is_node_in_maintenance(node) for node in running_on(rsc_id)])
+
+
+class RscState(CibConfiguration):
     '''
     Get the resource status and some other relevant bits.
     In particular, this class should allow for a bit of caching
@@ -184,17 +248,17 @@ class RscState(object):
     rsc_status = "crm_resource -W -r '%s'"
 
     def __init__(self):
-        self.current_cib = None
+        super(self.__class__, self).__init__()
         self.rsc_elem = None
         self.prop_elem = None
         self.rsc_dflt_elem = None
 
-    def _init_cib(self):
-        cib = cibdump2elem("configuration")
-        self.current_cib = cib
-        self.rsc_elem = get_first_conf_elem(cib, "resources")
-        self.prop_elem = get_first_conf_elem(cib, "crm_config/cluster_property_set")
-        self.rsc_dflt_elem = get_first_conf_elem(cib, "rsc_defaults/meta_attributes")
+    def _load_cib(self):
+        if self.current_cib is None:
+            self._init_cib()
+        self.rsc_elem = get_first_conf_elem(self.current_cib, "resources")
+        self.prop_elem = get_first_conf_elem(self.current_cib, "crm_config/cluster_property_set")
+        self.rsc_dflt_elem = get_first_conf_elem(self.current_cib, "rsc_defaults/meta_attributes")
 
     def rsc2node(self, ident):
         '''
@@ -204,23 +268,21 @@ def rsc2node(self, ident):
         expensive.
         '''
         if self.rsc_elem is None:
-            self._init_cib()
+            self._load_cib()
         if self.rsc_elem is None:
-            return None
+            raise ValueError("Failed to load resources cib")
         # does this need to be optimized?
         expr = './/*[@id="%s"]' % ident
         try:
             return self.rsc_elem.xpath(expr)[0]
         except (IndexError, AttributeError):
-            return None
+            raise ValueError("Cannot find resource \"{}\"".format(ident))
 
     def is_ms(self, ident):
         '''
         Test if the resource is master-slave.
         '''
         rsc_node = self.rsc2node(ident)
-        if rsc_node is None:
-            return False
         return is_ms(rsc_node)
 
     def rsc_clone(self, ident):
@@ -229,8 +291,6 @@ def rsc_clone(self, ident):
         or None if it's not cloned.
         '''
         rsc_node = self.rsc2node(ident)
-        if rsc_node is None:
-            return None
         pnode = rsc_node.getparent()
         if pnode is None:
             return None
@@ -243,28 +303,33 @@ def rsc_clone(self, ident):
     def is_managed(self, ident):
         '''
         Is this resource managed?
+        Return (boolean, reason)
         '''
         rsc_node = self.rsc2node(ident)
-        if rsc_node is None:
-            return False
         # maintenance-mode, if true, overrides all
         attr = get_attr_value(self.prop_elem, "maintenance-mode")
         if attr and is_xs_boolean_true(attr):
-            return False
-        # then check the rsc is-managed meta attribute
+            return False, "cluster property maintenance-mode is true"
+        # then check if all nodes are in maintenance
+        if NodeState().are_all_nodes_in_maintenance():
+            return False, "all nodes are in maintenance"
+        # then check if the node running this resource is in maintenance
+        if NodeState().is_node_in_maintenance_for_the_running_resource(ident):
+            return False, "node running \"{}\" is in maintenance".format(ident)
         rsc_meta_node = get_rsc_meta_node(rsc_node)
+        # then check the rsc maintenance meta attribute
+        attr = get_attr_value(rsc_meta_node, "maintenance")
+        if attr and is_xs_boolean_true(attr):
+            return False, "resource \"{}\" is in maintenance".format(ident)
+        # then check the rsc is-managed meta attribute
         attr = get_attr_value(rsc_meta_node, "is-managed")
-        if attr:
-            return is_xs_boolean_true(attr)
+        if attr and not is_xs_boolean_true(attr):
+            return False, "resource \"{}\" meta_attributes is-managed is false".format(ident)
         # then rsc_defaults is-managed attribute
         attr = get_attr_value(self.rsc_dflt_elem, "is-managed")
-        if attr:
-            return is_xs_boolean_true(attr)
-        # finally the is-managed-default property
-        attr = get_attr_value(self.prop_elem, "is-managed-default")
-        if attr:
-            return is_xs_boolean_true(attr)
-        return True
+        if attr and not is_xs_boolean_true(attr):
+            return False, "resource defaults meta_attributes is-managed is false"
+        return True, None
 
     def is_running(self, ident):
         '''
@@ -281,8 +346,6 @@ def is_group(self, ident):
         Test if the resource is a group
         '''
         rsc_node = self.rsc2node(ident)
-        if rsc_node is None:
-            return False
         return is_group(rsc_node)
 
     def can_delete(self, ident):
@@ -290,7 +353,7 @@ def can_delete(self, ident):
         Can a resource be deleted?
         The order below is important!
         '''
-        return not (self.is_running(ident) and not self.is_group(ident) and self.is_managed(ident))
+        return not (self.is_running(ident) and not self.is_group(ident) and self.is_managed(ident)[0])
 
 
 def resources_xml():
@@ -334,18 +397,6 @@ def mk_rsc_type(n):
     return ''.join((s1, s2, ra_type))
 
 
-def listnodes(include_remote_nodes=True):
-    cib = cibdump2elem()
-    if cib is None:
-        return []
-    local_nodes = cib.xpath('/cib/configuration/nodes/node/@uname')
-    if include_remote_nodes:
-        remote_nodes = cib.xpath('/cib/status/node_state[@remote_node="true"]/@uname')
-    else:
-        remote_nodes = []
-    return list(set([n for n in local_nodes + remote_nodes if n]))
-
-
 def is_our_node(s):
     '''
     Check if s is in a list of our nodes (ignore case).
@@ -353,7 +404,7 @@ def is_our_node(s):
 
     Includes remote nodes as well
     '''
-    for n in listnodes():
+    for n in NodeState().list_nodes():
         if n.lower() == s.lower():
             return True
     return False
diff --git a/data-manifest b/data-manifest
index 61eeb85d0a..5203977309 100644
--- a/data-manifest
+++ b/data-manifest
@@ -75,6 +75,7 @@ test/features/environment.py
 test/features/geo_setup.feature
 test/features/hb_report_bugs.feature
 test/features/ocfs2.feature
+test/features/operation_maintenance.feature
 test/features/qdevice_options.feature
 test/features/qdevice_setup_remove.feature
 test/features/qdevice_usercase.feature
@@ -194,8 +195,8 @@ test/unittests/test_objset.py
 test/unittests/test_ocfs2.py
 test/unittests/test_parallax.py
 test/unittests/test_parse.py
-test/unittests/test_ratrace.py
 test/unittests/test_qdevice.py
+test/unittests/test_ratrace.py
 test/unittests/test_report.py
 test/unittests/test_sbd.py
 test/unittests/test_scripts.py
diff --git a/test/features/operation_maintenance.feature b/test/features/operation_maintenance.feature
new file mode 100644
index 0000000000..b8c42f1de2
--- /dev/null
+++ b/test/features/operation_maintenance.feature
@@ -0,0 +1,57 @@
+@operation
+Feature: Test cluster/node/resources maintenance
+
+  Tag @clean means the cluster service needs to be stopped if it is running
+
+  Background: Setup one node cluster and configure some resources
+    Given   Cluster service is "stopped" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode2"
+    When    Run "crm cluster init -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    When    Run "crm configure primitive d Dummy op monitor interval=3s" on "hanode1"
+    Then    Resource "d" type "Dummy" is "Started"
+
+  @clean
+  Scenario: Give error when start/stop resources while cluster in maintenance
+    When    Run "crm maintenance on" on "hanode1"
+    And     Try "crm resource stop d" on "hanode1"
+    Then    Except "ERROR: resource.stop: Resource d is unmanaged" in stderr
+    Then    Resource "d" type "Dummy" is "Started"
+    When    Run "crm maintenance off" on "hanode1"
+    When    Run "crm resource stop d" on "hanode1"
+    Then    Resource "d" type "Dummy" is "Stopped"
+
+  @clean
+  Scenario: Give error when start/stop resources while all nodes in maintenance
+    When    Run "crm node maintenance hanode1" on "hanode1"
+    When    Run "crm node maintenance hanode2" on "hanode2"
+    And     Try "crm resource stop d" on "hanode1"
+    Then    Except "ERROR: resource.stop: Resource d is unmanaged" in stderr
+    Then    Resource "d" type "Dummy" is "Started"
+    When    Run "crm node ready hanode1" on "hanode1"
+    When    Run "crm node ready hanode2" on "hanode2"
+    When    Run "crm resource stop d" on "hanode1"
+    Then    Resource "d" type "Dummy" is "Stopped"
+
+  @clean
+  Scenario: Give error when start/stop resources while node running this RA in maintenance
+    When    Run "crm configure location loc1 d 100: hanode1" on "hanode1"
+    And     Run "crm node maintenance hanode1" on "hanode1"
+    And     Try "crm resource stop d" on "hanode1"
+    Then    Except "ERROR: resource.stop: Resource d is unmanaged" in stderr
+    Then    Resource "d" type "Dummy" is "Started"
+    When    Run "crm node ready hanode1" on "hanode1"
+    When    Run "crm resource stop d" on "hanode1"
+    Then    Resource "d" type "Dummy" is "Stopped"
+
+  @clean
+  Scenario: Give error when start/stop resources while this RA in maintenance
+    When    Run "crm resource maintenance d on" on "hanode1"
+    And     Try "crm resource stop d" on "hanode1"
+    Then    Except "ERROR: resource.stop: Resource d is unmanaged" in stderr
+    Then    Resource "d" type "Dummy" is "Started"
+    When    Run "crm resource maintenance d off" on "hanode1"
+    When    Run "crm resource stop d" on "hanode1"
+    Then    Resource "d" type "Dummy" is "Stopped"
diff --git a/test/run-in-travis.sh b/test/run-in-travis.sh
index 849ae40e6f..352435b100 100755
--- a/test/run-in-travis.sh
+++ b/test/run-in-travis.sh
@@ -27,7 +27,7 @@ case "$1" in
     configure
     make_install
     exit $?;;
-  bootstrap|qdevice|hb_report|resource|geo|configure|constraints|ocfs2)
+  bootstrap|qdevice|hb_report|resource|geo|configure|constraints|ocfs2|operation)
    functional_tests $1 $2
    exit $?;;
  *|original)
diff --git a/test/unittests/test_bootstrap.py b/test/unittests/test_bootstrap.py
index 610b25d9cd..2d83366553 100644
--- a/test/unittests/test_bootstrap.py
+++ b/test/unittests/test_bootstrap.py
@@ -1249,7 +1249,7 @@ def test_bootstrap_remove_self(self, mock_context, mock_init, mock_active,
         mock_error.assert_not_called()
         mock_self.assert_called_once_with()
 
-    @mock.patch('crmsh.xmlutil.listnodes')
+    @mock.patch('crmsh.xmlutil.NodeState.list_nodes')
     @mock.patch('crmsh.utils.this_node')
     @mock.patch('crmsh.bootstrap.confirm')
     @mock.patch('crmsh.bootstrap.get_cluster_node_hostname')
@@ -1280,7 +1280,7 @@ def test_bootstrap_remove_not_in_cluster(self, mock_context, mock_init, mock_act
         mock_error.assert_called_once_with("Specified node node2 is not configured in cluster! Unable to remove.")
 
     @mock.patch('crmsh.bootstrap.remove_node_from_cluster')
-    @mock.patch('crmsh.xmlutil.listnodes')
+    @mock.patch('crmsh.xmlutil.NodeState.list_nodes')
     @mock.patch('crmsh.utils.this_node')
     @mock.patch('crmsh.bootstrap.confirm')
     @mock.patch('crmsh.bootstrap.get_cluster_node_hostname')
@@ -1312,7 +1312,7 @@ def test_bootstrap_remove(self, mock_context, mock_init, mock_active,
 
     @mock.patch('crmsh.bootstrap.error')
     @mock.patch('crmsh.utils.ext_cmd_nosudo')
-    @mock.patch('crmsh.xmlutil.listnodes')
+    @mock.patch('crmsh.xmlutil.NodeState.list_nodes')
     def test_remove_self_other_nodes(self, mock_list, mock_ext, mock_error):
         mock_list.return_value = ["node1", "node2"]
         mock_ext.return_value = 1
@@ -1329,7 +1329,7 @@ def test_remove_self_other_nodes(self, mock_list, mock_ext, mock_error):
     @mock.patch('crmsh.bootstrap.error')
     @mock.patch('crmsh.bootstrap.invokerc')
     @mock.patch('crmsh.bootstrap.stop_services')
-    @mock.patch('crmsh.xmlutil.listnodes')
+    @mock.patch('crmsh.xmlutil.NodeState.list_nodes')
     def test_remove_self_rm_failed(self, mock_list, mock_stop_service, mock_invoke, mock_error):
         mock_list.return_value = ["node1"]
         mock_invoke.return_value = False
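
Note for reviewers (not part of the patch): a minimal sketch, assuming crmsh is installed and a cluster CIB is available, of how callers are expected to consume the reworked xmlutil API above. RscState().is_managed() now returns a (managed, reason) tuple instead of a bare boolean, and NodeState() answers per-node maintenance queries. The helper names and the print-based warning are illustrative only.

    from crmsh import xmlutil

    def warn_if_unmanaged(rsc_id):
        # is_managed() returns (managed, reason); reason is None when the resource is managed.
        # Note: with this patch, rsc2node() raises ValueError if rsc_id is not found in the CIB.
        managed, reason = xmlutil.RscState().is_managed(rsc_id)
        if not managed:
            print("Resource {} is unmanaged ({})".format(rsc_id, reason))
        return managed

    def is_node_in_maintenance(uname):
        # NodeState dumps the CIB once (via cibdump2elem) and answers queries against that copy.
        return xmlutil.NodeState().is_node_in_maintenance(uname)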