From 1e414c75b5b5fd42457c0df8c12518426066b78e Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Tue, 17 Dec 2024 15:56:36 +0000
Subject: [PATCH] fix(topology): use topology when decreasing replicas

When removing volume replicas, take the topology into account and prefer to
remove replicas which don't line up with the volume topology. This can happen
when a volume's topology has been modified.

Signed-off-by: Tiago Castro
---
 .../src/bin/core/controller/scheduling/mod.rs | 15 ++++++++
 .../controller/scheduling/resources/mod.rs    | 26 +++++++++++++
 .../bin/core/controller/scheduling/volume.rs  | 20 +++++++++-
 .../scheduling/volume_policy/node.rs          | 38 +++++++++++++++----
 .../scheduling/volume_policy/pool.rs          | 36 ++++++++++++------
 5 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
index 147e0e475..1c645222e 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
@@ -163,6 +163,21 @@ impl ChildSorters {
         match Self::sort_by_health(a, b) {
             Ordering::Equal => match Self::sort_by_child(a, b) {
                 Ordering::Equal => {
+                    // remove mismatched topology replicas first
+                    if let (Some(a), Some(b)) = (a.valid_node_topology(), b.valid_node_topology()) {
+                        match a.cmp(b) {
+                            Ordering::Equal => {}
+                            // todo: what if the pool and node topology are at odds with each other?
+                            _else => return _else,
+                        }
+                    }
+                    if let (Some(a), Some(b)) = (a.valid_pool_topology(), b.valid_pool_topology()) {
+                        match a.cmp(b) {
+                            Ordering::Equal => {}
+                            _else => return _else,
+                        }
+                    }
+
                     let childa_is_local = !a.spec().share.shared();
                     let childb_is_local = !b.spec().share.shared();
                     if childa_is_local == childb_is_local {
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
index 6193d57c6..e187c0bef 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
@@ -176,6 +176,8 @@ pub(crate) struct ReplicaItem {
     /// a Affinity Group and the already created volumes have replicas
     /// on this pool.
     ag_replicas_on_pool: Option<u64>,
+    valid_pool_topology: Option<bool>,
+    valid_node_topology: Option<bool>,
 }
 
 impl ReplicaItem {
@@ -197,8 +199,32 @@ impl ReplicaItem {
             child_spec,
             child_info,
             ag_replicas_on_pool,
+            valid_pool_topology: None,
+            valid_node_topology: None,
         }
     }
+    /// Set the validity of the replica's node topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_node_topology(mut self, valid_node_topology: Option<bool>) -> ReplicaItem {
+        self.valid_node_topology = valid_node_topology;
+        self
+    }
+    /// Set the validity of the replica's pool topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_pool_topology(mut self, valid_pool_topology: Option<bool>) -> ReplicaItem {
+        self.valid_pool_topology = valid_pool_topology;
+        self
+    }
+    /// Get a reference to the node topology validity flag.
+    pub(crate) fn valid_node_topology(&self) -> &Option<bool> {
+        &self.valid_node_topology
+    }
+    /// Get a reference to the pool topology validity flag.
+    pub(crate) fn valid_pool_topology(&self) -> &Option<bool> {
+        &self.valid_pool_topology
+    }
     /// Get a reference to the replica spec.
     pub(crate) fn spec(&self) -> &ReplicaSpec {
         &self.replica_spec
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
index e3a1ef513..09d3d499a 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
@@ -5,13 +5,12 @@ use crate::controller::{
         affinity_group::{get_pool_ag_replica_count, get_restricted_nodes},
         pool::replica_rebuildable,
         resources::{ChildItem, PoolItem, PoolItemLister, ReplicaItem},
-        volume_policy::{SimplePolicy, ThickPolicy},
+        volume_policy::{node::NodeFilters, SimplePolicy, ThickPolicy},
         AddReplicaFilters, AddReplicaSorters, ChildSorters, ResourceData, ResourceFilter,
     },
     wrapper::PoolWrapper,
 };
 use agents::errors::SvcError;
-use std::{collections::HashMap, ops::Deref};
 use stor_port::types::v0::{
     store::{
         nexus::NexusSpec, nexus_persistence::NexusInfo, snapshots::replica::ReplicaSnapshot,
@@ -20,6 +19,8 @@ use stor_port::types::v0::{
     transport::{NodeId, PoolId, VolumeState},
 };
+
+use std::{collections::HashMap, ops::Deref};
+
 /// Move replica to another pool.
 #[derive(Default, Clone)]
 pub(crate) struct MoveReplica {
@@ -343,6 +344,21 @@ impl GetChildForRemovalContext {
                     }),
                     ag_rep_count,
                 )
+                .with_node_topology({
+                    Some(NodeFilters::topology_replica(
+                        &self.spec,
+                        &self.registry,
+                        replica_spec.pool_name(),
+                    ))
+                })
+                .with_pool_topology({
+                    use crate::controller::scheduling::volume_policy::pool::PoolBaseFilters;
+                    Some(PoolBaseFilters::topology_(
+                        &self.spec,
+                        &self.registry,
+                        replica_spec.pool_name(),
+                    ))
+                })
             })
             .collect::<Vec<_>>()
     }
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs
index 857e85396..c076f2bc5 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs
@@ -1,11 +1,18 @@
-use crate::controller::scheduling::{
-    nexus::GetSuitableNodesContext,
-    resources::{NodeItem, PoolItem},
-    volume::GetSuitablePoolsContext,
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        nexus::GetSuitableNodesContext,
+        resources::{NodeItem, PoolItem},
+        volume::GetSuitablePoolsContext,
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{NodeId, NodeTopology, PoolId},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::NodeTopology;
 
 /// Filter nodes used for replica creation.
 pub(crate) struct NodeFilters {}
@@ -72,9 +79,24 @@ impl NodeFilters {
     }
     /// Should only attempt to use nodes having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_replica(
+        volume: &VolumeSpec,
+        registry: &Registry,
+        pool_id: &PoolId,
+    ) -> bool {
+        let Ok(pool) = registry.specs().pool(pool_id) else {
+            return false;
+        };
+        Self::topology_(volume, registry, &pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, node_id: &NodeId) -> bool {
         let volume_node_topology_inclusion_labels: HashMap<String, String>;
         let volume_node_topology_exclusion_labels: HashMap<String, String>;
-        match &request.topology {
+        match &volume.topology {
             None => return true,
             Some(topology) => match &topology.node {
                 None => return true,
@@ -99,7 +121,7 @@ impl NodeFilters {
         };
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().node(&item.pool.node) {
+        match registry.specs().node(node_id) {
             Ok(spec) => {
                 qualifies_label_criteria(volume_node_topology_inclusion_labels, spec.labels())
                     && qualifies_label_criteria(
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
index 6dd80ef93..467062b4b 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
@@ -1,10 +1,17 @@
-use crate::controller::scheduling::{
-    resources::{ChildItem, PoolItem},
-    volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        resources::{ChildItem, PoolItem},
+        volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{PoolId, PoolStatus, PoolTopology},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::{PoolStatus, PoolTopology};
 
 /// Filter pools used for replica creation.
 pub(crate) struct PoolBaseFilters {}
@@ -77,27 +84,32 @@ impl PoolBaseFilters {
     /// Should only attempt to use pools having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.id)
+    }
+
+    /// Should only attempt to use pools having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, pool_id: &PoolId) -> bool {
         let volume_pool_topology_inclusion_labels: HashMap<String, String>;
-        match request.topology.clone() {
+        match &volume.topology {
             None => return true,
-            Some(topology) => match topology.pool {
+            Some(topology) => match &topology.pool {
                 None => return true,
-                Some(pool_topology) => match pool_topology {
+                Some(pool_topology) => match &pool_topology {
                     PoolTopology::Labelled(labelled_topology) => {
                         // The labels in Volume Pool Topology should match the pool labels if
                         // present, otherwise selection of any pool is allowed.
-                        if !labelled_topology.inclusion.is_empty() {
-                            volume_pool_topology_inclusion_labels = labelled_topology.inclusion
-                        } else {
+                        if labelled_topology.inclusion.is_empty() {
+                            // todo: missing exclusion check?
                             return true;
                         }
+                        volume_pool_topology_inclusion_labels = labelled_topology.inclusion.clone()
                     }
                 },
             },
         };
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().pool(&item.pool.id) {
+        match registry.specs().pool(pool_id) {
             Ok(spec) => match spec.labels {
                 None => false,
                 Some(pool_labels) => {