diff --git a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
index 147e0e475..1c645222e 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
@@ -163,6 +163,21 @@ impl ChildSorters {
         match Self::sort_by_health(a, b) {
             Ordering::Equal => match Self::sort_by_child(a, b) {
                 Ordering::Equal => {
+                    // remove mismatched topology replicas first
+                    if let (Some(a), Some(b)) = (a.valid_node_topology(), b.valid_node_topology()) {
+                        match a.cmp(b) {
+                            Ordering::Equal => {}
+                            // todo: what if the pool and node topology are at odds with each other?
+                            _else => return _else,
+                        }
+                    }
+                    if let (Some(a), Some(b)) = (a.valid_pool_topology(), b.valid_pool_topology()) {
+                        match a.cmp(b) {
+                            Ordering::Equal => {}
+                            _else => return _else,
+                        }
+                    }
+
                     let childa_is_local = !a.spec().share.shared();
                     let childb_is_local = !b.spec().share.shared();
                     if childa_is_local == childb_is_local {
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
index 6193d57c6..e187c0bef 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
@@ -176,6 +176,8 @@ pub(crate) struct ReplicaItem {
     /// a Affinity Group and the already created volumes have replicas
     /// on this pool.
     ag_replicas_on_pool: Option<u64>,
+    valid_pool_topology: Option<bool>,
+    valid_node_topology: Option<bool>,
 }
 
 impl ReplicaItem {
@@ -197,8 +199,32 @@ impl ReplicaItem {
             child_spec,
             child_info,
             ag_replicas_on_pool,
+            valid_pool_topology: None,
+            valid_node_topology: None,
         }
     }
+    /// Set the validity of the replica's node topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_node_topology(mut self, valid_node_topology: Option<bool>) -> ReplicaItem {
+        self.valid_node_topology = valid_node_topology;
+        self
+    }
+    /// Set the validity of the replica's pool topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_pool_topology(mut self, valid_pool_topology: Option<bool>) -> ReplicaItem {
+        self.valid_pool_topology = valid_pool_topology;
+        self
+    }
+    /// Get a reference to the node topology validity flag.
+    pub(crate) fn valid_node_topology(&self) -> &Option<bool> {
+        &self.valid_node_topology
+    }
+    /// Get a reference to the pool topology validity flag.
+    pub(crate) fn valid_pool_topology(&self) -> &Option<bool> {
+        &self.valid_pool_topology
+    }
     /// Get a reference to the replica spec.
     pub(crate) fn spec(&self) -> &ReplicaSpec {
         &self.replica_spec
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
index e3a1ef513..09d3d499a 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
@@ -5,13 +5,12 @@ use crate::controller::{
         affinity_group::{get_pool_ag_replica_count, get_restricted_nodes},
         pool::replica_rebuildable,
         resources::{ChildItem, PoolItem, PoolItemLister, ReplicaItem},
-        volume_policy::{SimplePolicy, ThickPolicy},
+        volume_policy::{node::NodeFilters, SimplePolicy, ThickPolicy},
         AddReplicaFilters, AddReplicaSorters, ChildSorters, ResourceData, ResourceFilter,
     },
     wrapper::PoolWrapper,
 };
 use agents::errors::SvcError;
-use std::{collections::HashMap, ops::Deref};
 use stor_port::types::v0::{
     store::{
         nexus::NexusSpec, nexus_persistence::NexusInfo, snapshots::replica::ReplicaSnapshot,
@@ -20,6 +19,8 @@ use stor_port::types::v0::{
     transport::{NodeId, PoolId, VolumeState},
 };
 
+use std::{collections::HashMap, ops::Deref};
+
 /// Move replica to another pool.
 #[derive(Default, Clone)]
 pub(crate) struct MoveReplica {
@@ -343,6 +344,21 @@ impl GetChildForRemovalContext {
                 }),
                 ag_rep_count,
             )
+            .with_node_topology({
+                Some(NodeFilters::topology_replica(
+                    &self.spec,
+                    &self.registry,
+                    replica_spec.pool_name(),
+                ))
+            })
+            .with_pool_topology({
+                use crate::controller::scheduling::volume_policy::pool::PoolBaseFilters;
+                Some(PoolBaseFilters::topology_(
+                    &self.spec,
+                    &self.registry,
+                    replica_spec.pool_name(),
+                ))
+            })
             })
             .collect::<Vec<_>>()
     }
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs
index 857e85396..c076f2bc5 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/node.rs
@@ -1,11 +1,18 @@
-use crate::controller::scheduling::{
-    nexus::GetSuitableNodesContext,
-    resources::{NodeItem, PoolItem},
-    volume::GetSuitablePoolsContext,
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        nexus::GetSuitableNodesContext,
+        resources::{NodeItem, PoolItem},
+        volume::GetSuitablePoolsContext,
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{NodeId, NodeTopology, PoolId},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::NodeTopology;
 
 /// Filter nodes used for replica creation.
 pub(crate) struct NodeFilters {}
@@ -72,9 +79,24 @@ impl NodeFilters {
     }
     /// Should only attempt to use nodes having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_replica(
+        volume: &VolumeSpec,
+        registry: &Registry,
+        pool_id: &PoolId,
+    ) -> bool {
+        let Ok(pool) = registry.specs().pool(pool_id) else {
+            return false;
+        };
+        Self::topology_(volume, registry, &pool.node)
+    }
+    /// Should only attempt to use nodes having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, node_id: &NodeId) -> bool {
         let volume_node_topology_inclusion_labels: HashMap<String, String>;
         let volume_node_topology_exclusion_labels: HashMap<String, String>;
-        match &request.topology {
+        match &volume.topology {
             None => return true,
             Some(topology) => match &topology.node {
                 None => return true,
@@ -99,7 +121,7 @@ impl NodeFilters {
         };
 
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().node(&item.pool.node) {
+        match registry.specs().node(node_id) {
             Ok(spec) => {
                 qualifies_label_criteria(volume_node_topology_inclusion_labels, spec.labels())
                     && qualifies_label_criteria(
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
index 6dd80ef93..467062b4b 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
@@ -1,10 +1,17 @@
-use crate::controller::scheduling::{
-    resources::{ChildItem, PoolItem},
-    volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        resources::{ChildItem, PoolItem},
+        volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
+        volume_policy::qualifies_label_criteria,
+    },
 };
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{PoolId, PoolStatus, PoolTopology},
+};
+
 use std::collections::HashMap;
-use stor_port::types::v0::transport::{PoolStatus, PoolTopology};
 
 /// Filter pools used for replica creation.
 pub(crate) struct PoolBaseFilters {}
@@ -77,27 +84,32 @@ impl PoolBaseFilters {
     /// Should only attempt to use pools having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.id)
+    }
+
+    /// Should only attempt to use pools having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, pool_id: &PoolId) -> bool {
         let volume_pool_topology_inclusion_labels: HashMap<String, String>;
-        match request.topology.clone() {
+        match &volume.topology {
             None => return true,
-            Some(topology) => match topology.pool {
+            Some(topology) => match &topology.pool {
                 None => return true,
-                Some(pool_topology) => match pool_topology {
+                Some(pool_topology) => match &pool_topology {
                     PoolTopology::Labelled(labelled_topology) => {
                         // The labels in Volume Pool Topology should match the pool labels if
                         // present, otherwise selection of any pool is allowed.
-                        if !labelled_topology.inclusion.is_empty() {
-                            volume_pool_topology_inclusion_labels = labelled_topology.inclusion
-                        } else {
+                        if labelled_topology.inclusion.is_empty() {
+                            // todo: missing exclusion check?
                             return true;
                         }
+                        volume_pool_topology_inclusion_labels = labelled_topology.inclusion.clone()
                     }
                 },
             },
         };
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().pool(&item.pool.id) {
+        match registry.specs().pool(pool_id) {
             Ok(spec) => match spec.labels {
                 None => false,
                 Some(pool_labels) => {
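
Review note on the new tie-breaker in `ChildSorters`: it compares the two validity flags only when both sides are `Some`, and it relies on `bool`'s `Ord` ordering (`false < true`), so a replica whose placement violates the volume topology (`Some(false)`) sorts ahead of a compliant one (`Some(true)`) and becomes the first candidate for removal, while `None` (never evaluated) skips the tie-breaker entirely. A minimal, self-contained sketch of that ordering (names are illustrative, not the crate's types):

```rust
use std::cmp::Ordering;

fn main() {
    // bool::cmp orders false before true, which is what makes
    // "invalid topology first" fall out of a plain ascending sort.
    assert_eq!(false.cmp(&true), Ordering::Less);

    // Mirror of the diff's pattern: only compare when both candidates
    // carry a known validity; a None on either side falls through.
    let (a, b): (Option<bool>, Option<bool>) = (Some(false), Some(true));
    if let (Some(a), Some(b)) = (a, b) {
        match a.cmp(&b) {
            Ordering::Equal => println!("tie, fall through to locality"),
            other => println!("decided by topology validity: {other:?}"),
        }
    }
}
```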
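Both `NodeFilters::topology_` and `PoolBaseFilters::topology_` delegate the actual label comparison to `qualifies_label_criteria`, which this diff does not show. As a rough mental model only, assuming inclusion labels use subset semantics (every requested key/value pair must be present on the node or pool), the check looks something like the sketch below; the real helper may treat exclusion labels and empty values differently:

```rust
use std::collections::HashMap;

// Hypothetical stand-in for qualifies_label_criteria (not shown in this
// diff): inclusion qualifies when every requested key/value pair is
// present on the target's labels.
fn qualifies(required: &HashMap<String, String>, actual: &HashMap<String, String>) -> bool {
    required.iter().all(|(k, v)| actual.get(k) == Some(v))
}

fn main() {
    let required: HashMap<_, _> =
        [("zone".to_string(), "us-west-1a".to_string())].into_iter().collect();
    let node_labels: HashMap<_, _> = [
        ("zone".to_string(), "us-west-1a".to_string()),
        ("rack".to_string(), "r7".to_string()),
    ]
    .into_iter()
    .collect();
    // The node carries the required zone label, so it qualifies.
    assert!(qualifies(&required, &node_labels));
}
```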
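For completeness, a simplified mirror of how `GetChildForRemovalContext` now stamps each candidate through the new builder methods. The tri-state is deliberate: `None` means the validity was never evaluated, and only a known mismatch (`Some(false)`) pushes a replica to the front of the removal order. `Item` below is a hypothetical stand-in, not the crate's `ReplicaItem`:

```rust
// Minimal mirror of the ReplicaItem builder flow added in this diff;
// the real ReplicaItem carries many more fields.
#[derive(Debug, Default)]
struct Item {
    valid_node_topology: Option<bool>, // None = not evaluated
    valid_pool_topology: Option<bool>, // Some(false) = out of sync, remove first
}

impl Item {
    fn with_node_topology(mut self, v: Option<bool>) -> Self {
        self.valid_node_topology = v;
        self
    }
    fn with_pool_topology(mut self, v: Option<bool>) -> Self {
        self.valid_pool_topology = v;
        self
    }
}

fn main() {
    // As in GetChildForRemovalContext: evaluate the volume's node/pool
    // topology against the replica's placement and record the verdicts.
    let item = Item::default()
        .with_node_topology(Some(true))
        .with_pool_topology(Some(false));
    println!("{item:?}");
}
```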