Skip to content

Commit

Permalink
fix(topology): use topology when decreasing replicas
Browse files Browse the repository at this point in the history
When removing volume replicas, take the topology into account and prefer to
remove replicas which don't line up with the volume topology.
This can happen when a volume's topology has been modified.

Signed-off-by: Tiago Castro <[email protected]>
  • Loading branch information
tiagolobocastro committed Jan 6, 2025
1 parent 4104acf commit 1e414c7
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 22 deletions.
15 changes: 15 additions & 0 deletions control-plane/agents/src/bin/core/controller/scheduling/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,21 @@ impl ChildSorters {
match Self::sort_by_health(a, b) {
Ordering::Equal => match Self::sort_by_child(a, b) {
Ordering::Equal => {
// remove mismatched topology replicas first
if let (Some(a), Some(b)) = (a.valid_node_topology(), b.valid_node_topology()) {
match a.cmp(b) {
Ordering::Equal => {}
// todo: what if the pool and node topology are at odds with each other?
_else => return _else,
}
}
if let (Some(a), Some(b)) = (a.valid_pool_topology(), b.valid_pool_topology()) {
match a.cmp(b) {
Ordering::Equal => {}
_else => return _else,
}
}

let childa_is_local = !a.spec().share.shared();
let childb_is_local = !b.spec().share.shared();
if childa_is_local == childb_is_local {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ pub(crate) struct ReplicaItem {
/// an Affinity Group and the already created volumes have replicas
/// on this pool.
ag_replicas_on_pool: Option<u64>,
valid_pool_topology: Option<bool>,
valid_node_topology: Option<bool>,
}

impl ReplicaItem {
Expand All @@ -197,8 +199,32 @@ impl ReplicaItem {
child_spec,
child_info,
ag_replicas_on_pool,
valid_pool_topology: None,
valid_node_topology: None,
}
}
/// Set the validity of the replica's node topology.
/// If set to false, this means the replica is not in sync with the volume topology and
/// therefore should be replaced by another replica which is in sync.
pub(crate) fn with_node_topology(mut self, valid_node_topology: Option<bool>) -> ReplicaItem {
self.valid_node_topology = valid_node_topology;
self
}
/// Set the validity of the replica's pool topology.
/// If set to false, this means the replica is not in sync with the volume topology and
/// therefore should be replaced by another replica which is in sync.
pub(crate) fn with_pool_topology(mut self, valid_pool_topology: Option<bool>) -> ReplicaItem {
self.valid_pool_topology = valid_pool_topology;
self
}
/// Get a reference to the node topology validity flag.
/// `None` until set via `with_node_topology`; `Some(false)` flags a replica whose node
/// placement is not in sync with the volume's node topology.
pub(crate) fn valid_node_topology(&self) -> &Option<bool> {
    &self.valid_node_topology
}
/// Get a reference to the pool topology validity flag.
/// `None` until set via `with_pool_topology`; `Some(false)` flags a replica whose pool
/// placement is not in sync with the volume's pool topology.
pub(crate) fn valid_pool_topology(&self) -> &Option<bool> {
    &self.valid_pool_topology
}
/// Get a reference to the replica spec.
pub(crate) fn spec(&self) -> &ReplicaSpec {
&self.replica_spec
Expand Down
20 changes: 18 additions & 2 deletions control-plane/agents/src/bin/core/controller/scheduling/volume.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@ use crate::controller::{
affinity_group::{get_pool_ag_replica_count, get_restricted_nodes},
pool::replica_rebuildable,
resources::{ChildItem, PoolItem, PoolItemLister, ReplicaItem},
volume_policy::{SimplePolicy, ThickPolicy},
volume_policy::{node::NodeFilters, SimplePolicy, ThickPolicy},
AddReplicaFilters, AddReplicaSorters, ChildSorters, ResourceData, ResourceFilter,
},
wrapper::PoolWrapper,
};
use agents::errors::SvcError;
use std::{collections::HashMap, ops::Deref};
use stor_port::types::v0::{
store::{
nexus::NexusSpec, nexus_persistence::NexusInfo, snapshots::replica::ReplicaSnapshot,
Expand All @@ -20,6 +19,8 @@ use stor_port::types::v0::{
transport::{NodeId, PoolId, VolumeState},
};

use std::{collections::HashMap, ops::Deref};

/// Move replica to another pool.
#[derive(Default, Clone)]
pub(crate) struct MoveReplica {
Expand Down Expand Up @@ -343,6 +344,21 @@ impl GetChildForRemovalContext {
}),
ag_rep_count,
)
.with_node_topology({
Some(NodeFilters::topology_replica(
&self.spec,
&self.registry,
replica_spec.pool_name(),
))
})
.with_pool_topology({
use crate::controller::scheduling::volume_policy::pool::PoolBaseFilters;
Some(PoolBaseFilters::topology_(
&self.spec,
&self.registry,
replica_spec.pool_name(),
))
})
})
.collect::<Vec<_>>()
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
use crate::controller::scheduling::{
nexus::GetSuitableNodesContext,
resources::{NodeItem, PoolItem},
volume::GetSuitablePoolsContext,
volume_policy::qualifies_label_criteria,
use crate::controller::{
registry::Registry,
scheduling::{
nexus::GetSuitableNodesContext,
resources::{NodeItem, PoolItem},
volume::GetSuitablePoolsContext,
volume_policy::qualifies_label_criteria,
},
};
use stor_port::types::v0::{
store::volume::VolumeSpec,
transport::{NodeId, NodeTopology, PoolId},
};

use std::collections::HashMap;
use stor_port::types::v0::transport::NodeTopology;

/// Filter nodes used for replica creation.
pub(crate) struct NodeFilters {}
Expand Down Expand Up @@ -72,9 +79,24 @@ impl NodeFilters {
}
/// Should only attempt to use nodes having specific creation label if topology has it.
/// Thin wrapper over `topology_` for the pool-candidate path: checks the candidate
/// pool's node against the volume's node topology. NOTE(review): `request` appears to
/// deref to the volume spec — confirm `GetSuitablePoolsContext`'s `Deref` target.
pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
    Self::topology_(request, request.registry(), &item.pool.node)
}
/// Should only attempt to use nodes having specific creation label if topology has it.
/// Resolves the pool which hosts an existing replica and checks that pool's node against
/// the volume's node topology; a pool which cannot be resolved from the registry is
/// treated as not matching the topology.
pub(crate) fn topology_replica(
    volume: &VolumeSpec,
    registry: &Registry,
    pool_id: &PoolId,
) -> bool {
    match registry.specs().pool(pool_id) {
        Ok(pool) => Self::topology_(volume, registry, &pool.node),
        Err(_) => false,
    }
}
/// Should only attempt to use nodes having specific creation label if topology has it.
pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, node_id: &NodeId) -> bool {
let volume_node_topology_inclusion_labels: HashMap<String, String>;
let volume_node_topology_exclusion_labels: HashMap<String, String>;
match &request.topology {
match &volume.topology {
None => return true,
Some(topology) => match &topology.node {
None => return true,
Expand All @@ -99,7 +121,7 @@ impl NodeFilters {
};

// We will reach this part of code only if the volume has inclusion/exclusion labels.
match request.registry().specs().node(&item.pool.node) {
match registry.specs().node(node_id) {
Ok(spec) => {
qualifies_label_criteria(volume_node_topology_inclusion_labels, spec.labels())
&& qualifies_label_criteria(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
use crate::controller::scheduling::{
resources::{ChildItem, PoolItem},
volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
volume_policy::qualifies_label_criteria,
use crate::controller::{
registry::Registry,
scheduling::{
resources::{ChildItem, PoolItem},
volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
volume_policy::qualifies_label_criteria,
},
};
use stor_port::types::v0::{
store::volume::VolumeSpec,
transport::{PoolId, PoolStatus, PoolTopology},
};

use std::collections::HashMap;
use stor_port::types::v0::transport::{PoolStatus, PoolTopology};

/// Filter pools used for replica creation.
pub(crate) struct PoolBaseFilters {}
Expand Down Expand Up @@ -77,27 +84,32 @@ impl PoolBaseFilters {

/// Should only attempt to use pools having specific creation label if topology has it.
/// Thin wrapper over `topology_` for the pool-candidate path, passing the scheduling
/// context's volume spec, its registry, and the candidate pool's id.
pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
    Self::topology_(request, request.registry(), &item.pool.id)
}

/// Should only attempt to use pools having specific creation label if topology has it.
pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, pool_id: &PoolId) -> bool {
let volume_pool_topology_inclusion_labels: HashMap<String, String>;
match request.topology.clone() {
match &volume.topology {
None => return true,
Some(topology) => match topology.pool {
Some(topology) => match &topology.pool {
None => return true,
Some(pool_topology) => match pool_topology {
Some(pool_topology) => match &pool_topology {
PoolTopology::Labelled(labelled_topology) => {
// The labels in Volume Pool Topology should match the pool labels if
// present, otherwise selection of any pool is allowed.
if !labelled_topology.inclusion.is_empty() {
volume_pool_topology_inclusion_labels = labelled_topology.inclusion
} else {
if labelled_topology.inclusion.is_empty() {
// todo: missing exclusion check?
return true;
}
volume_pool_topology_inclusion_labels = labelled_topology.inclusion.clone()
}
},
},
};

// We will reach this part of code only if the volume has inclusion/exclusion labels.
match request.registry().specs().pool(&item.pool.id) {
match registry.specs().pool(pool_id) {
Ok(spec) => match spec.labels {
None => false,
Some(pool_labels) => {
Expand Down

0 comments on commit 1e414c7

Please sign in to comment.