Skip to content

Commit

Permalink
fix(volume_reconciler): changes in replica disown logic
Browse files Browse the repository at this point in the history
Signed-off-by: Abhilash Shetty <[email protected]>
  • Loading branch information
abhilashshetty04 committed Jan 22, 2025
1 parent b613964 commit cab6ad9
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ async fn disown_unused_nexuses(
context: &PollContext,
) -> PollResult {
let mut results = vec![];

let vol_id = volume.uuid().clone();
for nexus in context.specs().volume_nexuses(volume.uuid()) {
if let Ok(mut nexus) = nexus.operation_guard() {
if let Some(target) = volume.as_ref().target() {
Expand All @@ -159,7 +159,9 @@ async fn disown_unused_nexuses(
}
}

nexus.warn_span(|| tracing::warn!("Attempting to disown unused nexus"));
nexus.info_span(|| {
tracing::info!("Attempting to disown unused nexus for vol : {}", vol_id)
});
// the nexus garbage collector will destroy the disowned nexuses
let owner = NexusOwners::Volume(volume.uuid().clone());
match nexus.remove_owners(context.registry(), &owner, false).await {
Expand Down Expand Up @@ -207,6 +209,11 @@ async fn disown_unused_replicas(
return PollResult::Ok(PollerState::Busy);
}

let nexus_id = volume
.as_ref()
.target_config
.as_ref()
.map(|target| target.uuid());
let mut nexus_info = None; // defer reading from the persistent store unless we find a candidate
let mut results = vec![];

Expand All @@ -218,15 +225,20 @@ async fn disown_unused_replicas(

let replica_in_target = target.as_ref().contains_replica(&replica.as_ref().uuid);
let replica_online = matches!(context.registry().replica(&replica.as_ref().uuid).await, Ok(state) if state.online());
let replica_dirty = replica.as_ref().dirty();
let replica_owned_by_volume = replica.as_ref().owners.owned_by(volume.uuid());
let is_replica_owned_by_volume_target = replica.as_ref().owned_by_nexus(nexus_id);
let is_replica_healthy =
is_replica_healthy(context, &mut nexus_info, replica.as_ref(), volume.as_ref()).await?;

if !replica_online
&& !replica.as_ref().dirty()
&& replica.as_ref().owners.owned_by(volume.uuid())
&& !replica.as_ref().owners.owned_by_a_nexus()
&& !replica_dirty
&& replica_owned_by_volume
&& !is_replica_owned_by_volume_target
&& !replica_in_target
&& !is_replica_healthy(context, &mut nexus_info, replica.as_ref(), volume.as_ref())
.await?
&& !is_replica_healthy
{
volume.warn_span(|| tracing::warn!(replica.uuid = %replica.as_ref().uuid, "Attempting to disown unused replica"));
volume.info_span(|| tracing::info!(replica.uuid = %replica.as_ref().uuid, "Attempting to disown unused replica"));

// the replica garbage collector will destroy the disowned replica
match replica
Expand Down Expand Up @@ -387,7 +399,7 @@ async fn disown_non_reservable_replica(
mut replica: OperationGuardArc<ReplicaSpec>,
context: &PollContext,
) -> PollResult {
volume.warn_span(|| tracing::warn!(replica.uuid = %replica.as_ref().uuid, "Attempting to disown non reservable replica"));
volume.info_span(|| tracing::info!(replica.uuid = %replica.as_ref().uuid, "Attempting to disown non reservable replica"));

// Since it's a local replica of a shutting down nexus i.e. a failed
// shutdown nexus we don't want to be a part of anything until such nexus is destroyed.
Expand Down
13 changes: 11 additions & 2 deletions control-plane/stor-port/src/types/v0/store/replica.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ use crate::{
AsOperationSequencer, OperationSequence, SpecStatus, SpecTransaction,
},
transport::{
self, CreateReplica, HostNqn, PoolId, PoolUuid, Protocol, ReplicaId, ReplicaKind,
ReplicaName, ReplicaOwners, ReplicaShareProtocol, SnapshotCloneSpecParams, VolumeId,
self, CreateReplica, HostNqn, NexusId, PoolId, PoolUuid, Protocol, ReplicaId,
ReplicaKind, ReplicaName, ReplicaOwners, ReplicaShareProtocol, SnapshotCloneSpecParams,
VolumeId,
},
},
IntoOption,
Expand Down Expand Up @@ -108,6 +109,14 @@ impl ReplicaSpec {
pub fn owned_by(&self, id: &VolumeId) -> bool {
self.owners.owned_by(id)
}
/// Check if this replica is owned by the nexus.
pub fn owned_by_nexus(&self, nexus_id: Option<&NexusId>) -> bool {
if let Some(nexus_id) = nexus_id {
self.owners.is_owned_by_nexus(nexus_id)
} else {
false
}
}
}

/// Reference of a pool.
Expand Down
4 changes: 4 additions & 0 deletions control-plane/stor-port/src/types/v0/transport/replica.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,10 @@ impl ReplicaOwners {
self.nexuses.push(new.clone())
}
}
/// Checks if nexus is present on the nexus list.
pub fn is_owned_by_nexus(&self, nexus: &NexusId) -> bool {
self.nexuses.contains(nexus)
}
}

impl From<ReplicaOwners> for models::ReplicaSpecOwners {
Expand Down

0 comments on commit cab6ad9

Please sign in to comment.