Skip to content

Commit

Permalink
chore(bors): merge pull request #892
Browse files Browse the repository at this point in the history
892: fix(csi-node): handle devices for existing subsystems r=tiagolobocastro a=tiagolobocastro

When a device is broken but the volume is not unstaged properly, the subsytem ends up lingering in the system.
If the device is staged again, the device is assined a different namespace id. Change the regex which is used in the device parameter fo account for this. Also extend the existing csi-node BDD testing.

Co-authored-by: Tiago Castro <[email protected]>
  • Loading branch information
mayastor-bors and tiagolobocastro committed Dec 3, 2024
2 parents 00190e9 + d831ac0 commit 02aa5aa
Show file tree
Hide file tree
Showing 10 changed files with 258 additions and 36 deletions.
2 changes: 1 addition & 1 deletion control-plane/csi-driver/src/bin/node/dev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl Device {
continue;
}

if let Some(devname) = match_dev::match_nvmf_device(&device, &nvmf_key) {
if let Some(devname) = match_dev::match_nvmf_device_valid(&device, &nvmf_key)? {
let nqn = if std::env::var("MOAC").is_ok() {
format!("{}:nexus-{uuid}", nvme_target_nqn_prefix())
} else {
Expand Down
96 changes: 75 additions & 21 deletions control-plane/csi-driver/src/bin/node/dev/nvmf.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use nvmeadm::{
error::NvmeError,
nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
};
use std::{
collections::HashMap,
convert::{From, TryFrom},
path::Path,
str::FromStr,
};

use nvmeadm::{
error::NvmeError,
nvmf_discovery::{disconnect, ConnectArgsBuilder, TrType},
};

use csi_driver::PublishParams;
use glob::glob;
use nvmeadm::nvmf_subsystem::Subsystem;
Expand All @@ -29,7 +28,7 @@ use crate::{
use super::{Attach, Detach, DeviceError, DeviceName};

lazy_static::lazy_static! {
static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,3})n1").unwrap();
static ref DEVICE_REGEX: Regex = Regex::new(r"nvme(\d{1,5})n(\d{1,5})").unwrap();
}

pub(super) struct NvmfAttach {
Expand All @@ -43,6 +42,7 @@ pub(super) struct NvmfAttach {
ctrl_loss_tmo: Option<u32>,
keep_alive_tmo: Option<u32>,
hostnqn: Option<String>,
warn_bad: std::sync::atomic::AtomicBool,
}

impl NvmfAttach {
Expand Down Expand Up @@ -70,6 +70,7 @@ impl NvmfAttach {
ctrl_loss_tmo,
keep_alive_tmo,
hostnqn,
warn_bad: std::sync::atomic::AtomicBool::new(true),
}
}

Expand All @@ -80,13 +81,20 @@ impl NvmfAttach {
enumerator.match_subsystem("block")?;
enumerator.match_property("DEVTYPE", "disk")?;

let mut first_error = Ok(None);
for device in enumerator.scan_devices()? {
if match_nvmf_device(&device, &key).is_some() {
return Ok(Some(device));
match match_device(&device, &key, &self.warn_bad) {
Ok(name) if name.is_some() => {
return Ok(Some(device));
}
Err(error) if first_error.is_ok() => {
first_error = Err(error);
}
_ => {}
}
}

Ok(None)
first_error
}
}

Expand Down Expand Up @@ -153,6 +161,12 @@ impl Attach for NvmfAttach {
if let Some(keep_alive_tmo) = nvme_config.keep_alive_tmo() {
self.keep_alive_tmo = Some(keep_alive_tmo);
}
if self.io_tmo.is_none() {
if let Some(io_tmo) = publish_context.io_timeout() {
self.io_tmo = Some(*io_tmo);
}
}

Ok(())
}

Expand Down Expand Up @@ -221,18 +235,16 @@ impl Attach for NvmfAttach {
.get_device()?
.ok_or_else(|| DeviceError::new("NVMe device not found"))?;
let dev_name = device.sysname().to_str().unwrap();
let major = DEVICE_REGEX
.captures(dev_name)
.ok_or_else(|| {
DeviceError::new(&format!(
"NVMe device \"{}\" does not match \"{}\"",
dev_name, *DEVICE_REGEX,
))
})?
.get(1)
.unwrap()
.as_str();
let pattern = format!("/sys/class/block/nvme{major}c*n1/queue");
let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
DeviceError::new(&format!(
"NVMe device \"{}\" does not match \"{}\"",
dev_name, *DEVICE_REGEX,
))
})?;
let major = captures.get(1).unwrap().as_str();
let nid = captures.get(2).unwrap().as_str();

let pattern = format!("/sys/class/block/nvme{major}c*n{nid}/queue");
let glob = glob(&pattern).unwrap();
let result = glob
.into_iter()
Expand Down Expand Up @@ -302,6 +314,48 @@ impl Detach for NvmfDetach {
}
}

/// Get the sysfs block device queue path for the given udev::Device.
fn block_dev_q(device: &Device) -> Result<String, DeviceError> {
let dev_name = device.sysname().to_str().unwrap();
let captures = DEVICE_REGEX.captures(dev_name).ok_or_else(|| {
DeviceError::new(&format!(
"NVMe device \"{}\" does not match \"{}\"",
dev_name, *DEVICE_REGEX,
))
})?;
let major = captures.get(1).unwrap().as_str();
let nid = captures.get(2).unwrap().as_str();
Ok(format!("/sys/class/block/nvme{major}c*n{nid}/queue"))
}

/// Check if the given device is a valid NVMf device.
/// # NOTE
/// In older kernels when a device with an existing mount is lost, the nvmf controller
/// is lost, but the block device remains, in a broken state.
/// On newer kernels, the block device is also gone.
pub(crate) fn match_device<'a>(
device: &'a Device,
key: &str,
warn_bad: &std::sync::atomic::AtomicBool,
) -> Result<Option<&'a str>, DeviceError> {
let Some(devname) = match_nvmf_device(device, key) else {
return Ok(None);
};

let glob = glob(&block_dev_q(device)?).unwrap();
if !glob.into_iter().any(|glob_result| glob_result.is_ok()) {
if warn_bad.load(std::sync::atomic::Ordering::Relaxed) {
let name = device.sysname().to_string_lossy();
warn!("Block device {name} for volume {key} has no controller!");
// todo: shoot-down the stale mounts?
warn_bad.store(false, std::sync::atomic::Ordering::Relaxed);
}
return Ok(None);
}

Ok(Some(devname))
}

/// Check for the presence of nvme tcp kernel module.
pub(crate) fn check_nvme_tcp_module() -> Result<(), std::io::Error> {
let path = "/sys/module/nvme_tcp";
Expand Down
11 changes: 11 additions & 0 deletions control-plane/csi-driver/src/bin/node/match_dev.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Utility functions for matching a udev record against a known device type.
use crate::error::DeviceError;
use udev::Device;

macro_rules! require {
Expand Down Expand Up @@ -72,3 +73,13 @@ pub(super) fn match_nvmf_device<'a>(device: &'a Device, key: &str) -> Option<&'a

Some(devname)
}

/// Match the device, if it's a nvmef device, but only if it's a valid block device.
/// See [`super::dev::nvmf::match_device`].
pub(super) fn match_nvmf_device_valid<'a>(
device: &'a Device,
key: &str,
) -> Result<Option<&'a str>, DeviceError> {
let cell = std::sync::atomic::AtomicBool::new(true);
super::dev::nvmf::match_device(device, key, &cell)
}
2 changes: 1 addition & 1 deletion shell.nix
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ mkShell {
[ ! -z "${io-engine}" ] && cowsay "${io-engine-moth}"
[ ! -z "${io-engine}" ] && export IO_ENGINE_BIN="${io-engine-moth}"
export PATH="$PATH:$(pwd)/target/debug"
export SUDO=$(which sudo || echo /run/wrappers/bin/sudo)
export SUDO=$(which sudo 2>/dev/null || echo /run/wrappers/bin/sudo)
DOCKER_CONFIG=~/.docker/config.json
if [ -f "$DOCKER_CONFIG" ]; then
Expand Down
20 changes: 18 additions & 2 deletions tests/bdd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,61 +2,77 @@

The BDD tests are written in Python and make use of the pytest-bdd library.

The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax.
The feature files in the `features` directory define the behaviour expected of the control plane. These behaviours are
described using the [Gherkin](https://cucumber.io/docs/gherkin/) syntax.

The feature files can be used to auto-generate the test file. For example running `pytest-bdd generate replicas.feature > test_volume_replicas.py`
The feature files can be used to auto-generate the test file. For example
running `pytest-bdd generate replicas.feature > test_volume_replicas.py`
generates the `test_volume_replicas.py` test file from the `replicas.feature` file.
When updating the feature file, you can also get some helpe updating the python code.
Example: `pytest --generate-missing --feature node.feature test_node.py`

**:warning: Note: Running pytest-bdd generate will overwrite any existing files with the same name**

## Running the Tests by entering the python virtual environment

Before running any tests run the `setup.sh` script. This sets up the necessary environment to run the tests:

```bash
# NOTE: you should be inside the nix-shell to begin
source ./setup.sh
```

To run all the tests:

```bash
pytest .
```

To run individual test files:

```bash
pytest features/volume/create/test_feature.py
```

To run an individual test within a test file use the `-k` option followed by the test name:

```bash
pytest features/volume/create/test_feature.py -k test_sufficient_suitable_pools
```

## Running the Tests

The script in `../../scripts/python/test.sh` can be used to run the tests without entering the venv.
This script will implicitly enter and exit the venv during test execution.

To run all the tests:

```bash
../../scripts/python/test.sh
```

Arguments will be passed directly to pytest. Example running individual tests:

```bash
../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools
```

## Debugging the Tests

Typically, the test code cleans up after itself and so it's impossible to debug the test cluster.
The environmental variable `CLEAN` can be set to `false` to skip tearing down the cluster when a test ends.
It should be paired with the pytest option `-x` or `--exitfirst` to exit when the first test fails otherwise the other
tests may end up tearing down the cluster anyway.

Example:

```bash
CLEAN=false ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x
```

The script `../../scripts/python/test.sh` does a lot of repetitive work of regenerating the auto-generated code.
This step can be skipped with the `FAST` environment variable to speed up the test cycle:

```bash
FAST=true ../../scripts/python/test.sh features/volume/create/test_feature.py -k test_sufficient_suitable_pools -x
```
5 changes: 5 additions & 0 deletions tests/bdd/common/deployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ def node_name(id: int):
assert id >= 0
return f"io-engine-{id + 1}"

@staticmethod
def csi_node_name(id: int):
assert id >= 0
return f"csi-node-{id + 1}"

@staticmethod
def create_disks(len=1, size=100 * 1024 * 1024):
host_tmp = workspace_tmp()
Expand Down
6 changes: 6 additions & 0 deletions tests/bdd/common/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def unpause_container(name):
container = docker_client.containers.get(name)
container.unpause()

@staticmethod
def execute(name, commands):
docker_client = docker.from_env()
container = docker_client.containers.get(name)
return container.exec_run(commands)

# Restart a container with the given name.
def restart_container(name):
docker_client = docker.from_env()
Expand Down
22 changes: 18 additions & 4 deletions tests/bdd/common/nvme.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,21 +307,35 @@ def identify_namespace(device):

def nvme_set_reconnect_delay_all(nqn, delay=10):
for controller in nvme_find_controllers(nqn):
nvme_controller_set_reconnect_delay(controller, delay)
nvme_controller_set_param(controller, "reconnect_delay", delay)


def nvme_set_reconnect_delay(uri, delay=10):
controller = nvme_find_controller(uri)
nvme_controller_set_reconnect_delay(controller, delay)
nvme_controller_set_param(controller, "reconnect_delay", delay)


def nvme_controller_set_reconnect_delay(controller, delay=10):
def nvme_set_ctrl_loss_tmo(uri, ctrl_loss_tmo=10):
controller = nvme_find_controller(uri)
nvme_controller_set_param(controller, "ctrl_loss_tmo", ctrl_loss_tmo)


def nvme_controller_set_param(controller, name: str, value: int):
controller = controller.get("Controller")
command = f"echo {delay} | sudo tee -a /sys/class/nvme/{controller}/reconnect_delay"
command = f"echo {value} | sudo tee -a /sys/class/nvme/{controller}/{name}"
print(command)
subprocess.run(command, check=True, shell=True, capture_output=True)


@retry(wait_fixed=100, stop_max_attempt_number=40)
def wait_nvme_find_device(uri):
return nvme_find_device(uri)


@retry(wait_fixed=100, stop_max_attempt_number=40)
def wait_nvme_gone_device(uri, nqn=None):
if nqn is None:
u = urlparse(uri)
nqn = u.path[1:]
devs = nvme_find_subsystem_devices(nqn)
assert len(devs) == 0
13 changes: 13 additions & 0 deletions tests/bdd/features/csi/node/node.feature
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,16 @@ Feature: CSI node plugin
Scenario: publishing a reader only block volume as rw
Given a block volume staged as "MULTI_NODE_READER_ONLY"
When publishing the block volume as "rw" should fail

Scenario: re-staging after controller loss timeout
Given a staged volume
When the kernel device is removed after a controller loss timeout
Then the volume should be unstageable
And the volume should be stageable again

Scenario: re-staging after controller loss timeout without unstaging
Given a staged volume
When the kernel device is removed after a controller loss timeout
Then the mounts become broken
But the volume should be stageable on a different path
And the nvme device should have a different controller and namespace
Loading

0 comments on commit 02aa5aa

Please sign in to comment.