Skip to content

Commit

Permalink
Take control over resizing to enforce required invariants (#383)
Browse files Browse the repository at this point in the history
* Add test related to attribute retention

* Take control over resizing to enforce required invariants

* Tweak comment
  • Loading branch information
DavisVaughan authored Aug 20, 2024
1 parent 6ddb13a commit acca2d2
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 3 deletions.
43 changes: 43 additions & 0 deletions cpp11test/src/test-r_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,4 +325,47 @@ context("r_vector-C++") {
expect_true(x.data() != R_NilValue);
expect_true(x.size() == 3);
}

test_that(
"writable vector truncation resizes names and retains attributes (but not dim or "
"dim names)") {
cpp11::writable::integers x(2);
x[0] = 1;
x[1] = 2;

// Doubles the capacity from 2 to 4, meaning the underlying SEXP has length 4 now.
x.push_back(3);
expect_true(Rf_xlength(x.data()) == 4);

// Set some names
SEXP names = PROTECT(Rf_allocVector(STRSXP, 3));
SET_STRING_ELT(names, 0, Rf_mkCharCE("x", CE_UTF8));
SET_STRING_ELT(names, 1, Rf_mkCharCE("y", CE_UTF8));
SET_STRING_ELT(names, 2, Rf_mkCharCE("z", CE_UTF8));
x.names() = names;

// Length of names SEXP is actually 4 now, extended by `setAttrib()` to match
// the internal capacity
expect_true(Rf_xlength(Rf_getAttrib(x.data(), R_NamesSymbol)) == 4);

// Set an attribute
SEXP bar = PROTECT(Rf_ScalarInteger(1));
x.attr("foo") = bar;

// Extract out the underlying SEXP using the operator:
// - This truncates to size 3
// - This truncates and keeps names
// - This copies over attributes like `"foo"`
// - This updates the internal SEXP in `x` to the one in `x_sexp` (gross but users
// probably expect this at this point)
SEXP x_sexp = x;

expect_true(Rf_xlength(x_sexp) == 3);
expect_true(Rf_xlength(Rf_getAttrib(x_sexp, R_NamesSymbol)) == 3);
expect_true(Rf_getAttrib(x_sexp, Rf_install("foo")) == bar);

expect_true(x.data() == x_sexp);

UNPROTECT(2);
}
}
6 changes: 6 additions & 0 deletions inst/include/cpp11/doubles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ inline typename r_vector<double>::underlying_type* r_vector<double>::get_p(bool
}
}

template <>
inline typename r_vector<double>::underlying_type const* r_vector<double>::get_const_p(
bool is_altrep, SEXP data) {
return REAL_OR_NULL(data);
}

template <>
inline void r_vector<double>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down
6 changes: 6 additions & 0 deletions inst/include/cpp11/integers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ inline typename r_vector<int>::underlying_type* r_vector<int>::get_p(bool is_alt
}
}

template <>
inline typename r_vector<int>::underlying_type const* r_vector<int>::get_const_p(
bool is_altrep, SEXP data) {
return INTEGER_OR_NULL(data);
}

template <>
inline void r_vector<int>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down
12 changes: 12 additions & 0 deletions inst/include/cpp11/list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ inline typename r_vector<SEXP>::underlying_type* r_vector<SEXP>::get_p(bool, SEX
return nullptr;
}

template <>
inline typename r_vector<SEXP>::underlying_type const* r_vector<SEXP>::get_const_p(
bool is_altrep, SEXP data) {
// No `VECTOR_PTR_OR_NULL()`
if (is_altrep) {
return nullptr;
} else {
// TODO: Use `VECTOR_PTR_RO()` conditionally once R 4.5.0 is officially released
return static_cast<SEXP const*>(DATAPTR_RO(data));
}
}

/// Specialization for lists, where `x["oob"]` returns `R_NilValue`, like at the R level
template <>
inline SEXP r_vector<SEXP>::get_oob() {
Expand Down
6 changes: 6 additions & 0 deletions inst/include/cpp11/logicals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ inline typename r_vector<r_bool>::underlying_type* r_vector<r_bool>::get_p(bool
}
}

template <>
inline typename r_vector<r_bool>::underlying_type const* r_vector<r_bool>::get_const_p(
bool is_altrep, SEXP data) {
return LOGICAL_OR_NULL(data);
}

template <>
inline void r_vector<r_bool>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down
108 changes: 105 additions & 3 deletions inst/include/cpp11/r_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <algorithm> // for max
#include <array> // for array
#include <cstdio> // for snprintf
#include <cstring> // for memcpy
#include <exception> // for exception
#include <initializer_list> // for initializer_list
#include <iterator> // for forward_iterator_tag, random_ac...
Expand Down Expand Up @@ -145,6 +146,8 @@ class r_vector {
/// Implemented in specialization
static underlying_type* get_p(bool is_altrep, SEXP data);
/// Implemented in specialization
static underlying_type const* get_const_p(bool is_altrep, SEXP data);
/// Implemented in specialization
static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf);
/// Implemented in specialization
static SEXPTYPE get_sexptype();
Expand Down Expand Up @@ -311,8 +314,13 @@ class r_vector : public cpp11::r_vector<T> {
/// Implemented in specialization
static void set_elt(SEXP x, R_xlen_t i, underlying_type value);

static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size);
static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size);
static SEXP resize_names(SEXP x, R_xlen_t size);

using cpp11::r_vector<T>::get_elt;
using cpp11::r_vector<T>::get_p;
using cpp11::r_vector<T>::get_const_p;
using cpp11::r_vector<T>::get_sexptype;
using cpp11::r_vector<T>::valid_type;
using cpp11::r_vector<T>::valid_length;
Expand Down Expand Up @@ -759,8 +767,25 @@ inline r_vector<T>::r_vector(SEXP&& data, bool is_altrep)
: cpp11::r_vector<T>(data, is_altrep), capacity_(length_) {}

template <typename T>
inline r_vector<T>::r_vector(const r_vector& rhs)
: cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), capacity_(rhs.capacity_) {}
inline r_vector<T>::r_vector(const r_vector& rhs) {
// We don't want to just pass through to the read-only constructor because we'd
// have to convert to `SEXP` first, which could truncate, and then we'd still have
// to shallow duplicate after that to ensure we have a duplicate, which can result in
// too many copies (#369).
//
// Instead we take control of setting all fields to try and only duplicate 1 time.
// We try and reclaim unused capacity during the duplication by only reserving up to
// the `rhs.length_`. This is nice because if the user returns this object, the
// truncation has already been done and they don't have to pay for another allocation.
// Importantly, `reserve_data()` always duplicates even if there wasn't extra capacity,
// which ensures we have our own copy.
data_ = reserve_data(rhs.data_, rhs.is_altrep_, rhs.length_);
protect_ = detail::store::insert(data_);
is_altrep_ = ALTREP(data_);
data_p_ = get_p(is_altrep_, data_);
length_ = rhs.length_;
capacity_ = rhs.length_;
}

template <typename T>
inline r_vector<T>::r_vector(r_vector&& rhs) {
Expand Down Expand Up @@ -1048,7 +1073,7 @@ inline void r_vector<T>::reserve(R_xlen_t new_capacity) {
SEXP old_protect = protect_;

data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity)
: safe[Rf_xlengthgets](data_, new_capacity);
: reserve_data(data_, is_altrep_, new_capacity);
protect_ = detail::store::insert(data_);
is_altrep_ = ALTREP(data_);
data_p_ = get_p(is_altrep_, data_);
Expand Down Expand Up @@ -1249,6 +1274,83 @@ inline typename r_vector<T>::iterator r_vector<T>::iterator::operator+(R_xlen_t
return it;
}

// Compared to `Rf_xlengthgets()`:
// - This always allocates, even if it is the same size, which is important when we use
// it in a constructor and need to ensure that it duplicates on the way in.
// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we
// use it in constructors and when we truncate right before returning from the `SEXP`
// operator.
// - This is more friendly to ALTREP `x`.
template <typename T>
inline SEXP r_vector<T>::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) {
// Resize core data
SEXP out = PROTECT(resize_data(x, is_altrep, size));

// Resize names, if required
SEXP names = Rf_getAttrib(x, R_NamesSymbol);
if (names != R_NilValue) {
names = resize_names(names, size);
Rf_setAttrib(out, R_NamesSymbol, names);
}

// Copy over "most" attributes, and set OBJECT bit and S4 bit as needed.
// Does not copy over names, dim, or dim names.
// Names are handled already. Dim and dim names should not be applicable,
// as this is a vector.
// Does not look like it would ever error in our use cases, so no `safe[]`.
Rf_copyMostAttrib(x, out);

UNPROTECT(1);
return out;
}

template <typename T>
inline SEXP r_vector<T>::resize_data(SEXP x, bool is_altrep, R_xlen_t size) {
underlying_type const* v_x = get_const_p(is_altrep, x);

SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size));
underlying_type* v_out = get_p(ALTREP(out), out);

const R_xlen_t x_size = Rf_xlength(x);
const R_xlen_t copy_size = (x_size > size) ? size : x_size;

// Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly
// copy everything from `x`)
if (v_x != nullptr && v_out != nullptr) {
std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type));
} else {
// Handles ALTREP `x` with no const pointer, VECSXP, STRSXP
for (R_xlen_t i = 0; i < copy_size; ++i) {
set_elt(out, i, get_elt(x, i));
}
}

UNPROTECT(1);
return out;
}

template <typename T>
inline SEXP r_vector<T>::resize_names(SEXP x, R_xlen_t size) {
const SEXP* v_x = STRING_PTR_RO(x);

SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size));

const R_xlen_t x_size = Rf_xlength(x);
const R_xlen_t copy_size = (x_size > size) ? size : x_size;

for (R_xlen_t i = 0; i < copy_size; ++i) {
SET_STRING_ELT(out, i, v_x[i]);
}

// Ensure remaining names are initialized to `""`
for (R_xlen_t i = copy_size; i < size; ++i) {
SET_STRING_ELT(out, i, R_BlankString);
}

UNPROTECT(1);
return out;
}

} // namespace writable

// TODO: is there a better condition we could use, e.g. assert something true
Expand Down
6 changes: 6 additions & 0 deletions inst/include/cpp11/raws.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ inline typename r_vector<uint8_t>::underlying_type r_vector<uint8_t>::get_elt(
return RAW_ELT(x, i);
}

template <>
inline typename r_vector<uint8_t>::underlying_type const* r_vector<uint8_t>::get_const_p(
bool is_altrep, SEXP data) {
return RAW_OR_NULL(data);
}

template <>
inline typename r_vector<uint8_t>::underlying_type* r_vector<uint8_t>::get_p(
bool is_altrep, SEXP data) {
Expand Down
11 changes: 11 additions & 0 deletions inst/include/cpp11/strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ inline typename r_vector<r_string>::underlying_type* r_vector<r_string>::get_p(b
return nullptr;
}

template <>
inline typename r_vector<r_string>::underlying_type const*
r_vector<r_string>::get_const_p(bool is_altrep, SEXP data) {
// No `STRING_PTR_OR_NULL()`
if (is_altrep) {
return nullptr;
} else {
return STRING_PTR_RO(data);
}
}

template <>
inline void r_vector<r_string>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down

0 comments on commit acca2d2

Please sign in to comment.