Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compression for leaf Mixed arrays of integers and collection in mixed. #7501

Merged
Merged
32 changes: 22 additions & 10 deletions src/realm/bplustree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@ std::unique_ptr<BPlusTreeNode> BPlusTreeBase::create_root_from_ref(ref_type ref)

// this should only be called for a column_type which we can safely compress.
ref_type BPlusTreeBase::typed_write(ref_type ref, _impl::ArrayWriterBase& out, Allocator& alloc, ColumnType col_type,
bool deep, bool only_modified, bool compress)
bool deep, bool only_modified, bool compress, bool collection_in_mixed)
{
if (only_modified && alloc.is_read_only(ref))
return ref;
Expand All @@ -854,8 +854,9 @@ ref_type BPlusTreeBase::typed_write(ref_type ref, _impl::ArrayWriterBase& out, A
written_node.set_as_ref(j, a.write(out, deep, only_modified, false));
}
else {
written_node.set_as_ref(j, BPlusTreeBase::typed_write(rot.get_as_ref(), out, alloc, col_type,
deep, only_modified, compress));
written_node.set_as_ref(j,
BPlusTreeBase::typed_write(rot.get_as_ref(), out, alloc, col_type, deep,
only_modified, compress, collection_in_mixed));
}
}
else
Expand All @@ -865,14 +866,25 @@ ref_type BPlusTreeBase::typed_write(ref_type ref, _impl::ArrayWriterBase& out, A
written_node.destroy();
return written_ref;
}
else {
if (node.has_refs()) {
// this should be extended to handle Mixed....
return node.write(out, deep, only_modified, false); // unknown substructure, don't compress
}
else {
return node.write(out, false, only_modified, compress); // leaf array - do compress
else if (node.has_refs()) {
// if collection in mixed is set, it means that this node is actually a mixed property that contains
// a collection in it. So we need to visit the collection that is part of the node and reach the final leaf,
// in order to determine whether the leaf can be compressed.
if (collection_in_mixed) {
const auto sz = node.size();
for (size_t j = 0; j < sz; ++j) {
RefOrTagged rot = node.get_as_ref_or_tagged(j);
if (rot.is_ref() && rot.get_as_ref()) {
const auto btree_ref = BPlusTreeBase::typed_write(rot.get_as_ref(), out, alloc, col_type, deep,
only_modified, compress, collection_in_mixed);
node.set_as_ref(j, btree_ref);
}
}
}
return node.write(out, deep, only_modified, false);
}
else {
return node.write(out, deep, only_modified, true); // leaf array - do compress
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/realm/bplustree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ class BPlusTreeBase {
}

static ref_type typed_write(ref_type ref, _impl::ArrayWriterBase& out, Allocator& alloc, ColumnType col_type,
bool deep, bool only_modified, bool compress);
bool deep, bool only_modified, bool compress, bool collection_in_mixed);
static void typed_print(std::string prefix, Allocator& alloc, ref_type root, ColumnType col_type);


Expand Down
64 changes: 55 additions & 9 deletions src/realm/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1596,7 +1596,7 @@ ref_type Cluster::typed_write(ref_type ref, _impl::ArrayWriterBase& out, const T
if (bptree_rot.is_ref() && bptree_rot.get_as_ref()) {
written_leaf.set_as_ref(i, BPlusTreeBase::typed_write(bptree_rot.get_as_ref(), out, m_alloc,
col_type, deep, only_modified,
compress && compressible));
compress && compressible, false));
}
else
written_leaf.set(i, bptree_rot);
Expand Down Expand Up @@ -1629,11 +1629,11 @@ ref_type Cluster::typed_write(ref_type ref, _impl::ArrayWriterBase& out, const T
auto bptree_rot = dict_top.get_as_ref(0);
written_dict_top.set_as_ref(0, BPlusTreeBase::typed_write(bptree_rot, out, m_alloc, col_type,
deep, only_modified,
compress && compressible));
compress && compressible, false));
bptree_rot = dict_top.get_as_ref(1);
written_dict_top.set_as_ref(1, BPlusTreeBase::typed_write(bptree_rot, out, m_alloc, col_type,
deep, only_modified,
compress && compressible));
compress && compressible, false));
}
written_leaf.set_as_ref(i, written_dict_top.write(out, false, false, false));
written_dict_top.destroy();
Expand All @@ -1653,24 +1653,70 @@ ref_type Cluster::typed_write(ref_type ref, _impl::ArrayWriterBase& out, const T
else if (col_type == col_type_Mixed) {
const auto sz = leaf.size();
REALM_ASSERT(sz == 6);
// temporary disable mixed. in order to re-enable them in a separate PR

/*
Mixed stores things using different arrays. We need to take into account this in order to
understand what we need to compress and what we can instead leave not compressed.

The main subarrays are:

composite array : index 0
int array : index 1
pair_int array: index 2
string array: index 3
ref array: index 4
key array: index 5

Description of each array:
1. composite array: the data stored here is either a small int (< 32 bits) or an offset to one of
the other arrays where the actual data is.
2. int and pair int arrays, they are used for storing integers, timestamps, floats, doubles,
decimals, links. In general we can compress them, but we need to be careful, controlling the col_type
should prevent compressing data that we want to leave in the current format.
3. string array is for strings and binary data (no compression for now)
4. ref array is actually storing refs to collections. they can only be BPlusTree<int, Mixed> or
BPlusTree<string, Mixed>.
5. key array stores unique identifiers for collections in mixed (integers that can be compressed)
*/
for (size_t i = 0; i < sz; ++i) {
auto rot = leaf.get_as_ref_or_tagged(i);
if (rot.is_ref() && rot.get_as_ref()) {
// entries 0-2 are integral and can be compressed, entry 3 is strings and not compressed (yet)
// collections in mixed are stored at position 4.
bool do_compress = false; // (i < 3 || i == 4) ? true : false;
written_leaf.set_as_ref(
i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, do_compress));
if (i < 3) { // composite, int, and pair_int
// integer arrays
written_leaf.set_as_ref(
i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, compress));
}
else if (i == 4) { // collection in mixed
// we need to differentiate between a mixed that contains
// an objlink and a mixed that contains a collection.
// This flag is used to tell the two apart while descending the
// cluster.
const bool collection_in_mixed = true;
const auto new_ref =
BPlusTreeBase::typed_write(rot.get_as_ref(), out, m_alloc, col_type, deep,
only_modified, compress, collection_in_mixed);
written_leaf.set_as_ref(i, new_ref);
}
else if (i == 5) { // unique keys associated to collections in mixed
written_leaf.set_as_ref(
i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, compress));
}
else {
// we don't want to compress any of the rest, at least for now (strings will be needed)
written_leaf.set_as_ref(
i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, false));
}
}
else {
// all the other data types that we don't compress
written_leaf.set(i, rot);
}
}
}
else {
REALM_ASSERT(false);
}

written_cluster.set_as_ref(j, written_leaf.write(out, false, false, false));
written_leaf.destroy();
}
Expand Down
35 changes: 35 additions & 0 deletions test/test_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,41 @@ TEST(List_AggOps)
test_lists_numeric_agg<Decimal128>(test_context, sg, type_Decimal, Decimal128(realm::null()), true);
}

// Commits a Mixed column three times while its content evolves:
//   1. a plain integer stored in the Mixed,
//   2. a list nested in the Mixed,
//   3. two further lists nested inside that list.
// The transaction is verified before and after every commit.
TEST(Test_Write_List_Nested_In_Mixed)
{
    SHARED_GROUP_TEST_PATH(path);
    DBOptions options;
    options.logger = test_context.logger;
    DBRef db = DB::create(make_in_realm_history(), path, options);
    auto tr = db->start_write();
    auto table = tr->add_table("table");
    auto col_any = table->add_column(type_Mixed, "something");

    Obj obj = table->create_object();
    obj.set_any(col_any, Mixed{20});
    tr->verify();
    tr->commit_and_continue_writing(); // commit simple mixed
    tr->verify();

    obj.set_collection(col_any, CollectionType::List);
    auto list = obj.get_list_ptr<Mixed>(col_any);
    list->add(Mixed{10});
    list->add(Mixed{11});
    tr->verify();
    tr->commit_and_continue_writing(); // commit nested list in mixed
    tr->verify();

    // spice it up a little bit: nest two more lists inside the list
    list->insert_collection(2, CollectionType::List);
    list->insert_collection(3, CollectionType::List);
    list->get_list(2)->add(Mixed{20});
    list->get_list(3)->add(Mixed{21});
    tr->verify(); // same pre-commit check as the two stages above
    tr->commit_and_continue_writing(); // commit lists nested in the list in mixed
    tr->verify();
    tr->close();
}

TEST(List_Nested_InMixed)
{
SHARED_GROUP_TEST_PATH(path);
Expand Down
Loading