Skip to content

Commit

Permalink
try to handle nested collection compression
Browse files (browse the repository at this point in the history)
  • Loading branch information
nicola-cab committed Mar 21, 2024
1 parent 655e18c commit 5a4a05a
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 75 deletions.
2 changes: 1 addition & 1 deletion src/realm/array_encode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ bool ArrayEncode::always_encode(const Array& origin, Array& arr, bool packed) co
bool ArrayEncode::encode(const Array& origin, Array& arr) const
{
// return false;
// return always_encode(origin, arr, true); // true packed, false flex
return always_encode(origin, arr, true); // true packed, false flex

std::vector<int64_t> values;
std::vector<size_t> indices;
Expand Down
44 changes: 19 additions & 25 deletions src/realm/bplustree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -865,33 +865,27 @@ ref_type BPlusTreeBase::typed_write(ref_type ref, _impl::ArrayWriterBase& out, A
written_node.destroy();
return written_ref;
}
else {
if (node.has_refs()) {
// TODO: handle collection in mixed here. This is breaking..
Array written_node(Allocator::get_default());
written_node.create(NodeHeader::type_InnerBptreeNode, false, node.size());
for (unsigned j = 0; j < node.size(); ++j) {
RefOrTagged rot = node.get_as_ref_or_tagged(j);
if (rot.is_ref() && rot.get_as_ref()) {
// it should/could only be a nested collection
compress = true;
written_node.set_as_ref(j, BPlusTreeBase::typed_write(rot.get_as_ref(), out, alloc, col_type,
deep, only_modified, compress));
}
else {
Array a(alloc);
a.init_from_ref(rot.get_as_ref());
written_node.set_as_ref(j, a.write(out, deep, only_modified, false));
}
else if (node.has_refs()) {
Array ref_node(Allocator::get_default());
ref_node.create(NodeHeader::type_HasRefs, false, node.size());
for (size_t j = 0; j < node.size(); ++j) {
RefOrTagged rot = node.get_as_ref_or_tagged(j);
if (rot.is_ref() && rot.get_as_ref()) {
auto btree_ref =
BPlusTreeBase::typed_write(rot.get_as_ref(), out, alloc, col_type, deep, only_modified, true);
ref_node.set_as_ref(j, btree_ref);
}
else {
ref_node.set(j, rot);
}
auto written_ref = written_node.write(out, false, false, false);
written_node.destroy();
return written_ref;
// return node.write(out, deep, only_modified, false); // unknown substructure, don't compress
}
else {
return node.write(out, false, only_modified, compress); // leaf array - do compress
}
auto new_ref = ref_node.write(out, false, false, false);
ref_node.destroy();
return new_ref;
// return node.write(out, deep, only_modified, false);
}
else {
return node.write(out, deep, only_modified, true); // leaf array - do compress
}
}

Expand Down
74 changes: 29 additions & 45 deletions src/realm/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1674,59 +1674,43 @@ ref_type Cluster::typed_write(ref_type ref, _impl::ArrayWriterBase& out, const T
They can only be BPlusTree<int, Mixed> or BPlusTree<string, Mixed>.
5. Is the key array, marks whether the composite array at position i is a collection or not
*/
auto rot_parent = leaf.get_as_ref_or_tagged(0);
auto rot_int = leaf.get_as_ref_or_tagged(1);
auto rot_pair_int = leaf.get_as_ref_or_tagged(2);
auto rot_string = leaf.get_as_ref_or_tagged(3);
auto rot_composite = leaf.get_as_ref_or_tagged(4);
auto rot_key = leaf.get_as_ref_or_tagged(5);

if (rot_parent.get_as_ref())
written_leaf.set(0, Array::write(rot_parent.get_as_ref(), m_alloc, out, only_modified, false));
else
written_leaf.set(0, rot_parent);

if (rot_int.get_as_ref())
written_leaf.set_as_ref(1, Array::write(rot_int.get_as_ref(), m_alloc, out, only_modified, true));
else
written_leaf.set(1, rot_int);

if (rot_pair_int.get_as_ref())
written_leaf.set_as_ref(
2, Array::write(rot_pair_int.get_as_ref(), m_alloc, out, only_modified, true));
else
written_leaf.set(2, rot_pair_int);

written_leaf.set(3, rot_string); // no compression for strings now.

if (rot_composite.get_as_ref() && rot_key.get_as_ref()) {
Array composite(Allocator::get_default());
Array keys(Allocator::get_default());
composite.init_from_ref(rot_composite.get_as_ref());
keys.init_from_ref(rot_key.get_as_ref());

for (size_t i = 0; i < composite.size(); ++i) {
if (i < keys.size() && keys.get(i)) {
// collection.
auto rot = composite.get_as_ref_or_tagged(i);
REALM_ASSERT_DEBUG(rot.is_ref() && rot.get_as_ref());
std::cout << "type write for ref " << ref << std::endl;
for (size_t i = 0; i < sz; ++i) {
auto rot = leaf.get_as_ref_or_tagged(i);
if (rot.is_ref() && rot.get_as_ref()) {
if (i == 1 || i == 2) {
// this check is not fine grained.. we are compressing everything that fits into m_int and
// m_pair_int which means timestamps, doubles, floats and links alongside integers.
// TODO: build an array mixed and check the type
written_leaf.set_as_ref(
i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, compressible));
}
else if (i == 4) {
const auto new_ref = BPlusTreeBase::typed_write(rot.get_as_ref(), out, m_alloc, col_type,
deep, only_modified, true);
composite.set_as_ref(i, new_ref);
deep, only_modified, compressible);
written_leaf.set_as_ref(i, new_ref);
}
// else if(i == 5) {
// collection in mixed keys do not need to be compressed (or maybe yes, there are integers
// afterall)
//}
else {
// all the rest should be a ref we don't want to compress it.
written_leaf.set_as_ref(
i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, false));
}
}
written_leaf.set(4, rot_composite);
written_leaf.set(5, rot_key);
}
else {
written_leaf.set(4, rot_composite);
written_leaf.set(5, rot_key);
else {
// what about integers that are max 32 bits and we store stuff straight in the composite
// array. we are not compressing those.
written_leaf.set(i, rot);
}
}
}
else {
REALM_ASSERT(false);
}
written_cluster.set_as_ref(j, written_leaf.write(out, false, false, false));
written_cluster.set_as_ref(j, written_leaf.write(out, false, false, compressible));
written_leaf.destroy();
}
}
Expand Down
19 changes: 15 additions & 4 deletions test/test_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,28 +633,39 @@ TEST(List_AggOps)
test_lists_numeric_agg<Decimal128>(test_context, sg, type_Decimal, Decimal128(realm::null()), true);
}

ONLY(Test_Write_List_Nested_InMixed)
ONLY(Test_Write_List_Nested_In_Mixed)
{
SHARED_GROUP_TEST_PATH(path);
std::string message;
DBOptions options;
options.logger = test_context.logger;
DBRef db = DB::create(make_in_realm_history(), path, options);
auto tr = db->start_write();
auto table = tr->add_table_with_primary_key("table", type_Int, "id");
auto table = tr->add_table("table");
auto col_any = table->add_column(type_Mixed, "something");

Obj obj = table->create_object_with_primary_key(1);
Obj obj = table->create_object();
obj.set_any(col_any, Mixed{20});
tr->verify();
tr->commit_and_continue_writing(); // commit simple mixed
tr->verify();

obj.set_collection(col_any, CollectionType::List);
auto list = obj.get_list_ptr<Mixed>(col_any);
list->add(Mixed{10});
list->add(Mixed{11});
tr->verify();
tr->commit(); // commit nested list in mixed
tr->commit_and_continue_writing(); // commit nested list in mixed
tr->verify();

// spicy it up a little bit...
list->insert_collection(2, CollectionType::List);
list->insert_collection(3, CollectionType::List);
list->get_list(2)->add(Mixed{20});
list->get_list(3)->add(Mixed{21});
tr->commit_and_continue_writing();
tr->verify();
tr->close();
}

TEST(List_Nested_InMixed)
Expand Down

0 comments on commit 5a4a05a

Please sign in to comment.