From 5a4a05afa9eeb254638730534df500c1f120534e Mon Sep 17 00:00:00 2001 From: Nicola Cabiddu Date: Thu, 21 Mar 2024 19:10:07 +0000 Subject: [PATCH] try to handle nested collection compression --- src/realm/array_encode.cpp | 2 +- src/realm/bplustree.cpp | 44 ++++++++++------------- src/realm/cluster.cpp | 74 +++++++++++++++----------------------- test/test_list.cpp | 19 +++++++--- 4 files changed, 64 insertions(+), 75 deletions(-) diff --git a/src/realm/array_encode.cpp b/src/realm/array_encode.cpp index 4ead75b45dd..2bd5449bdb8 100644 --- a/src/realm/array_encode.cpp +++ b/src/realm/array_encode.cpp @@ -148,7 +148,7 @@ bool ArrayEncode::always_encode(const Array& origin, Array& arr, bool packed) co bool ArrayEncode::encode(const Array& origin, Array& arr) const { // return false; - // return always_encode(origin, arr, true); // true packed, false flex + return always_encode(origin, arr, true); // true packed, false flex std::vector values; std::vector indices; diff --git a/src/realm/bplustree.cpp b/src/realm/bplustree.cpp index c381d978093..d973478acd2 100644 --- a/src/realm/bplustree.cpp +++ b/src/realm/bplustree.cpp @@ -865,33 +865,27 @@ ref_type BPlusTreeBase::typed_write(ref_type ref, _impl::ArrayWriterBase& out, A written_node.destroy(); return written_ref; } - else { - if (node.has_refs()) { - // TODO: handle collection in mixed here. This is breaking.. 
- Array written_node(Allocator::get_default()); - written_node.create(NodeHeader::type_InnerBptreeNode, false, node.size()); - for (unsigned j = 0; j < node.size(); ++j) { - RefOrTagged rot = node.get_as_ref_or_tagged(j); - if (rot.is_ref() && rot.get_as_ref()) { - // it should/could only be a nested collection - compress = true; - written_node.set_as_ref(j, BPlusTreeBase::typed_write(rot.get_as_ref(), out, alloc, col_type, - deep, only_modified, compress)); - } - else { - Array a(alloc); - a.init_from_ref(rot.get_as_ref()); - written_node.set_as_ref(j, a.write(out, deep, only_modified, false)); - } + else if (node.has_refs()) { + Array ref_node(Allocator::get_default()); + ref_node.create(NodeHeader::type_HasRefs, false, node.size()); + for (size_t j = 0; j < node.size(); ++j) { + RefOrTagged rot = node.get_as_ref_or_tagged(j); + if (rot.is_ref() && rot.get_as_ref()) { + auto btree_ref = + BPlusTreeBase::typed_write(rot.get_as_ref(), out, alloc, col_type, deep, only_modified, true); + ref_node.set_as_ref(j, btree_ref); + } + else { + ref_node.set(j, rot); } - auto written_ref = written_node.write(out, false, false, false); - written_node.destroy(); - return written_ref; - // return node.write(out, deep, only_modified, false); // unknown substructure, don't compress - } - else { - return node.write(out, false, only_modified, compress); // leaf array - do compress } + auto new_ref = ref_node.write(out, false, false, false); + ref_node.destroy(); + return new_ref; + // return node.write(out, deep, only_modified, false); + } + else { + return node.write(out, deep, only_modified, true); // leaf array - do compress } } diff --git a/src/realm/cluster.cpp b/src/realm/cluster.cpp index 20268b8580a..e8ab8ebc30c 100644 --- a/src/realm/cluster.cpp +++ b/src/realm/cluster.cpp @@ -1674,59 +1674,43 @@ ref_type Cluster::typed_write(ref_type ref, _impl::ArrayWriterBase& out, const T They can only be BPlusTree or BPlusTree. 5. 
Is the key array, marks whether the composite array at position i is a collection or not */ - auto rot_parent = leaf.get_as_ref_or_tagged(0); - auto rot_int = leaf.get_as_ref_or_tagged(1); - auto rot_pair_int = leaf.get_as_ref_or_tagged(2); - auto rot_string = leaf.get_as_ref_or_tagged(3); - auto rot_composite = leaf.get_as_ref_or_tagged(4); - auto rot_key = leaf.get_as_ref_or_tagged(5); - - if (rot_parent.get_as_ref()) - written_leaf.set(0, Array::write(rot_parent.get_as_ref(), m_alloc, out, only_modified, false)); - else - written_leaf.set(0, rot_parent); - - if (rot_int.get_as_ref()) - written_leaf.set_as_ref(1, Array::write(rot_int.get_as_ref(), m_alloc, out, only_modified, true)); - else - written_leaf.set(1, rot_int); - - if (rot_pair_int.get_as_ref()) - written_leaf.set_as_ref( - 2, Array::write(rot_pair_int.get_as_ref(), m_alloc, out, only_modified, true)); - else - written_leaf.set(2, rot_pair_int); - - written_leaf.set(3, rot_string); // no compression for strings now. - - if (rot_composite.get_as_ref() && rot_key.get_as_ref()) { - Array composite(Allocator::get_default()); - Array keys(Allocator::get_default()); - composite.init_from_ref(rot_composite.get_as_ref()); - keys.init_from_ref(rot_key.get_as_ref()); - - for (size_t i = 0; i < composite.size(); ++i) { - if (i < keys.size() && keys.get(i)) { - // collection. - auto rot = composite.get_as_ref_or_tagged(i); - REALM_ASSERT_DEBUG(rot.is_ref() && rot.get_as_ref()); + std::cout << "type write for ref " << ref << std::endl; + for (size_t i = 0; i < sz; ++i) { + auto rot = leaf.get_as_ref_or_tagged(i); + if (rot.is_ref() && rot.get_as_ref()) { + if (i == 1 || i == 2) { + // this check is not fine grained.. we are compressing everything that fits into m_int and + // m_pair_int which means timestamps, doubles, floats and links alongside integers. 
+ // TODO: build an array mixed and check the type + written_leaf.set_as_ref( + i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, compressible)); + } + else if (i == 4) { const auto new_ref = BPlusTreeBase::typed_write(rot.get_as_ref(), out, m_alloc, col_type, - deep, only_modified, true); - composite.set_as_ref(i, new_ref); + deep, only_modified, compressible); + written_leaf.set_as_ref(i, new_ref); + } + // else if(i == 5) { + // collection in mixed keys do not need to be compressed (or maybe yes, there are integers + // after all) + //} + else { + // all the rest should be a ref; we don't want to compress it. + written_leaf.set_as_ref( + i, Array::write(rot.get_as_ref(), m_alloc, out, only_modified, false)); + } } - written_leaf.set(4, rot_composite); - written_leaf.set(5, rot_key); - } - else { - written_leaf.set(4, rot_composite); - written_leaf.set(5, rot_key); + else { + // what about integers that are max 32 bits and we store stuff straight in the composite + // array. we are not compressing those. 
+ written_leaf.set(i, rot); + } + } } } else { REALM_ASSERT(false); } - written_cluster.set_as_ref(j, written_leaf.write(out, false, false, false)); + written_cluster.set_as_ref(j, written_leaf.write(out, false, false, compressible)); written_leaf.destroy(); } } diff --git a/test/test_list.cpp b/test/test_list.cpp index 7d1213afd94..089b7fa8e00 100644 --- a/test/test_list.cpp +++ b/test/test_list.cpp @@ -633,7 +633,7 @@ TEST(List_AggOps) test_lists_numeric_agg(test_context, sg, type_Decimal, Decimal128(realm::null()), true); } -ONLY(Test_Write_List_Nested_InMixed) +ONLY(Test_Write_List_Nested_In_Mixed) { SHARED_GROUP_TEST_PATH(path); std::string message; @@ -641,20 +641,31 @@ ONLY(Test_Write_List_Nested_InMixed) options.logger = test_context.logger; DBRef db = DB::create(make_in_realm_history(), path, options); auto tr = db->start_write(); - auto table = tr->add_table_with_primary_key("table", type_Int, "id"); + auto table = tr->add_table("table"); auto col_any = table->add_column(type_Mixed, "something"); - Obj obj = table->create_object_with_primary_key(1); + Obj obj = table->create_object(); obj.set_any(col_any, Mixed{20}); tr->verify(); tr->commit_and_continue_writing(); // commit simple mixed + tr->verify(); obj.set_collection(col_any, CollectionType::List); auto list = obj.get_list_ptr(col_any); list->add(Mixed{10}); list->add(Mixed{11}); tr->verify(); - tr->commit(); // commit nested list in mixed + tr->commit_and_continue_writing(); // commit nested list in mixed + tr->verify(); + + // spice it up a little bit... + list->insert_collection(2, CollectionType::List); + list->insert_collection(3, CollectionType::List); + list->get_list(2)->add(Mixed{20}); + list->get_list(3)->add(Mixed{21}); + tr->commit_and_continue_writing(); + tr->verify(); + tr->close(); } TEST(List_Nested_InMixed)