From 9fea32557691c02cc10db6304202ea873837cdcc Mon Sep 17 00:00:00 2001 From: icristescu Date: Fri, 21 Oct 2022 18:42:10 +0200 Subject: [PATCH] gc-based snapshots --- CHANGES.md | 2 + src/irmin-pack/unix/errors.ml | 3 +- src/irmin-pack/unix/ext.ml | 85 ++++++++++++++++++------ src/irmin-pack/unix/file_manager.ml | 58 ++++++++++++++++ src/irmin-pack/unix/file_manager_intf.ml | 13 ++++ src/irmin-pack/unix/gc.ml | 16 ++++- src/irmin-pack/unix/gc.mli | 8 +++ src/irmin-pack/unix/gc_worker.ml | 19 ++++-- src/irmin-pack/unix/gc_worker.mli | 7 +- src/irmin-pack/unix/io.ml | 6 ++ src/irmin-pack/unix/io_errors.ml | 3 +- src/irmin-pack/unix/io_intf.ml | 3 + src/irmin-pack/unix/s.ml | 9 +++ test/irmin-pack/test_gc.ml | 65 ++++++++++++++++++ test/irmin-pack/test_gc.mli | 4 ++ test/irmin-pack/test_pack.ml | 1 + 16 files changed, 270 insertions(+), 32 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index cc64a3c353..661a227bd9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,8 @@ latest gc was called on. (#2110, @icristescu) - Add `split` to create a new suffix chunk. Subsequent writes will append to this chunk until `split` is called again. (#2118, @icristescu) + - Add `create_one_commit_store` to create a new store from the existing one, + containing only one commit. (#2125, @icristescu) ### Changed diff --git a/src/irmin-pack/unix/errors.ml b/src/irmin-pack/unix/errors.ml index e9b88db383..18588cc9a6 100644 --- a/src/irmin-pack/unix/errors.ml +++ b/src/irmin-pack/unix/errors.ml @@ -71,7 +71,8 @@ type base_error = | `Invalid_read_of_gced_object of string | `Inconsistent_store | `Split_forbidden_during_batch - | `Multiple_empty_chunks ] + | `Multiple_empty_chunks + | `Forbidden_during_gc ] [@@deriving irmin ~pp] (** [base_error] is the type of most errors that can occur in a [result], except for errors that have associated exceptions (see below) and backend-specific diff --git a/src/irmin-pack/unix/ext.ml b/src/irmin-pack/unix/ext.ml index bb0c8529e6..92b4888906 100644 --- a/src/irmin-pack/unix/ext.ml +++ b/src/irmin-pack/unix/ext.ml @@ -243,24 +243,26 @@ module Maker (Config : Conf.S) = struct cancelled | None -> false - let start ~unlink ~use_auto_finalisation t commit_key = + let direct_commit_key t key = + let state : _ Pack_key.state = Pack_key.inspect key in + match state with + | Direct _ -> Ok key + | Indexed h -> ( + match Commit.CA.index_direct_with_kind t.commit h with + | None -> + Error + (`Commit_key_is_dangling + (Irmin.Type.to_string XKey.t key)) + | Some (k, _kind) -> Ok k) + + let start ~unlink ~use_auto_finalisation ~new_files_path t commit_key + = let open Result_syntax in [%log.info "GC: Starting on %a" pp_key commit_key]; let* () = if t.during_batch then Error `Gc_forbidden_during_batch else Ok () in - let* commit_key = - let state : _ Pack_key.state = Pack_key.inspect commit_key in - match state with - | Direct _ -> Ok commit_key - | Indexed h -> ( - match Commit.CA.index_direct_with_kind t.commit h with - | None -> - Error - (`Commit_key_is_dangling - (Irmin.Type.to_string XKey.t commit_key)) - | Some (k, _kind) -> Ok k) - in + let* commit_key = direct_commit_key t commit_key in let root = Conf.root t.config in let* () = if not (File_manager.gc_allowed t.fm) then Error `Gc_disallowed @@ -271,19 +273,21 @@ module Maker (Config : Conf.S) = struct let gc = Gc.v ~root ~generation:next_generation ~unlink ~dispatcher:t.dispatcher ~fm:t.fm ~contents:t.contents - ~node:t.node ~commit:t.commit commit_key + ~node:t.node ~commit:t.commit ~new_files_path commit_key in t.running_gc <- Some { gc; use_auto_finalisation }; Ok () - let start_exn ?(unlink = true) ~use_auto_finalisation t commit_key = + let start_exn ?(unlink = true) ~use_auto_finalisation ~new_files_path + t commit_key = match t.running_gc with | Some _ -> [%log.info "Repo is alreadying running GC. Skipping."]; Lwt.return false | None -> ( let result = - start ~unlink ~use_auto_finalisation t commit_key + start ~unlink ~use_auto_finalisation ~new_files_path t + commit_key in match result with | Ok _ -> Lwt.return true @@ -346,6 +350,43 @@ module Maker (Config : Conf.S) = struct Pack_key.v_direct ~offset ~length ~hash:entry.hash in Some key) + + let create_one_commit_store t commit_key path = + let () = + match Io.classify_path path with + | `Directory -> () + | `No_such_file_or_directory -> + Io.mkdir path |> Errs.raise_if_error + | _ -> Errs.raise_error `Invalid_layout + in + let commit_key = + direct_commit_key t commit_key |> Errs.raise_if_error + in + let* launched = + start_exn ~use_auto_finalisation:false ~new_files_path:path t + commit_key + in + let () = + if not launched then Errs.raise_error `Forbidden_during_gc + in + let* latest_gc_target_offset, suffix_start_offset = + match t.running_gc with + | None -> assert false + | Some { gc; _ } -> Gc.finalise_without_swap gc + in + let generation = File_manager.generation t.fm + 1 in + let config = Irmin.Backend.Conf.add t.config Conf.Key.root path in + let () = + File_manager.create_one_commit_store t.fm config ~generation + ~latest_gc_target_offset ~suffix_start_offset commit_key + |> Errs.raise_if_error + in + let branch_path = Irmin_pack.Layout.V4.branch ~root:path in + let* branch_store = + Branch.v ~fresh:true ~readonly:false branch_path + in + let* () = Branch.close branch_store in + Lwt.return_unit end let batch t f = @@ -533,6 +574,7 @@ module Maker (Config : Conf.S) = struct let flush = X.Repo.flush let fsync = X.Repo.fsync let split = X.Repo.split_exn + let create_one_commit_store = X.Repo.Gc.create_one_commit_store module Gc = struct type msg = [ `Msg of string ] @@ -563,13 +605,18 @@ module Maker (Config : Conf.S) = struct `Msg err_msg let finalise_exn = X.Repo.Gc.finalise_exn - let start_exn = X.Repo.Gc.start_exn ~use_auto_finalisation:false + + let start_exn ?unlink t = + let root = Irmin_pack.Conf.root t.X.Repo.config in + X.Repo.Gc.start_exn ?unlink ~use_auto_finalisation:false + ~new_files_path:root t let start repo commit_key = + let root = Irmin_pack.Conf.root repo.X.Repo.config in try let* started = - X.Repo.Gc.start_exn ~unlink:true ~use_auto_finalisation:true repo - commit_key + X.Repo.Gc.start_exn ~unlink:true ~use_auto_finalisation:true + ~new_files_path:root repo commit_key in Lwt.return_ok started with exn -> catch_errors "Start GC" exn diff --git a/src/irmin-pack/unix/file_manager.ml b/src/irmin-pack/unix/file_manager.ml index f305d7e859..8a0d7866dd 100644 --- a/src/irmin-pack/unix/file_manager.ml +++ b/src/irmin-pack/unix/file_manager.ml @@ -796,4 +796,62 @@ struct let chunk_start_idx = pl.chunk_start_idx in let chunk_num = pl.chunk_num in cleanup ~root ~generation ~chunk_start_idx ~chunk_num + + let create_one_commit_store t config ~generation ~latest_gc_target_offset + ~suffix_start_offset commit_key = + let open Result_syntax in + let src_root = t.root in + let dst_root = Irmin_pack.Conf.root config in + (* Step 1. Copy the dict *) + let src_dict = Irmin_pack.Layout.V4.dict ~root:src_root in + let dst_dict = Irmin_pack.Layout.V4.dict ~root:dst_root in + let* () = Io.copy_file ~src:src_dict ~dst:dst_dict in + (* Step 2. Create an empty suffix and close it. *) + let* suffix = + Suffix.create_rw ~root:dst_root ~overwrite:false + ~auto_flush_threshold:1_000_000 ~auto_flush_procedure:`Internal + ~start_idx:1 + in + let* () = Suffix.close suffix in + (* Step 3. Create the control file and close it. *) + let status = + Payload.Gced + { + suffix_start_offset; + generation; + latest_gc_target_offset; + suffix_dead_bytes = Int63.zero; + } + in + let dict_end_poff = Io.size_of_path dst_dict |> Errs.raise_if_error in + let pl = + { + Payload.dict_end_poff; + suffix_end_poff = Int63.zero; + checksum = Int63.zero; + status; + upgraded_from_v3_to_v4 = false; + chunk_num = 1; + chunk_start_idx = 1; + } + in + let path = Irmin_pack.Layout.V4.control ~root:dst_root in + let* control = Control.create_rw ~path ~overwrite:false pl in + let* () = Control.close control in + (* Step 4. Create the index. *) + let* index = + let log_size = Conf.index_log_size config in + let throttle = Conf.merge_throttle config in + Index.v ~fresh:true ~flush_callback:Fun.id ~readonly:false ~throttle + ~log_size dst_root + in + (* Step 5. Add the commit to the index, close the index. *) + let () = + match Pack_key.inspect commit_key with + | Pack_key.Direct { hash; offset; length } -> + Index.add index hash (offset, length, Pack_value.Kind.Commit_v2) + | Indexed _ -> assert false + in + let* () = Index.close index in + Ok () end diff --git a/src/irmin-pack/unix/file_manager_intf.ml b/src/irmin-pack/unix/file_manager_intf.ml index 66cf6ee928..215a44fe4e 100644 --- a/src/irmin-pack/unix/file_manager_intf.ml +++ b/src/irmin-pack/unix/file_manager_intf.ml @@ -256,6 +256,19 @@ module type S = sig val generation : t -> int val gc_allowed : t -> bool val split : t -> (unit, [> Errs.t ]) result + + val create_one_commit_store : + t -> + Irmin.Backend.Conf.t -> + generation:int -> + latest_gc_target_offset:int63 -> + suffix_start_offset:int63 -> + Index.key Pack_key.t -> + (unit, [> open_rw_error | close_error ]) result + (** [create_one_commit_store t conf generation new_store_root key] is called + when creating a new store at [new_store_root] from the existing one, + containing only one commit, specified by the [key]. Ths new store will use + configuration options from [conf] and set to [generation]. *) end module type Sigs = sig diff --git a/src/irmin-pack/unix/gc.ml b/src/irmin-pack/unix/gc.ml index 8445fd275e..fc97dd6c0b 100644 --- a/src/irmin-pack/unix/gc.ml +++ b/src/irmin-pack/unix/gc.ml @@ -42,8 +42,8 @@ module Make (Args : Gc_args.S) = struct latest_gc_target_offset : int63; } - let v ~root ~generation ~unlink ~dispatcher ~fm ~contents ~node ~commit - commit_key = + let v ~root ~new_files_path ~generation ~unlink ~dispatcher ~fm ~contents + ~node ~commit commit_key = let new_suffix_start_offset, latest_gc_target_offset = let state : _ Pack_key.state = Pack_key.inspect commit_key in match state with @@ -84,7 +84,7 @@ module Make (Args : Gc_args.S) = struct let task = Async.async (fun () -> Worker.run_and_output_result root commit_key new_suffix_start_offset - ~generation) + ~generation ~new_files_path) in let partial_stats = Gc_stats.Main.finish_current_step partial_stats "before finalise" @@ -282,6 +282,16 @@ module Make (Args : Gc_args.S) = struct | `Running -> Lwt.return_ok `Running | #Async.outcome as status -> go status) + let finalise_without_swap t = + let* status = Async.await t.task in + match status with + | `Success -> + Lwt.return (t.latest_gc_target_offset, t.new_suffix_start_offset) + | _ -> + let gc_output = read_gc_output ~root:t.root ~generation:t.generation in + let r = gc_errors status gc_output |> Errs.raise_if_error in + Lwt.return r + let on_finalise t f = (* Ignore returned promise since the purpose of this function is to add asynchronous callbacks to the GC diff --git a/src/irmin-pack/unix/gc.mli b/src/irmin-pack/unix/gc.mli index cd5836f6fe..2d5ea7f22d 100644 --- a/src/irmin-pack/unix/gc.mli +++ b/src/irmin-pack/unix/gc.mli @@ -25,6 +25,7 @@ module Make (Args : Gc_args.S) : sig val v : root:string -> + new_files_path:string -> generation:int -> unlink:bool -> dispatcher:Args.Dispatcher.t -> @@ -51,5 +52,12 @@ module Make (Args : Gc_args.S) : sig finalises. *) val cancel : t -> bool + + val finalise_without_swap : t -> (int63 * int63) Lwt.t + (** Waits for the current gc to finish and returns immediately without + swapping the files and doing the other finalisation steps from [finalise]. + + It returns the [latest_gc_target_offset] and the + [new_suffix_start_offset]. *) end with module Args = Args diff --git a/src/irmin-pack/unix/gc_worker.ml b/src/irmin-pack/unix/gc_worker.ml index 6d1c6c4cd5..9c1b70d22a 100644 --- a/src/irmin-pack/unix/gc_worker.ml +++ b/src/irmin-pack/unix/gc_worker.ml @@ -134,7 +134,7 @@ module Make (Args : Gc_args.S) = struct type gc_output = (gc_results, Args.Errs.t) result [@@deriving irmin] - let run ~generation root commit_key new_suffix_start_offset = + let run ~generation ~new_files_path root commit_key new_suffix_start_offset = let open Result_syntax in let config = Irmin_pack.Conf.init ~fresh:false ~readonly:true ~lru_size:0 root @@ -179,7 +179,7 @@ module Make (Args : Gc_args.S) = struct stats := Gc_stats.Worker.add_file_size !stats "mapping" mapping_size in (fun f -> - Mapping_file.create ~report_file_sizes ~root ~generation + Mapping_file.create ~report_file_sizes ~root:new_files_path ~generation ~register_entries:f () |> Errs.raise_if_error) @@ fun ~register_entry -> @@ -230,7 +230,9 @@ module Make (Args : Gc_args.S) = struct (* Step 4. Create the new prefix. *) stats := Gc_stats.Worker.finish_current_step !stats "prefix: start"; let prefix = - let path = Irmin_pack.Layout.V4.prefix ~root ~generation in + let path = + Irmin_pack.Layout.V4.prefix ~root:new_files_path ~generation + in Ao.create_rw_exn ~path in let () = @@ -262,7 +264,9 @@ module Make (Args : Gc_args.S) = struct Dispatcher.read_exn dispatcher accessor buf in let prefix = - let path = Irmin_pack.Layout.V4.prefix ~root ~generation in + let path = + Irmin_pack.Layout.V4.prefix ~root:new_files_path ~generation + in Io.open_ ~path ~readonly:false |> Errs.raise_if_error in Errors.finalise_exn (fun _outcome -> @@ -357,11 +361,12 @@ module Make (Args : Gc_args.S) = struct (* No one catches errors when this function terminates. Write the result in a file and terminate. *) - let run_and_output_result ~generation root commit_key new_suffix_start_offset - = + let run_and_output_result ~generation ~new_files_path root commit_key + new_suffix_start_offset = let result = Errs.catch (fun () -> - run ~generation root commit_key new_suffix_start_offset) + run ~generation ~new_files_path root commit_key + new_suffix_start_offset) in let write_result = write_gc_output ~root ~generation result in write_result |> Errs.log_if_error "writing gc output" diff --git a/src/irmin-pack/unix/gc_worker.mli b/src/irmin-pack/unix/gc_worker.mli index d7e359502c..540986ec45 100644 --- a/src/irmin-pack/unix/gc_worker.mli +++ b/src/irmin-pack/unix/gc_worker.mli @@ -23,7 +23,12 @@ module Make (Args : Gc_args.S) : sig module Args : Gc_args.S val run_and_output_result : - generation:int -> string -> Args.key -> int63 -> unit + generation:int -> + new_files_path:string -> + string -> + Args.key -> + int63 -> + unit type suffix_params = { start_offset : int63; diff --git a/src/irmin-pack/unix/io.ml b/src/irmin-pack/unix/io.ml index 29cfa7af17..2c4d44f9fe 100644 --- a/src/irmin-pack/unix/io.ml +++ b/src/irmin-pack/unix/io.ml @@ -267,6 +267,12 @@ module Unix = struct Ok () with Sys_error msg -> Error (`Sys_error msg) + let copy_file ~src ~dst = + let cmd = Filename.quote_command "cp" [ "-p"; src; dst ] in + match Sys.command cmd with + | 0 -> Ok () + | n -> Error (`Sys_error (Int.to_string n)) + let mkdir path = match (classify_path (Filename.dirname path), classify_path path) with | `Directory, `No_such_file_or_directory -> ( diff --git a/src/irmin-pack/unix/io_errors.ml b/src/irmin-pack/unix/io_errors.ml index 9dee4b415e..fd1dd736d6 100644 --- a/src/irmin-pack/unix/io_errors.ml +++ b/src/irmin-pack/unix/io_errors.ml @@ -75,7 +75,8 @@ module Make (Io : Io.S) : S with module Io = Io = struct | `Ro_not_allowed | `Io_misc of Io.misc_error | `Split_forbidden_during_batch - | `Multiple_empty_chunks ] + | `Multiple_empty_chunks + | `Forbidden_during_gc ] [@@deriving irmin] let raise_error = function diff --git a/src/irmin-pack/unix/io_intf.ml b/src/irmin-pack/unix/io_intf.ml index f33aa1d635..23e4ac77f5 100644 --- a/src/irmin-pack/unix/io_intf.ml +++ b/src/irmin-pack/unix/io_intf.ml @@ -78,6 +78,9 @@ module type S = sig val move_file : src:string -> dst:string -> (unit, [> `Sys_error of string ]) result + val copy_file : + src:string -> dst:string -> (unit, [> `Sys_error of string ]) result + val mkdir : string -> (unit, [> mkdir_error ]) result val unlink : string -> (unit, [> `Sys_error of string ]) result diff --git a/src/irmin-pack/unix/s.ml b/src/irmin-pack/unix/s.ml index 527b6516a2..2e0dbf14bd 100644 --- a/src/irmin-pack/unix/s.ml +++ b/src/irmin-pack/unix/s.ml @@ -53,6 +53,15 @@ module type S = sig TODO: Detail exceptions raised. *) + val create_one_commit_store : repo -> commit_key -> string -> unit Lwt.t + (** [create_one_commit_store t key path] creates a new store at [path] from + the existing one, containing only one commit, specified by the [key]. Note + that this operation is blocking. + + It requires that the files existing on disk when the operation is + launched, remain on disk until the operation completes. In particular, a + Gc running in a different process could remove files from disk. *) + module Gc : sig (** GC *) diff --git a/test/irmin-pack/test_gc.ml b/test/irmin-pack/test_gc.ml index 1547cc196c..9f9b3c9221 100644 --- a/test/irmin-pack/test_gc.ml +++ b/test/irmin-pack/test_gc.ml @@ -1133,3 +1133,68 @@ module Split = struct tc "Test commits and splits during GC" commits_and_splits_during_gc; ] end + +module Snapshot = struct + let export t commit = + let commit_key = S.Commit.key commit in + S.create_one_commit_store t.repo commit_key + + let snapshot_rw () = + let* t = init () in + let* t, c1 = commit_1 t in + let root_snap = Filename.concat t.root "snap" in + let* () = export t c1 root_snap in + [%log.debug "store works after export"]; + let* t = checkout_exn t c1 in + let* t, c2 = commit_2 t in + let* () = check_1 t c1 in + let* () = check_2 t c2 in + let* () = S.Repo.close t.repo in + [%log.debug "open store from import in rw"]; + let* t = init ~readonly:false ~fresh:false ~root:root_snap () in + let* t = checkout_exn t c1 in + let* () = check_1 t c1 in + let* () = check_not_found t c2 "c2 not commited yet" in + let* t, c2 = commit_2 t in + let* () = check_2 t c2 in + S.Repo.close t.repo + + let snapshot_import_in_ro () = + let* t = init () in + let* t, c1 = commit_1 t in + let root_snap = Filename.concat t.root "snap" in + let* () = export t c1 root_snap in + let* () = S.Repo.close t.repo in + [%log.debug "open store from import in ro"]; + let* t = init ~readonly:true ~fresh:false ~root:root_snap () in + let* t = checkout_exn t c1 in + let* () = check_1 t c1 in + S.Repo.close t.repo + + let snapshot_export_in_ro () = + let* t = init () in + let* t, c1 = commit_1 t in + let* () = S.Repo.close t.repo in + [%log.debug "open store in readonly to export"]; + let* t = init ~readonly:false ~fresh:false ~root:t.root () in + let root_snap = Filename.concat t.root "snap" in + let* () = export t c1 root_snap in + [%log.debug "store works after export in readonly"]; + let* t = checkout_exn t c1 in + let* () = check_1 t c1 in + let* () = S.Repo.close t.repo in + [%log.debug "open store from snapshot"]; + let* t = init ~readonly:false ~fresh:false ~root:root_snap () in + let* t = checkout_exn t c1 in + let* t, c2 = commit_2 t in + let* () = check_1 t c1 in + let* () = check_2 t c2 in + S.Repo.close t.repo + + let tests = + [ + tc "Import/export in rw" snapshot_rw; + tc "Import in ro" snapshot_import_in_ro; + tc "Export in ro" snapshot_export_in_ro; + ] +end diff --git a/test/irmin-pack/test_gc.mli b/test/irmin-pack/test_gc.mli index 3b145a55f3..b36357043e 100644 --- a/test/irmin-pack/test_gc.mli +++ b/test/irmin-pack/test_gc.mli @@ -26,6 +26,10 @@ module Split : sig val tests : unit Alcotest_lwt.test_case list end +module Snapshot : sig + val tests : unit Alcotest_lwt.test_case list +end + module Store : sig module S : Irmin_pack.S diff --git a/test/irmin-pack/test_pack.ml b/test/irmin-pack/test_pack.ml index eab5829cce..97e941b4fa 100644 --- a/test/irmin-pack/test_pack.ml +++ b/test/irmin-pack/test_pack.ml @@ -553,4 +553,5 @@ let misc = ("layout", Layout.tests); ("dispatcher", Test_dispatcher.tests); ("corrupted", Test_corrupted.tests); + ("snapshot_gc", Test_gc.Snapshot.tests); ]