From 297b557bf3c8ef087b95c384d0dc88a0301752b7 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 11 Mar 2025 20:50:00 +0000 Subject: [PATCH 1/4] SST sub-blocks and block types (#475) Add block types, so that based on the content of blocks, a block can be split into different arrangements of sub-blocks. This will allow for head/tail of the block with index entries to be exposed, to allow for more efficient 2i. Also in general, when looking for the nth element, only a sub-block will need to be opened. The code trades performance for neatness, with precise pattern matching used to avoid overheads with copying when concatenating lists. --- include/leveled.hrl | 28 +- src/leveled_bookie.erl | 82 +-- src/leveled_codec.erl | 16 +- src/leveled_log.erl | 2 + src/leveled_sst.erl | 902 ++++++++++++++++------------- src/leveled_sstblock.erl | 808 ++++++++++++++++++++++++++ src/leveled_tree.erl | 120 ++-- src/leveled_util.erl | 2 +- test/end_to_end/iterator_SUITE.erl | 41 ++ test/end_to_end/perf_SUITE.erl | 12 +- test/end_to_end/recovery_SUITE.erl | 38 +- test/end_to_end/riak_SUITE.erl | 180 +++++- 12 files changed, 1712 insertions(+), 519 deletions(-) create mode 100644 src/leveled_sstblock.erl diff --git a/include/leveled.hrl b/include/leveled.hrl index e6596330..4c5fe810 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -27,6 +27,7 @@ -define(SST_PAGECACHELEVEL_LOOKUP, 4). -define(DEFAULT_STATS_PERC, 10). -define(DEFAULT_SYNC_STRATEGY, none). +-define(DEFAULT_BLOCK_VERSION, 1). %%%============================================================================ %%%============================================================================ @@ -105,17 +106,22 @@ :: leveled_monitor:monitor()}). -record(sst_options, - {press_method = ?COMPRESSION_METHOD - :: leveled_sst:press_method(), - press_level = ?COMPRESSION_LEVEL :: non_neg_integer(), - log_options = leveled_log:get_opts() - :: leveled_log:log_options(), - max_sstslots = ?MAX_SSTSLOTS :: pos_integer()|infinity, - max_mergebelow = ?MAX_MERGEBELOW :: pos_integer()|infinity, - pagecache_level = ?SST_PAGECACHELEVEL_NOLOOKUP - :: pos_integer(), - monitor = {no_monitor, 0} - :: leveled_monitor:monitor()}). + { + press_method = ?COMPRESSION_METHOD + :: leveled_sst:press_method(), + block_version = ?DEFAULT_BLOCK_VERSION + :: leveled_sst:block_version(), + press_level = ?COMPRESSION_LEVEL :: non_neg_integer(), + log_options = leveled_log:get_opts() + :: leveled_log:log_options(), + max_sstslots = ?MAX_SSTSLOTS :: pos_integer()|infinity, + max_mergebelow = ?MAX_MERGEBELOW :: pos_integer()|infinity, + pagecache_level = ?SST_PAGECACHELEVEL_NOLOOKUP + :: pos_integer(), + monitor = {no_monitor, 0} + :: leveled_monitor:monitor() + } + ). -record(inker_options, {cdb_max_size :: integer() | undefined, diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 5236d448..372afd5f 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -127,6 +127,7 @@ {ledger_preloadpagecache_level, ?SST_PAGECACHELEVEL_LOOKUP}, {compression_method, ?COMPRESSION_METHOD}, {ledger_compression, as_store}, + {block_version, 1}, {compression_point, ?COMPRESSION_POINT}, {compression_level, ?COMPRESSION_LEVEL}, {log_level, ?LOG_LEVEL}, @@ -309,6 +310,10 @@ % Define an alternative to the compression method to be used by the % ledger only. Default is as_store - use the method defined as % compression_method for the whole store + {block_version, 0|1} | + % Version of the leveled_sst blocks. 
Block version 0 does not use + % sub-blocks, whereas block version 1 has multiple types of blocks + % which can be split into sub-blocks {compression_point, on_compact|on_receipt} | % The =compression point can be changed between on_receipt (all % values are compressed as they are received), to on_compact where @@ -1884,45 +1889,54 @@ set_options(Opts, Monitor) -> end, CompressionLevel = proplists:get_value(compression_level, Opts), + BlockVersion = proplists:get_value(block_version, Opts), MaxSSTSlots = proplists:get_value(max_sstslots, Opts), MaxMergeBelow = proplists:get_value(max_mergebelow, Opts), ScoreOneIn = proplists:get_value(journalcompaction_scoreonein, Opts), - {#inker_options{root_path = JournalFP, - reload_strategy = ReloadStrategy, - max_run_length = proplists:get_value(max_run_length, Opts), - singlefile_compactionperc = SFL_CompPerc, - maxrunlength_compactionperc = MRL_CompPerc, - waste_retention_period = WRP, - snaptimeout_long = SnapTimeoutLong, - compression_method = JournalCompression, - compress_on_receipt = CompressOnReceipt, - score_onein = ScoreOneIn, - cdb_options = - #cdb_options{ - max_size = MaxJournalSize, - max_count = MaxJournalCount, - binary_mode = true, - sync_strategy = SyncStrat, - log_options = leveled_log:get_opts(), - monitor = Monitor}, - monitor = Monitor}, - #penciller_options{root_path = LedgerFP, - max_inmemory_tablesize = PCLL0CacheSize, - levelzero_cointoss = true, - snaptimeout_short = SnapTimeoutShort, - snaptimeout_long = SnapTimeoutLong, - sst_options = - #sst_options{ - press_method = LedgerCompression, - press_level = CompressionLevel, - log_options = leveled_log:get_opts(), - max_sstslots = MaxSSTSlots, - max_mergebelow = MaxMergeBelow, - monitor = Monitor}, - monitor = Monitor} - }. + { + #inker_options{ + root_path = JournalFP, + reload_strategy = ReloadStrategy, + max_run_length = proplists:get_value(max_run_length, Opts), + singlefile_compactionperc = SFL_CompPerc, + maxrunlength_compactionperc = MRL_CompPerc, + waste_retention_period = WRP, + snaptimeout_long = SnapTimeoutLong, + compression_method = JournalCompression, + compress_on_receipt = CompressOnReceipt, + score_onein = ScoreOneIn, + cdb_options = + #cdb_options{ + max_size = MaxJournalSize, + max_count = MaxJournalCount, + binary_mode = true, + sync_strategy = SyncStrat, + log_options = leveled_log:get_opts(), + monitor = Monitor + }, + monitor = Monitor + }, + #penciller_options{ + root_path = LedgerFP, + max_inmemory_tablesize = PCLL0CacheSize, + levelzero_cointoss = true, + snaptimeout_short = SnapTimeoutShort, + snaptimeout_long = SnapTimeoutLong, + sst_options = + #sst_options{ + press_method = LedgerCompression, + press_level = CompressionLevel, + block_version = BlockVersion, + log_options = leveled_log:get_opts(), + max_sstslots = MaxSSTSlots, + max_mergebelow = MaxMergeBelow, + monitor = Monitor + }, + monitor = Monitor + } + }. -spec return_snapfun( diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 81797993..ead95a41 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -468,16 +468,16 @@ isvalid_ledgerkey(_LK) -> %% false and further results may be required from further ranges. 
endkey_passed(all, _) -> false; -endkey_passed({K1, null, null, null}, {K1, _, _, _}) -> - false; -endkey_passed({K1, K2, null, null}, {K1, K2, _, _}) -> - false; -endkey_passed({K1, K2, K3, null}, {K1, K2, K3, _}) -> - false; -endkey_passed({K1, null}, {K1, _}) -> +endkey_passed({KQ1, null, null, null}, {KR1, _, _, _}) when KQ1 =/= null -> + KQ1 < KR1; +endkey_passed({K1, KQ2, null, null}, {K1, KR2, _, _}) when KQ2 =/= null -> + KQ2 < KR2; +endkey_passed({K1, K2, KQ3, null}, {K1, K2, KR3, _}) when KQ3 =/= null -> + KQ3 < KR3; +endkey_passed({KQ1, null}, {KR1, _}) when KQ1 =/= null -> % See leveled_sst SlotIndex implementation. Here keys may be slimmed to % single binaries or two element tuples before forming the index. - false; + KQ1 < KR1; endkey_passed(null, _) -> false; endkey_passed(QueryEndKey, RangeEndKey) -> diff --git a/src/leveled_log.erl b/src/leveled_log.erl index afa96425..d370dfbc 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -218,6 +218,8 @@ {info, <<"SST merge list build timings of fold_toslot=~w slot_hashlist=~w slot_serialise=~w slot_finish=~w is_basement=~w level=~w">>}, sst14 => {debug, <<"File ~s has completed BIC">>}, + sst15 => + {warning, <<"Default returned from block due to handling error ~0p">>}, i0001 => {info, <<"Unexpected failure to fetch value for Key=~w SQN=~w with reason ~w">>}, i0002 => diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index ddf0d988..e035cc7a 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -134,6 +134,8 @@ -export([in_range/3]). +-export([hmac/1, filterby_midblock/2]). + -record(slot_index_value, {slot_id :: integer(), start_position :: integer(), @@ -153,6 +155,10 @@ :: #slot_index_value{}. -type press_method() :: lz4|native|zstd|none. +-type block_version() + :: 0|1. +-type block_method() + :: {block_version(), press_method()}. -type range_endpoint() :: all|leveled_codec:ledger_key(). 
-type slot_pointer() @@ -227,7 +233,7 @@ root_path, filename, read_state :: read_state() | undefined, - compression_method = native :: press_method(), + block_method = {0, native} :: block_method(), index_moddate = ?INDEX_MODDATE :: boolean(), starting_pid :: pid()|undefined, new_slots :: list()|undefined, @@ -256,6 +262,8 @@ sst_pointer/0, slot_pointer/0, press_method/0, + block_version/0, + block_method/0, segment_check_fun/0, sst_options/0 ] @@ -568,6 +576,7 @@ starting({call, From}, leveled_log:save(OptsSST#sst_options.log_options), Monitor = OptsSST#sst_options.monitor, PressMethod = OptsSST#sst_options.press_method, + BlockVersion = OptsSST#sst_options.block_version, {Length, SlotIndex, BlockEntries, SlotsBin, Bloom} = build_all_slots(SlotList), {_, BlockIndexCache, HighModDate} = @@ -581,8 +590,14 @@ starting({call, From}, SlotIndex, Level, FirstKey, Length, MaxSQN, Bloom, CountOfTombs), ActualFilename = write_file( - RootPath, Filename, SummaryBin, SlotsBin, - PressMethod, IdxModDate, CountOfTombs), + RootPath, + Filename, + SummaryBin, + SlotsBin, + {BlockVersion, PressMethod}, + IdxModDate, + CountOfTombs + ), {UpdState, Bloom} = read_file( ActualFilename, @@ -632,6 +647,7 @@ starting(cast, complete_l0startup, State) -> leveled_log:save(OptsSST#sst_options.log_options), Monitor = OptsSST#sst_options.monitor, PressMethod = OptsSST#sst_options.press_method, + BlockVersion= OptsSST#sst_options.block_version, FetchFun = fun(Slot) -> lists:nth(Slot, FetchedSlots) end, KVList = leveled_pmem:to_list(length(FetchedSlots), FetchFun), Time0 = timer:now_diff(os:timestamp(), SW0), @@ -660,8 +676,15 @@ starting(cast, complete_l0startup, State) -> SW4 = os:timestamp(), ActualFilename = - write_file(RootPath, Filename, SummaryBin, SlotsBin, - PressMethod, IdxModDate, not_counted), + write_file( + RootPath, + Filename, + SummaryBin, + SlotsBin, + {BlockVersion, PressMethod}, + IdxModDate, + not_counted + ), {UpdState, Bloom} = read_file( ActualFilename, @@ -740,7 +763,7 @@ reader({call, From}, LedgerKey, Hash, State#state.summary, - State#state.compression_method, + State#state.block_method, State#state.high_modified_date, State#state.index_moddate, RS#read_state.filter_fun, @@ -791,17 +814,17 @@ reader({call, From}, reader({call, From}, {get_slots, SlotList, SegChecker, LowLastMod}, State = #state{read_state = RS}) when ?IS_DEF(RS) -> - PressMethod = State#state.compression_method, + BlockMethod = State#state.block_method, IdxModDate = State#state.index_moddate, {NeedBlockIdx, SlotBins} = read_slots( RS#read_state.handle, SlotList, {SegChecker, LowLastMod, RS#read_state.blockindex_cache}, - State#state.compression_method, + State#state.block_method, State#state.index_moddate), {keep_state_and_data, - [{reply, From, {NeedBlockIdx, SlotBins, PressMethod, IdxModDate}}]}; + [{reply, From, {NeedBlockIdx, SlotBins, BlockMethod, IdxModDate}}]}; reader({call, From}, get_maxsequencenumber, State) -> Summary = State#state.summary, {keep_state_and_data, @@ -877,7 +900,7 @@ delete_pending({call, From}, fetch( LedgerKey, Hash, State#state.summary, - State#state.compression_method, + State#state.block_method, State#state.high_modified_date, State#state.index_moddate, RS#read_state.filter_fun, @@ -914,17 +937,17 @@ delete_pending( {call, From}, {get_slots, SlotList, SegChecker, LowLastMod}, State = #state{read_state = RS}) when ?IS_DEF(RS) -> - PressMethod = State#state.compression_method, + BlockMethod = State#state.block_method, IdxModDate = State#state.index_moddate, {_NeedBlockIdx, SlotBins} = read_slots( 
RS#read_state.handle, SlotList, {SegChecker, LowLastMod, RS#read_state.blockindex_cache}, - PressMethod, + BlockMethod, IdxModDate), {keep_state_and_data, - [{reply, From, {false, SlotBins, PressMethod, IdxModDate}}, + [{reply, From, {false, SlotBins, BlockMethod, IdxModDate}}, ?DELETE_TIMEOUT]}; delete_pending( {call, From}, @@ -1398,7 +1421,7 @@ check_modified(_, _, _) -> leveled_codec:ledger_key(), leveled_codec:segment_hash(), sst_summary(), - press_method(), + block_method(), non_neg_integer()|undefined, boolean(), summary_filter(), @@ -1417,7 +1440,7 @@ check_modified(_, _, _) -> %% not_present if the key is not in the store. fetch(LedgerKey, Hash, Summary, - PressMethod, HighModDate, IndexModDate, FilterFun, BIC, FetchCache, + BlockMethod, HighModDate, IndexModDate, FilterFun, BIC, FetchCache, Handle, Level, Monitor) -> SW0 = leveled_monitor:maybe_time(Monitor), Slot = @@ -1430,7 +1453,7 @@ fetch(LedgerKey, Hash, SlotBin = read_slot(Handle, Slot), {Result, Header} = binaryslot_get( - SlotBin, LedgerKey, Hash, PressMethod, IndexModDate), + SlotBin, LedgerKey, Hash, BlockMethod, IndexModDate), {_UpdateState, BIC0, HMD0} = update_blockindex_cache( [{SlotID, Header}], BIC, HighModDate, IndexModDate), @@ -1469,7 +1492,7 @@ fetch(LedgerKey, Hash, BlockLengths, byte_size(PosBin), LedgerKey, - PressMethod, + BlockMethod, IndexModDate ), case Result of @@ -1549,11 +1572,11 @@ maxslots_level(_Level, MaxSlotCount) -> 2 * MaxSlotCount. write_file(RootPath, Filename, SummaryBin, SlotsBin, - PressMethod, IdxModDate, CountOfTombs) -> + BlockMethod, IdxModDate, CountOfTombs) -> SummaryLength = byte_size(SummaryBin), SlotsLength = byte_size(SlotsBin), {PendingName, FinalName} = generate_filenames(Filename), - FileVersion = gen_fileversion(PressMethod, IdxModDate, CountOfTombs), + FileVersion = gen_fileversion(BlockMethod, IdxModDate, CountOfTombs), case filelib:is_file(filename:join(RootPath, FinalName)) of true -> AltName = filename:join(RootPath, filename:basename(FinalName)) @@ -1564,14 +1587,19 @@ write_file(RootPath, Filename, SummaryBin, SlotsBin, ok end, - ok = leveled_util:safe_rename(filename:join(RootPath, PendingName), - filename:join(RootPath, FinalName), - <>, - false), + ok = + leveled_util:safe_rename( + filename:join(RootPath, PendingName), + filename:join(RootPath, FinalName), + << + FileVersion:8/integer, + SlotsLength:32/integer, + SummaryLength:32/integer, + SlotsBin/binary, + SummaryBin/binary + >>, + false + ), FinalName. read_file(Filename, State, LoadPageCache, BIC, Level) -> @@ -1611,7 +1639,7 @@ read_file(Filename, State, LoadPageCache, BIC, Level) -> }, Bloom}. -gen_fileversion(PressMethod, IdxModDate, CountOfTombs) -> +gen_fileversion({BlockVersion, PressMethod}, IdxModDate, CountOfTombs) -> % Native or none can be treated the same once written, as reader % does not need to know as compression info will be in header of the % block @@ -1643,16 +1671,38 @@ gen_fileversion(PressMethod, IdxModDate, CountOfTombs) -> _ -> 0 end, - Bit1 + Bit2 + Bit3 + Bit4. + Bit5 = + case BlockVersion of + 0 -> + 0; + 1 -> + 16 + end, + Bit1 + Bit2 + Bit3 + Bit4 + Bit5. 
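As context for the hunk above: the file-version byte written by write_file is a small bitfield, so a reader can detect the new sub-block format without reading any slot data. A minimal sketch of that check, mirroring the decode in imp_fileversion/2 below (is_subblock_format/1 is a hypothetical helper, not part of this patch):

is_subblock_format(VersionInt) when is_integer(VersionInt) ->
    case VersionInt band 16 of
        16 -> true;   % block_version 1 - blocks may be split into sub-blocks
        0 -> false    % block_version 0 - legacy single-block layout
    end.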
imp_fileversion(VersionInt, State) -> - UpdState0 = + CompressionMethod0 = case VersionInt band 1 of 0 -> - State#state{compression_method = native}; + native; 1 -> - State#state{compression_method = lz4} + lz4 + end, + CompressionMethod = + case VersionInt band 8 of + 0 -> + CompressionMethod0; + 8 -> + zstd + end, + BlockVersion = + case VersionInt band 16 of + 0 -> + 0; + 16 -> + 1 end, + UpdState0 = State#state{block_method = {BlockVersion, CompressionMethod}}, UpdState1 = case VersionInt band 2 of 0 -> @@ -1667,12 +1717,7 @@ imp_fileversion(VersionInt, State) -> 4 -> UpdState1#state{tomb_count = 0} end, - case VersionInt band 8 of - 0 -> - UpdState2; - 8 -> - UpdState2#state{compression_method = zstd} - end. + UpdState2. open_reader(Filename, LoadPageCache) -> {ok, Handle} = file:open(Filename, [binary, raw, read]), @@ -1766,7 +1811,6 @@ build_all_slots( lists:append(HashList, HashLists) ). - generate_filenames(RootFilename) -> Ext = filename:extension(RootFilename), Components = filename:split(RootFilename), @@ -1781,64 +1825,6 @@ generate_filenames(RootFilename) -> filename:join(DN, FP_NOEXT) ++ ".sst"} end. - --spec serialise_block(any(), press_method()) -> binary(). -%% @doc -%% Convert term to binary -%% Function split out to make it easier to experiment with different -%% compression methods. Also, perhaps standardise applictaion of CRC -%% checks -serialise_block(Term, lz4) -> - {ok, Bin} = lz4:pack(term_to_binary(Term)), - CRC32 = hmac(Bin), - <>; -serialise_block(Term, native) -> - Bin = term_to_binary(Term, ?BINARY_SETTINGS), - CRC32 = hmac(Bin), - <>; -serialise_block(Term, zstd) -> - Bin = zstd:compress(term_to_binary(Term)), - CRC32 = hmac(Bin), - <>; -serialise_block(Term, none) -> - Bin = term_to_binary(Term), - CRC32 = hmac(Bin), - <>. - --spec deserialise_block(binary(), press_method()) -> list(leveled_codec:ledger_kv()). -%% @doc -%% Convert binary to term -%% Function split out to make it easier to experiment with different -%% compression methods. -%% -%% If CRC check fails we treat all the data as missing -deserialise_block(Bin, PressMethod) when byte_size(Bin) > 4 -> - BinS = byte_size(Bin) - 4, - <> = Bin, - try - CRC32 = hmac(TermBin), - deserialise_checkedblock(TermBin, PressMethod) - catch - _Exception:_Reason -> - [] - end; -deserialise_block(_Bin, _PM) -> - []. - -deserialise_checkedblock(Bin, lz4) when is_binary(Bin) -> - case lz4:unpack(Bin) of - {ok, Bin0} when is_binary(Bin0) -> - binary_to_term(Bin0) - end; -deserialise_checkedblock(Bin, zstd) when is_binary(Bin) -> - case zstd:decompress(Bin) of - Bin0 when is_binary(Bin0) -> - binary_to_term(Bin0) - end; -deserialise_checkedblock(Bin, _Other) when is_binary(Bin) -> - % native or none can be treated the same - binary_to_term(Bin). - -spec hmac(binary()|integer()) -> integer(). %% @doc %% Perform a CRC check on an input @@ -2054,14 +2040,14 @@ take_max_lastmoddate(LMD, LMDAcc) -> -spec generate_binary_slot( leveled_codec:maybe_lookup(), {forward|reverse, list(leveled_codec:ledger_kv())}, - press_method(), + block_method(), boolean(), build_timings()) -> {binary_slot(), build_timings()}. 
%% @doc %% Generate the serialised slot to be used when storing this sublist of keys %% and values generate_binary_slot( - Lookup, {DR, KVL0}, PressMethod, IndexModDate, BuildTimings0) -> + Lookup, {DR, KVL0}, BlockMethod, IndexModDate, BuildTimings0) -> % The slot should be received reversed - get last key before flipping % accumulate_positions/2 should use the reversed KVL for efficiency {KVL, KVLr} = @@ -2104,45 +2090,55 @@ generate_binary_slot( {B1, B2, B3, B4, B5} = case length(KVL) of L when L =< SideBlockSize -> - {serialise_block(KVL, PressMethod), + { + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVL), <<0:0>>, <<0:0>>, <<0:0>>, - <<0:0>>}; + <<0:0>> + }; L when L =< 2 * SideBlockSize -> {KVLA, KVLB} = lists:split(SideBlockSize, KVL), - {serialise_block(KVLA, PressMethod), - serialise_block(KVLB, PressMethod), + { + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLA), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLB), <<0:0>>, <<0:0>>, - <<0:0>>}; + <<0:0>> + }; L when L =< (2 * SideBlockSize + MidBlockSize) -> {KVLA, KVLB_Rest} = lists:split(SideBlockSize, KVL), {KVLB, KVLC} = lists:split(SideBlockSize, KVLB_Rest), - {serialise_block(KVLA, PressMethod), - serialise_block(KVLB, PressMethod), - serialise_block(KVLC, PressMethod), + { + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLA), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLB), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLC), <<0:0>>, - <<0:0>>}; + <<0:0>> + }; L when L =< (3 * SideBlockSize + MidBlockSize) -> {KVLA, KVLB_Rest} = lists:split(SideBlockSize, KVL), {KVLB, KVLC_Rest} = lists:split(SideBlockSize, KVLB_Rest), {KVLC, KVLD} = lists:split(MidBlockSize, KVLC_Rest), - {serialise_block(KVLA, PressMethod), - serialise_block(KVLB, PressMethod), - serialise_block(KVLC, PressMethod), - serialise_block(KVLD, PressMethod), - <<0:0>>}; + { + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLA), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLB), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLC), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLD), + <<0:0>> + }; L when L =< (4 * SideBlockSize + MidBlockSize) -> {KVLA, KVLB_Rest} = lists:split(SideBlockSize, KVL), {KVLB, KVLC_Rest} = lists:split(SideBlockSize, KVLB_Rest), {KVLC, KVLD_Rest} = lists:split(MidBlockSize, KVLC_Rest), {KVLD, KVLE} = lists:split(SideBlockSize, KVLD_Rest), - {serialise_block(KVLA, PressMethod), - serialise_block(KVLB, PressMethod), - serialise_block(KVLC, PressMethod), - serialise_block(KVLD, PressMethod), - serialise_block(KVLE, PressMethod)} + { + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLA), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLB), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLC), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLD), + leveled_sstblock:serialise_block(Lookup, BlockMethod, KVLE) + } end, BuildTimings2 = update_buildtimings(BuildTimings1, slot_serialise), @@ -2163,25 +2159,38 @@ generate_binary_slot( Header = case IndexModDate of true -> - <>; + PosBinIndex/binary + >>; false -> - <> + PosBinIndex/binary + >> end, CheckH = hmac(Header), - SlotBin = <>, + SlotBin = + << + CheckB1P:32/integer, + B1P:32/integer, + CheckH:32/integer, + Header/binary, + B1/binary, + B2/binary, + B3/binary, + B4/binary, + B5/binary + >>, BuildTimings3 = update_buildtimings(BuildTimings2, slot_finish), @@ -2193,21 +2202,21 @@ generate_binary_slot( binary()|{file:io_device(), integer()}, 
binary(), integer(), - press_method(), + block_method(), boolean(), list()) -> list(leveled_codec:ledger_kv()). %% @doc %% Acc should start as not_present if LedgerKey is a key, and a list if %% LedgerKey is false -check_blocks_allkeys([], _BP, _BLs, _PBL, _PM, _IMD, Acc) -> +check_blocks_allkeys([], _BP, _BLs, _PBL, _BM, _IMD, Acc) -> lists:reverse(Acc); check_blocks_allkeys( [Pos|Rest], BlockPointer, BlockLengths, PosBinLength, - PressMethod, + BlockMethod, IdxModDate, Acc) -> {BlockNumber, BlockPos} = revert_position(Pos), @@ -2219,14 +2228,14 @@ check_blocks_allkeys( BlockNumber, additional_offset(IdxModDate) ), - case spawn_check_block(BlockPos, BlockBin, PressMethod) of + case spawn_check_block(BlockPos, BlockBin, BlockMethod) of {K, V} -> check_blocks_allkeys( Rest, BlockPointer, BlockLengths, PosBinLength, - PressMethod, + BlockMethod, IdxModDate, [{K, V}|Acc] ) @@ -2238,7 +2247,7 @@ check_blocks_allkeys( binary(), integer(), leveled_codec:ledger_key(), - press_method(), + block_method(), boolean()) -> not_present|leveled_codec:ledger_kv(). %% @doc @@ -2252,7 +2261,7 @@ check_blocks_matchkey( BlockLengths, PosBinLength, LedgerKeyToCheck, - PressMethod, + BlockMethod, IdxModDate) -> {BlockNumber, BlockPos} = revert_position(Pos), BlockBin = @@ -2262,7 +2271,7 @@ check_blocks_matchkey( BlockNumber, additional_offset(IdxModDate) ), - CheckResult = spawn_check_block(BlockPos, BlockBin, PressMethod), + CheckResult = spawn_check_block(BlockPos, BlockBin, BlockMethod), case {CheckResult, LedgerKeyToCheck} of {{K, V}, K} -> {K, V}; @@ -2273,23 +2282,25 @@ check_blocks_matchkey( BlockLengths, PosBinLength, LedgerKeyToCheck, - PressMethod, + BlockMethod, IdxModDate ) end. --spec spawn_check_block(non_neg_integer(), binary(), press_method()) +-spec spawn_check_block(non_neg_integer(), binary(), block_method()) -> not_present|leveled_codec:ledger_kv(). -spawn_check_block(BlockPos, BlockBin, PressMethod) -> +spawn_check_block(BlockPos, BlockBin, BlockMethod) -> Parent = self(), Pid = spawn_link( - fun() -> check_block(Parent, BlockPos, BlockBin, PressMethod) end + fun() -> + check_block(Parent, BlockPos, BlockBin, BlockMethod) + end ), receive {checked_block, Pid, R} -> R end. -check_block(From, BlockPos, BlockBin, PressMethod) -> - R = fetchfrom_rawblock(BlockPos, deserialise_block(BlockBin, PressMethod)), +check_block(From, BlockPos, BlockBin, BlockMethod) -> + R = leveled_sstblock:get_nth(BlockPos, BlockBin, BlockMethod), From ! {checked_block, self(), R}. -spec additional_offset(boolean()) -> pos_integer(). @@ -2304,12 +2315,11 @@ additional_offset(false) -> read_block({Handle, StartPos}, BlockLengths, PosBinLength, BlockID, AO) -> {Offset, Length} = block_offsetandlength(BlockLengths, BlockID), - {ok, BlockBin} = file:pread(Handle, - StartPos - + Offset - + PosBinLength - + AO, - Length), + {ok, BlockBin} = + file:pread( + Handle, + StartPos + Offset + PosBinLength + AO, Length + ), BlockBin; read_block(SlotBin, BlockLengths, PosBinLength, BlockID, AO) -> {Offset, Length} = block_offsetandlength(BlockLengths, BlockID), @@ -2318,9 +2328,12 @@ read_block(SlotBin, BlockLengths, PosBinLength, BlockID, AO) -> BlockBin. read_slot(Handle, Slot) -> - {ok, SlotBin} = file:pread(Handle, - Slot#slot_index_value.start_position, - Slot#slot_index_value.length), + {ok, SlotBin} = + file:pread( + Handle, + Slot#slot_index_value.start_position, + Slot#slot_index_value.length + ), SlotBin. 
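To illustrate the claim in the commit message that a point read now only opens one block: a hedged sketch, using read_block/5 and additional_offset/1 above together with the new leveled_sstblock:get_nth/3, of fetching the Nth entry from a single block of an in-memory slot binary. fetch_nth_from_block/7 is a hypothetical helper for illustration only.

fetch_nth_from_block(
        SlotBin, BlockLengths, PosBinLength, BlockID, N, BlockMethod, IdxModDate) ->
    BlockBin =
        read_block(
            SlotBin,
            BlockLengths,
            PosBinLength,
            BlockID,
            additional_offset(IdxModDate)
        ),
    %% get_nth/3 is expected to return not_present (rather than crash) if the
    %% entry cannot be recovered - see the corrupted block tests below
    leveled_sstblock:get_nth(N, BlockBin, BlockMethod).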
-spec pointer_mapfun( @@ -2355,7 +2368,7 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> file:io_device(), list(), {segment_check_fun(), non_neg_integer(), blockindex_cache()}, - press_method(), + block_method(), boolean()) -> {boolean(), list(expanded_slot()|leveled_codec:ledger_kv())}. %% @doc @@ -2368,12 +2381,12 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> %% be considered as superior to a matching key - as otherwise a matching key %% may be intermittently removed from the result set read_slots(Handle, SlotList, {false, 0, _BlockIndexCache}, - _PressMethod, _IdxModDate) -> + _BlockMethod, _IdxModDate) -> % No list of segments passed or useful Low LastModified Date % Just read slots in SlotList {false, read_slotlist(SlotList, Handle)}; read_slots(Handle, SlotList, {SegChecker, LowLastMod, BlockIndexCache}, - PressMethod, IdxModDate) -> + BlockMethod, IdxModDate) -> % Potentially need to check the low last modified date, and also the % segment_check_fun against the index. If the index is cached, return the % KV pairs at this point, otherwise return the slot pointer so that the @@ -2423,7 +2436,7 @@ read_slots(Handle, SlotList, {SegChecker, LowLastMod, BlockIndexCache}, BlockIdx, {Handle, SP}, BlockLengths, - PressMethod, + BlockMethod, IdxModDate, SegChecker, {SK, EK}), @@ -2439,14 +2452,14 @@ read_slots(Handle, SlotList, {SegChecker, LowLastMod, BlockIndexCache}, binary(), binary()|{file:io_device(), integer()}, binary(), - press_method(), + block_method(), boolean(), segment_check_fun(), {range_endpoint(), range_endpoint()}) -> list(leveled_codec:ledger_kv()). checkblocks_segandrange( BlockIdx, SlotOrHandle, BlockLengths, - PressMethod, IdxModDate, SegChecker, {StartKey, EndKey}) -> + BlockMethod, IdxModDate, SegChecker, {StartKey, EndKey}) -> PositionList = find_pos(BlockIdx, SegChecker), KVL = check_blocks_allkeys( @@ -2454,7 +2467,7 @@ checkblocks_segandrange( SlotOrHandle, BlockLengths, byte_size(BlockIdx), - PressMethod, + BlockMethod, IdxModDate, [] ), @@ -2469,7 +2482,7 @@ read_slotlist(SlotList, Handle) -> -spec binaryslot_reader( list(expanded_slot()), - press_method(), + block_method(), boolean(), segment_check_fun(), list(expandable_pointer())) @@ -2486,7 +2499,7 @@ read_slotlist(SlotList, Handle) -> %% endpoints of the block are outside of the range, and leaving blocks already %% proven to be outside of the range unopened. binaryslot_reader( - SlotBinsToFetch, PressMethod, IdxModDate, SegChecker, SlotsToPoint) -> + SlotBinsToFetch, BlockMethod, IdxModDate, SegChecker, SlotsToPoint) -> % Two accumulators are added. % One to collect the list of keys and values found in the binary slots % (subject to range filtering if the slot is still deserialised at this @@ -2498,18 +2511,18 @@ binaryslot_reader( % loop state), and those caches can be used for future queries. binaryslot_reader( lists:reverse(SlotBinsToFetch), - PressMethod, + BlockMethod, IdxModDate, SegChecker, SlotsToPoint, [] ). -binaryslot_reader([], _PressMethod, _IdxModDate, _SegChecker, Acc, BIAcc) -> +binaryslot_reader([], _BlockMethod, _IdxModDate, _SegChecker, Acc, BIAcc) -> {Acc, BIAcc}; binaryslot_reader( [{SlotBin, ID, SK, EK}|Tail], - PressMethod, IdxModDate, SegChecker, Acc, BIAcc) -> + BlockMethod, IdxModDate, SegChecker, Acc, BIAcc) -> % The start key and end key here, may not the start key and end key the % application passed into the query. 
If the slot is known to lie entirely % inside the range, on either of both sides, the SK and EK may be @@ -2517,18 +2530,18 @@ binaryslot_reader( % entries in this slot to be trimmed from either or both sides. {TrimmedL, BICache} = binaryslot_trimmed( - SlotBin, SK, EK, PressMethod, IdxModDate, SegChecker, Acc + SlotBin, SK, EK, BlockMethod, IdxModDate, SegChecker, Acc ), binaryslot_reader( Tail, - PressMethod, + BlockMethod, IdxModDate, SegChecker, TrimmedL, [{ID, BICache}|BIAcc] ); -binaryslot_reader([{K, V}|Tail], PM, IMD, SC, Acc, BIAcc) -> - binaryslot_reader(Tail, PM, IMD, SC, [{K, V}|Acc], BIAcc). +binaryslot_reader([{K, V}|Tail], BM, IMD, SC, Acc, BIAcc) -> + binaryslot_reader(Tail, BM, IMD, SC, [{K, V}|Acc], BIAcc). read_length_list(Handle, LengthList) -> @@ -2559,9 +2572,9 @@ extract_header(Header, false) -> binary(), leveled_codec:ledger_key(), leveled_codec:segment_hash(), - press_method(), + block_method(), boolean()) -> {not_present|leveled_codec:ledger_kv(), binary()|none}. -binaryslot_get(FullBin, Key, Hash, PressMethod, IdxModDate) -> +binaryslot_get(FullBin, Key, Hash, BlockMethod, IdxModDate) -> case crc_check_slot(FullBin) of {Header, Blocks} -> {BlockLengths, _LMD, PosBinIndex} = @@ -2571,7 +2584,7 @@ binaryslot_get(FullBin, Key, Hash, PressMethod, IdxModDate) -> HashExtract when is_integer(HashExtract) -> find_pos(PosBinIndex, segment_checker(HashExtract)) end, - {fetch_value(PosList, BlockLengths, Blocks, Key, PressMethod), + {fetch_value(PosList, BlockLengths, Blocks, Key, BlockMethod), Header}; crc_wonky -> {not_present, none} @@ -2579,11 +2592,11 @@ binaryslot_get(FullBin, Key, Hash, PressMethod, IdxModDate) -> -spec binaryslot_tolist( binary(), - press_method(), + block_method(), boolean(), list(leveled_codec:ledger_kv()|expandable_pointer())) -> list(leveled_codec:ledger_kv()|expandable_pointer()). 
-binaryslot_tolist(FullBin, PressMethod, IdxModDate, InitAcc) -> +binaryslot_tolist(FullBin, BlockMethod, IdxModDate, InitAcc) -> case crc_check_slot(FullBin) of {Header, Blocks} -> {BlockLengths, _LMD, _PosBinIndex} = @@ -2600,7 +2613,7 @@ binaryslot_tolist(FullBin, PressMethod, IdxModDate, InitAcc) -> B5:B5L/binary>> = Blocks, lists:foldl( fun(B, Acc) -> - append(deserialise_block(B, PressMethod), Acc) + append(leveled_sstblock:get_all(B, BlockMethod), Acc) end, InitAcc, [B5, B4, B3, B2, B1] @@ -2613,7 +2626,7 @@ binaryslot_tolist(FullBin, PressMethod, IdxModDate, InitAcc) -> binary(), range_endpoint(), range_endpoint(), - press_method(), + block_method(), boolean(), segment_check_fun(), list(leveled_codec:ledger_kv()|expandable_pointer()) @@ -2623,10 +2636,10 @@ binaryslot_tolist(FullBin, PressMethod, IdxModDate, InitAcc) -> %% @doc %% Must return a trimmed and reversed list of results in the range binaryslot_trimmed( - FullBin, all, all, PressMethod, IdxModDate, false, Acc) -> - {binaryslot_tolist(FullBin, PressMethod, IdxModDate, Acc), none}; + FullBin, all, all, BlockMethod, IdxModDate, false, Acc) -> + {binaryslot_tolist(FullBin, BlockMethod, IdxModDate, Acc), none}; binaryslot_trimmed( - FullBin, StartKey, EndKey, PressMethod, IdxModDate, SegmentChecker, Acc + FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker, Acc ) -> case {crc_check_slot(FullBin), SegmentChecker} of % Get a trimmed list of keys in the slot based on the range, trying @@ -2647,7 +2660,7 @@ binaryslot_trimmed( blocks_required( {StartKey, EndKey}, Block1, Block2, MidBlock, Block4, Block5, - PressMethod), + BlockMethod), {append(TrimmedKVL, Acc), none}; {{Header, _Blocks}, SegmentChecker} -> {BlockLengths, _LMD, BlockIdx} = @@ -2657,7 +2670,7 @@ binaryslot_trimmed( BlockIdx, FullBin, BlockLengths, - PressMethod, + BlockMethod, IdxModDate, SegmentChecker, {StartKey, EndKey}), @@ -2669,69 +2682,115 @@ binaryslot_trimmed( -spec blocks_required( {range_endpoint(), range_endpoint()}, binary(), binary(), binary(), binary(), binary(), - press_method()) -> list(leveled_codec:ledger_kv()). + block_method()) -> list(leveled_codec:ledger_kv()). 
blocks_required( - {StartKey, EndKey}, B1, B2, MidBlock, B4, B5, PressMethod) -> - MidBlockList = deserialise_block(MidBlock, PressMethod), - case filterby_midblock( - fetchends_rawblock(MidBlockList), {StartKey, EndKey}) of + {StartKey, EndKey}, B1, B2, MidBlock, B4, B5, BlockMethod) -> + {Top, Tail, MidBlockFetchFun} = + leveled_sstblock:get_topandtail(MidBlock, BlockMethod), + case filterby_midblock({Top, Tail}, {StartKey, EndKey}) of empty -> - append( - in_range(deserialise_block(B1, PressMethod), StartKey, EndKey), - in_range(deserialise_block(B2, PressMethod), StartKey, EndKey), - in_range(deserialise_block(B4, PressMethod), StartKey, EndKey), - in_range(deserialise_block(B5, PressMethod), StartKey, EndKey) + in_range( + append( + leveled_sstblock:get_all(B1, BlockMethod), + leveled_sstblock:get_all(B2, BlockMethod), + leveled_sstblock:get_all(B4, BlockMethod), + leveled_sstblock:get_all(B5, BlockMethod) + ), + StartKey, + EndKey ); all_blocks -> append( - get_lefthand_blocks(B1, B2, PressMethod, StartKey), - MidBlockList, - get_righthand_blocks(B4, B5, PressMethod, EndKey) + in_range( + get_lefthand_blocks( + B1, B2, BlockMethod, StartKey, EndKey), + StartKey, + all + ), + MidBlockFetchFun(all), + in_range( + get_righthand_blocks( + B4, B5, BlockMethod, StartKey, EndKey), + all, + EndKey + ) ); lt_mid -> in_range( - get_lefthand_blocks(B1, B2, PressMethod, StartKey), - all, - EndKey); + get_lefthand_blocks( + B1, B2, BlockMethod, StartKey, EndKey), + StartKey, + EndKey + ); le_mid -> - append( - get_lefthand_blocks(B1, B2, PressMethod, StartKey), - in_range(MidBlockList, all, EndKey) + in_range( + append( + get_lefthand_blocks( + B1, B2, BlockMethod, StartKey, EndKey), + MidBlockFetchFun(all) + ), + StartKey, + EndKey ); mid_only -> - in_range(MidBlockList, StartKey, EndKey); + in_range(MidBlockFetchFun(all), StartKey, EndKey); ge_mid -> - append( - in_range(MidBlockList, StartKey, all), - get_righthand_blocks(B4, B5, PressMethod, EndKey) + in_range( + append( + MidBlockFetchFun(all), + get_righthand_blocks( + B4, B5, BlockMethod, all, EndKey) + ), + StartKey, + EndKey ); gt_mid -> in_range( - get_righthand_blocks(B4, B5, PressMethod, EndKey), + get_righthand_blocks( + B4, B5, BlockMethod, StartKey, EndKey), StartKey, - all) + EndKey + ) end. -get_lefthand_blocks(B1, B2, PressMethod, StartKey) -> - BlockList2 = deserialise_block(B2, PressMethod), - case previous_block_required( - fetchends_rawblock(BlockList2), StartKey) of +get_lefthand_blocks(B1, B2, BlockMethod, StartKey, EndKey) -> + {Top, Tail, InnerLeftBlockFetchFun} = + leveled_sstblock:get_topandtail(B2, BlockMethod), + case previous_block_required({Top, Tail}, StartKey) of true -> - in_range(deserialise_block(B1, PressMethod), StartKey, all) - ++ BlockList2; + case this_leftblock_required({Top, Tail}, EndKey) of + true -> + append( + leveled_sstblock:get_all(B1, BlockMethod), + InnerLeftBlockFetchFun(all) + ); + false -> + {_, _, OuterLeftBlockFetchFun} = + leveled_sstblock:get_topandtail(B1, BlockMethod), + OuterLeftBlockFetchFun({StartKey, EndKey}) + end; false -> - in_range(BlockList2, StartKey, all) + InnerLeftBlockFetchFun({StartKey, EndKey}) end. 
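A minimal sketch of the gating pattern used above: leveled_sstblock:get_topandtail/2 exposes the first and last key of a block together with a fetch fun, so a caller can decide from the head and tail alone whether the block intersects the query range before paying to deserialise it. maybe_fetch_in_range/4 is a hypothetical helper; the real decisions are taken by this_leftblock_required/2, this_rightblock_required/2 and related functions below.

maybe_fetch_in_range(BlockBin, BlockMethod, StartKey, EndKey) ->
    {Top, Tail, FetchFun} =
        leveled_sstblock:get_topandtail(BlockBin, BlockMethod),
    case {Top, Tail} of
        {not_present, not_present} ->
            %% Empty (or unreadable) block - nothing to fetch
            [];
        _ ->
            InRange =
                Tail >= StartKey
                    andalso not leveled_codec:endkey_passed(EndKey, Top),
            case InRange of
                true ->
                    %% Only now open the block; FetchFun(all) would return
                    %% every entry, FetchFun({SK, EK}) only the sub-blocks
                    %% needed to cover the range
                    FetchFun({StartKey, EndKey});
                false ->
                    []
            end
    end.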
-get_righthand_blocks(B4, B5, PressMethod, EndKey) -> - BlockList4 = deserialise_block(B4, PressMethod), - case next_block_required( - fetchends_rawblock(BlockList4), EndKey) of +get_righthand_blocks(B4, B5, BlockMethod, StartKey, EndKey) -> + {Top, Tail, InnerRightBlockFetchFun} = + leveled_sstblock:get_topandtail(B4, BlockMethod), + case next_block_required({Top, Tail}, EndKey) of true -> - BlockList4 - ++ in_range(deserialise_block(B5, PressMethod), all, EndKey); + case this_rightblock_required({Top, Tail}, StartKey) of + true -> + append( + InnerRightBlockFetchFun(all), + leveled_sstblock:get_all(B5, BlockMethod) + ); + false -> + {_, _, OuterRightBlockFetchFun} = + leveled_sstblock:get_topandtail(B5, BlockMethod), + OuterRightBlockFetchFun({StartKey, EndKey}) + end; false -> - in_range(BlockList4, all, EndKey) + InnerRightBlockFetchFun({StartKey, EndKey}) end. filterby_midblock({not_present, not_present}, _RangeKeys) -> @@ -2759,6 +2818,20 @@ filterby_midblock({MidFirst, MidLast}, {_StartKey, EndKey}) -> all_blocks end. +this_leftblock_required({not_present, not_present}, _EndKey) -> + true; +this_leftblock_required(_, all) -> + true; +this_leftblock_required({Top, _Tail}, EndKey) -> + not leveled_codec:endkey_passed(EndKey, Top). + +this_rightblock_required({not_present, not_present}, _StartKey) -> + true; +this_rightblock_required(_, all) -> + true; +this_rightblock_required({_Top, Tail}, StartKey) -> + Tail >= StartKey. + previous_block_required({not_present, not_present}, _SK) -> true; previous_block_required({FK, _LK}, StartKey) when FK < StartKey -> @@ -2780,12 +2853,53 @@ next_block_required({_FK, LK}, EndKey) -> in_range(KVL, all, all) -> KVL; in_range(KVL, all, EK) -> - lists:takewhile( - fun({K, _V}) -> not leveled_codec:endkey_passed(EK, K) end, KVL); + before_end(KVL, EK, []); in_range(KVL, SK, all) -> - lists:dropwhile(fun({K, _V}) -> K < SK end, KVL); + after_start(KVL, SK); in_range(KVL, SK, EK) -> - in_range(in_range(KVL, SK, all), all, EK). + before_end(after_start(KVL, SK), EK, []). + +-define(MAX_AHEAD, 12). +-define(CHECK_AHEAD, 8). + +before_end(KVL, EK, Acc) when length(KVL) > ?MAX_AHEAD -> + SkipCheck = + leveled_codec:endkey_passed( + EK, element(1, lists:nth(?CHECK_AHEAD, KVL))), + case SkipCheck of + true -> + append( + Acc, + lists:takewhile( + fun({K, _V}) -> not leveled_codec:endkey_passed(EK, K) end, + KVL + ) + ); + false -> + {B, MB} = lists:split(?CHECK_AHEAD, KVL), + before_end(MB, EK, append(Acc, B)) + end; +before_end(KVL, EK, Acc) -> + append( + Acc, + lists:takewhile( + fun({K, _V}) -> not leveled_codec:endkey_passed(EK, K) end, + KVL + ) + ). + +after_start(KVL, SK) when length(KVL) > ?MAX_AHEAD -> + SkipCheck = element(1, lists:nth(?CHECK_AHEAD, KVL)) < SK, + case SkipCheck of + true -> + {_B, MB} = lists:split(?CHECK_AHEAD, KVL), + after_start(MB, SK); + false -> + lists:dropwhile(fun({K, _V}) -> K < SK end, KVL) + end; +after_start(KVL, SK) -> + lists:dropwhile(fun({K, _V}) -> K < SK end, KVL). + crc_check_slot(FullBin) -> < binary(), binary(), leveled_codec:ledger_key(), - press_method()) -> not_present|leveled_codec:ledger_kv(). -fetch_value([], _BlockLengths, _Blocks, _Key, _PressMethod) -> + block_method()) -> not_present|leveled_codec:ledger_kv(). 
+fetch_value([], _BlockLengths, _Blocks, _Key, _BlockMethod) -> not_present; -fetch_value([Pos|Rest], BlockLengths, Blocks, Key, PressMethod) -> +fetch_value([Pos|Rest], BlockLengths, Blocks, Key, BlockMethod) -> {BlockNumber, BlockPos} = revert_position(Pos), {Offset, Length} = block_offsetandlength(BlockLengths, BlockNumber), <<_Pre:Offset/binary, Block:Length/binary, _Rest/binary>> = Blocks, - R = fetchfrom_rawblock(BlockPos, deserialise_block(Block, PressMethod)), + R = leveled_sstblock:get_nth(BlockPos, Block, BlockMethod), case R of {K, V} when K == Key -> {K, V}; _ -> - fetch_value(Rest, BlockLengths, Blocks, Key, PressMethod) + fetch_value(Rest, BlockLengths, Blocks, Key, BlockMethod) end. --spec fetchfrom_rawblock( - pos_integer(), list(leveled_codec:ledger_kv())) - -> not_present|leveled_codec:ledger_kv(). -%% @doc -%% Fetch from a deserialised block, but accounting for potential corruption -%% in that block which may lead to it returning as an empty list if that -%% corruption is detected by the deserialising function -fetchfrom_rawblock(BlockPos, RawBlock) when BlockPos > length(RawBlock) -> - %% Capture the slightly more general case than this being an empty list - %% in case of some other unexpected misalignement that would otherwise - %% crash the leveled_sst file process - not_present; -fetchfrom_rawblock(BlockPos, RawBlock) -> - lists:nth(BlockPos, RawBlock). - --spec fetchends_rawblock( - list(leveled_codec:ledger_kv())) - -> {not_present, not_present}| - {leveled_codec:ledger_key(), leveled_codec:ledger_key()}. -%% @doc -%% Fetch the first and last key from a block, and not_present if the block -%% is empty (rather than crashing) -fetchends_rawblock([]) -> - {not_present, not_present}; -fetchends_rawblock(RawBlock) -> - {element(1, hd(RawBlock)), - element(1, lists:last(RawBlock))}. - revert_position(Pos) -> {SideBlockSize, MidBlockSize} = ?LOOK_BLOCKSIZE, case Pos < 2 * SideBlockSize of @@ -2967,26 +3053,35 @@ append(L1, L2, L3, L4) -> %% Merge from a single list (i.e. at Level 0) merge_lists(KVList1, SSTOpts, IdxModDate) -> SlotCount = length(KVList1) div ?LOOK_SLOTSIZE, - {[], + { [], - split_lists(KVList1, [], - SlotCount, SSTOpts#sst_options.press_method, IdxModDate), + [], + split_lists( + KVList1,[], + SlotCount, + { + SSTOpts#sst_options.block_version, + SSTOpts#sst_options.press_method + }, + IdxModDate + ), element(1, lists:nth(1, KVList1)), - not_counted}. + not_counted + }. -split_lists([], SlotLists, 0, _PressMethod, _IdxModDate) -> +split_lists([], SlotLists, 0, _BlockMethod, _IdxModDate) -> lists:reverse(SlotLists); -split_lists(LastPuff, SlotLists, 0, PressMethod, IdxModDate) -> +split_lists(LastPuff, SlotLists, 0, BlockMethod, IdxModDate) -> {SlotD, _} = generate_binary_slot( - lookup, {forward, LastPuff}, PressMethod, IdxModDate, no_timing), + lookup, {forward, LastPuff}, BlockMethod, IdxModDate, no_timing), lists:reverse([SlotD|SlotLists]); -split_lists(KVList1, SlotLists, N, PressMethod, IdxModDate) -> +split_lists(KVList1, SlotLists, N, BlockMethod, IdxModDate) -> {Slot, KVListRem} = lists:split(?LOOK_SLOTSIZE, KVList1), {SlotD, _} = generate_binary_slot( - lookup, {forward, Slot}, PressMethod, IdxModDate, no_timing), - split_lists(KVListRem, [SlotD|SlotLists], N - 1, PressMethod, IdxModDate). + lookup, {forward, Slot}, BlockMethod, IdxModDate, no_timing), + split_lists(KVListRem, [SlotD|SlotLists], N - 1, BlockMethod, IdxModDate). 
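As a worked example of the skip-ahead range trimming above (before_end/after_start inspect the key ?CHECK_AHEAD entries in and skip in chunks rather than testing every key), and of the tightened leveled_codec:endkey_passed/2 clauses earlier in this patch, the exported in_range/3 can be exercised directly. A hedged usage sketch with illustrative object keys and placeholder values:

range_trim_example() ->
    KVL =
        [{{o, <<"B">>, <<N:32/integer>>, null}, N} || N <- lists:seq(1, 20)],
    SK = {o, <<"B">>, <<12:32/integer>>, null},
    EK = {o, <<"B">>, <<17:32/integer>>, null},
    %% Expect the six entries with keys 12..17 inclusive - the end key itself
    %% is in range, only keys beyond it have "passed" the end key
    Trimmed = leveled_sst:in_range(KVL, SK, EK),
    6 = length(Trimmed),
    Trimmed.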
-spec merge_lists( list(maybe_expanded_pointer()), @@ -3019,7 +3114,7 @@ merge_lists( null, 0, SSTOpts#sst_options.max_sstslots, - SSTOpts#sst_options.press_method, + {SSTOpts#sst_options.block_version, SSTOpts#sst_options.press_method}, IndexModDate, 0, BuildTimings @@ -3034,7 +3129,7 @@ merge_lists( leveled_codec:ledger_key()|null, non_neg_integer(), pos_integer()|infinity, - press_method(), + block_method(), boolean(), non_neg_integer(), build_timings()) -> @@ -3043,18 +3138,18 @@ merge_lists( non_neg_integer()}. merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, MaxSlots, MaxSlots, - _PressMethod, _IdxModDate, CountOfTombs, T0) -> + _BlockMethod, _IdxModDate, CountOfTombs, T0) -> % This SST file is full, move to complete file, and return the % remainder log_buildtimings(T0, LI), {KVL1, KVL2, lists:reverse(SlotList), FirstKey, CountOfTombs}; merge_lists([], [], LI, SlotList, FirstKey, _SlotCount, _MaxSlots, - _PressMethod, _IdxModDate, CountOfTombs, T0) -> + _BlockMethod, _IdxModDate, CountOfTombs, T0) -> % the source files are empty, complete the file log_buildtimings(T0, LI), {[], [], lists:reverse(SlotList), FirstKey, CountOfTombs}; merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, MaxSlots, - PressMethod, IdxModDate, CountOfTombs, T0) -> + BlockMethod, IdxModDate, CountOfTombs, T0) -> % Form a slot by merging the two lists until the next 128 K/V pairs have % been determined {KVRem1, KVRem2, Slot, FK0} = @@ -3063,34 +3158,38 @@ merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, MaxSlots, case Slot of {_, []} -> % There were no actual keys in the slot (maybe some expired) - merge_lists(KVRem1, - KVRem2, - LI, - SlotList, - FK0, - SlotCount, - MaxSlots, - PressMethod, - IdxModDate, - CountOfTombs, - T1); + merge_lists( + KVRem1, + KVRem2, + LI, + SlotList, + FK0, + SlotCount, + MaxSlots, + BlockMethod, + IdxModDate, + CountOfTombs, + T1 + ); {Lookup, KVL} -> % Convert the list of KVs for the slot into a binary, and related % metadata {SlotD, T2} = generate_binary_slot( - Lookup, {reverse, KVL}, PressMethod, IdxModDate, T1), - merge_lists(KVRem1, - KVRem2, - LI, - [SlotD|SlotList], - FK0, - SlotCount + 1, - MaxSlots, - PressMethod, - IdxModDate, - leveled_codec:count_tombs(KVL, CountOfTombs), - T2) + Lookup, {reverse, KVL}, BlockMethod, IdxModDate, T1), + merge_lists( + KVRem1, + KVRem2, + LI, + [SlotD|SlotList], + FK0, + SlotCount + 1, + MaxSlots, + BlockMethod, + IdxModDate, + leveled_codec:count_tombs(KVL, CountOfTombs), + T2 + ) end. -spec form_slot( @@ -3313,13 +3412,13 @@ maybelog_fetch_timing({Pid, _SlotFreq}, Level, Type, SW) when is_pid(Pid), SW =/ -define(TEST_AREA, "test/test_area/"). binaryslot_trimmed( - FullBin, StartKey, EndKey, PressMethod, IdxModDate, SegmentChecker) -> + FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker) -> binaryslot_trimmed( - FullBin, StartKey, EndKey, PressMethod, IdxModDate, SegmentChecker, [] + FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker, [] ). -binaryslot_tolist(FullBin, PressMethod, IdxModDate) -> - binaryslot_tolist(FullBin, PressMethod, IdxModDate, []). +binaryslot_tolist(FullBin, BlockMethod, IdxModDate) -> + binaryslot_tolist(FullBin, BlockMethod, IdxModDate, []). sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> @@ -3352,26 +3451,30 @@ sst_getkvrange(Pid, StartKey, EndKey, ScanWidth, SegChecker, LowLastMod) -> sst_getslots(Pid, SlotList) -> sst_getfilteredslots(Pid, SlotList, false, 0, []). 
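Hedged sketch of how the new option threads through to slot generation, mirroring set_options/2 in leveled_bookie and merge_lists/3 above: the bookie-level block_version and compression method together form the block_method() pair. block_method_for/1 is a hypothetical helper, assumes leveled.hrl is included for #sst_options{}, and uses compression_method for simplicity (the ledger may use a separate method via the ledger_compression option).

block_method_for(StartOpts) ->
    SSTOpts =
        #sst_options{
            press_method = proplists:get_value(compression_method, StartOpts),
            block_version = proplists:get_value(block_version, StartOpts)
        },
    {SSTOpts#sst_options.block_version, SSTOpts#sst_options.press_method}.

%% block_method_for([{compression_method, native}, {block_version, 1}])
%%     -> {1, native}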
-testsst_new(RootPath, Filename, Level, KVList, MaxSQN, PressMethod) -> +testsst_new( + RootPath, Filename, Level, KVList, MaxSQN, {BV, PM}) -> OptsSST = - #sst_options{press_method=PressMethod, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method=PM, + block_version=BV, + log_options=leveled_log:get_opts() + }, sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, false). testsst_new(RootPath, Filename, - KVL1, KVL2, IsBasement, Level, MaxSQN, PressMethod) -> + KVL1, KVL2, IsBasement, Level, MaxSQN, {BV, PM}) -> OptsSST = - #sst_options{press_method=PressMethod, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method=PM, + block_version=BV, + log_options=leveled_log:get_opts() + }, sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, OptsSST, false). generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> - generate_randomkeys(Seqn, - Count, - [], - BucketRangeLow, - BucketRangeHigh). + generate_randomkeys( + Seqn, Count, [], BucketRangeLow, BucketRangeHigh). generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> Acc; @@ -3564,7 +3667,7 @@ indexed_list_test() -> {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, KVL1}, native, ?INDEX_MODDATE, no_timing), + lookup, {forward, KVL1}, {0, native}, ?INDEX_MODDATE, no_timing), io:format(user, "Indexed list created slot in ~w microseconds of size ~w~n", [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]), @@ -3594,7 +3697,7 @@ indexed_list_mixedkeys_test() -> {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, native, ?INDEX_MODDATE, no_timing), + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), {TestK1, TestV1} = lists:nth(4, KVL1), MH1 = leveled_codec:segment_hash(TestK1), @@ -3622,7 +3725,7 @@ indexed_list_mixedkeys2_test() -> Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, {{_Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, native, ?INDEX_MODDATE, no_timing), + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), lists:foreach(fun({K, V}) -> MH = leveled_codec:segment_hash(K), test_binary_slot(FullBin, K, MH, {K, V}) @@ -3634,10 +3737,10 @@ indexed_list_allindexkeys_test() -> ?LOOK_SLOTSIZE), {{HeaderT, FullBinT, HL, LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, native, true, no_timing), + lookup, {forward, Keys}, {0, native}, true, no_timing), {{HeaderF, FullBinF, HL, LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, native, false, no_timing), + lookup, {forward, Keys}, {0, native}, false, no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, LMD = ?FLIPPER32, ?assertMatch(<<_BL:20/binary, LMD:32/integer, EmptySlotSize:8/integer>>, @@ -3645,8 +3748,8 @@ indexed_list_allindexkeys_test() -> ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, HeaderF), % SW = os:timestamp(), - BinToListT = binaryslot_tolist(FullBinT, native, true), - BinToListF = binaryslot_tolist(FullBinF, native, false), + BinToListT = binaryslot_tolist(FullBinT, {0, native}, true), + BinToListF = binaryslot_tolist(FullBinF, {0, native}, false), % io:format(user, % "Indexed list flattened in ~w microseconds ~n", % [timer:now_diff(os:timestamp(), SW)]), @@ -3655,37 +3758,37 @@ indexed_list_allindexkeys_test() -> ?assertMatch( {Keys, none}, binaryslot_trimmed( - FullBinT, all, all, native, true, false)), + FullBinT, all, all, {0, native}, true, false)), ?assertMatch(Keys, BinToListF), ?assertMatch( {Keys, none}, 
binaryslot_trimmed( - FullBinF, all, all, native, false, false)). + FullBinF, all, all, {0, native}, false, false)). indexed_list_allindexkeys_nolookup_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)), ?NOLOOK_SLOTSIZE), {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - no_lookup, {forward, Keys}, native, ?INDEX_MODDATE,no_timing), + no_lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE,no_timing), ?assertMatch(<<_BL:20/binary, _LMD:32/integer, 127:8/integer>>, Header), % SW = os:timestamp(), BinToList = - binaryslot_tolist(FullBin, native, ?INDEX_MODDATE), + binaryslot_tolist(FullBin, {0, native}, ?INDEX_MODDATE), % io:format(user, % "Indexed list flattened in ~w microseconds ~n", % [timer:now_diff(os:timestamp(), SW)]), ?assertMatch(Keys, BinToList), ?assertMatch( {Keys, none}, - binaryslot_trimmed(FullBin, all, all, native, ?INDEX_MODDATE, false)). + binaryslot_trimmed(FullBin, all, all, {0, native}, ?INDEX_MODDATE, false)). indexed_list_allindexkeys_trimmed_test() -> Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), ?LOOK_SLOTSIZE), {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, native, ?INDEX_MODDATE, no_timing), + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), EmptySlotSize = ?LOOK_SLOTSIZE - 1, ?assertMatch( <<_BL:20/binary, _LMD:32/integer, EmptySlotSize:8/integer>>, @@ -3696,7 +3799,7 @@ indexed_list_allindexkeys_trimmed_test() -> FullBin, {i, "Bucket", {"t1_int", 0}, null}, {i, "Bucket", {"t1_int", 99999}, null}, - native, + {0, native}, ?INDEX_MODDATE, false)), @@ -3705,7 +3808,7 @@ indexed_list_allindexkeys_trimmed_test() -> R1 = lists:sublist(Keys, 10, 91), {O1, none} = binaryslot_trimmed( - FullBin, SK1, EK1, native, ?INDEX_MODDATE, false), + FullBin, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), ?assertMatch(91, length(O1)), ?assertMatch(R1, O1), @@ -3713,7 +3816,7 @@ indexed_list_allindexkeys_trimmed_test() -> {EK2, _} = lists:nth(20, Keys), R2 = lists:sublist(Keys, 10, 11), {O2, none} = - binaryslot_trimmed(FullBin, SK2, EK2, native, ?INDEX_MODDATE, false), + binaryslot_trimmed(FullBin, SK2, EK2, {0, native}, ?INDEX_MODDATE, false), ?assertMatch(11, length(O2)), ?assertMatch(R2, O2), @@ -3721,7 +3824,7 @@ indexed_list_allindexkeys_trimmed_test() -> {EK3, _} = lists:nth(?LOOK_SLOTSIZE, Keys), R3 = lists:sublist(Keys, ?LOOK_SLOTSIZE - 1, 2), {O3, none} = - binaryslot_trimmed(FullBin, SK3, EK3, native, ?INDEX_MODDATE, false), + binaryslot_trimmed(FullBin, SK3, EK3, {0, native}, ?INDEX_MODDATE, false), ?assertMatch(2, length(O3)), ?assertMatch(R3, O3). 
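A hedged companion to the trimming test above, showing the same shape exercised under the new sub-block format. It is assumed (not shown in this section) that a version-1 slot honours the same trimming contract; the second element of the binaryslot_trimmed result is left unasserted, as version 1 may return block index data where version 0 returned none.

indexed_list_allindexkeys_trimmed_v1_test() ->
    Keys =
        lists:sublist(
            lists:ukeysort(1, generate_indexkeys(150)), ?LOOK_SLOTSIZE),
    {{_Header, FullBin, _HL, _LK}, no_timing} =
        generate_binary_slot(
            lookup, {forward, Keys}, {1, native}, ?INDEX_MODDATE, no_timing),
    {SK1, _} = lists:nth(10, Keys),
    {EK1, _} = lists:nth(100, Keys),
    R1 = lists:sublist(Keys, 10, 91),
    {O1, _BIC} =
        binaryslot_trimmed(
            FullBin, SK1, EK1, {1, native}, ?INDEX_MODDATE, false),
    ?assertMatch(R1, O1).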
@@ -3735,7 +3838,7 @@ indexed_list_mixedkeys_bitflip_test() -> Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), {{Header, SlotBin, _HL, LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, native, ?INDEX_MODDATE, no_timing), + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), ?assertMatch(LK, element(1, lists:last(Keys))), @@ -3755,7 +3858,7 @@ indexed_list_mixedkeys_bitflip_test() -> test_binary_slot(SlotBin, TestKey1, MH1, lists:nth(1, KVL1)), test_binary_slot(SlotBin, TestKey2, MH2, lists:nth(33, KVL1)), ToList = - binaryslot_tolist(SlotBin, native, ?INDEX_MODDATE), + binaryslot_tolist(SlotBin, {0, native}, ?INDEX_MODDATE), ?assertMatch(Keys, ToList), EH1 = case extract_hash(MH1) of Int1 when is_integer(Int1) -> Int1 end, @@ -3777,9 +3880,9 @@ indexed_list_mixedkeys_bitflip_test() -> test_binary_slot(SlotBin2, TestKey2, MH2, not_present), ToList1 = - binaryslot_tolist(SlotBin1, native, ?INDEX_MODDATE), + binaryslot_tolist(SlotBin1, {0, native}, ?INDEX_MODDATE), ToList2 = - binaryslot_tolist(SlotBin2, native, ?INDEX_MODDATE), + binaryslot_tolist(SlotBin2, {0, native}, ?INDEX_MODDATE), ?assertMatch(true, is_list(ToList1)), ?assertMatch(true, is_list(ToList2)), @@ -3793,7 +3896,7 @@ indexed_list_mixedkeys_bitflip_test() -> {SK1, _} = lists:nth(10, Keys), {EK1, _} = lists:nth(20, Keys), {O1, none} = - binaryslot_trimmed(SlotBin3, SK1, EK1, native, ?INDEX_MODDATE, false), + binaryslot_trimmed(SlotBin3, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), ?assertMatch([], O1), SlotBin4 = flip_byte(SlotBin, 0, 20), @@ -3802,15 +3905,15 @@ indexed_list_mixedkeys_bitflip_test() -> test_binary_slot(SlotBin4, TestKey1, MH1, not_present), test_binary_slot(SlotBin5, TestKey1, MH1, not_present), ToList4 = - binaryslot_tolist(SlotBin4, native, ?INDEX_MODDATE), + binaryslot_tolist(SlotBin4, {0, native}, ?INDEX_MODDATE), ToList5 = - binaryslot_tolist(SlotBin5, native, ?INDEX_MODDATE), + binaryslot_tolist(SlotBin5, {0, native}, ?INDEX_MODDATE), ?assertMatch([], ToList4), ?assertMatch([], ToList5), {O4, none} = - binaryslot_trimmed(SlotBin4, SK1, EK1, native, ?INDEX_MODDATE, false), + binaryslot_trimmed(SlotBin4, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), {O5, none} = - binaryslot_trimmed(SlotBin4, SK1, EK1, native, ?INDEX_MODDATE, false), + binaryslot_trimmed(SlotBin4, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), ?assertMatch([], O4), ?assertMatch([], O5). @@ -3829,7 +3932,7 @@ flip_byte(Binary, Offset, Length) -> test_binary_slot(FullBin, Key, Hash, ExpectedValue) -> % SW = os:timestamp(), {ReturnedValue, _Header} = - binaryslot_get(FullBin, Key, Hash, native, ?INDEX_MODDATE), + binaryslot_get(FullBin, Key, Hash, {0, native}, ?INDEX_MODDATE), ?assertMatch(ExpectedValue, ReturnedValue). % io:format(user, "Fetch success in ~w microseconds ~n", % [timer:now_diff(os:timestamp(), SW)]). 
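The bit-flip tests above rely on every block carrying a trailing CRC computed with the now-exported hmac/1, so corruption yields an empty or not_present result rather than a crash. A minimal sketch of that check, following the layout used by the version-0 block serialisation this patch moves into leveled_sstblock (crc_valid/1 is a hypothetical helper):

crc_valid(Bin) when is_binary(Bin), byte_size(Bin) > 4 ->
    PayloadSize = byte_size(Bin) - 4,
    <<Payload:PayloadSize/binary, CRC:32/integer>> = Bin,
    %% A flipped bit anywhere in the payload changes the computed CRC
    leveled_sst:hmac(Payload) == CRC;
crc_valid(_Bin) ->
    false.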
@@ -3863,8 +3966,7 @@ size_tester(KVL1, KVL2, N) -> {RP, Filename} = {?TEST_AREA, "doublesize_test"}, Opts = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{press_method=native, log_options=leveled_log:get_opts()}, {ok, SST1, _KD, _BB} = sst_newmerge( RP, Filename, KVL1, KVL2, false, ?DOUBLESIZE_LEVEL, N, Opts, false @@ -3909,9 +4011,9 @@ merge_tester(NewFunS, NewFunM) -> KVL3 = lists:ukeymerge(1, KVL1, KVL2), SW0 = os:timestamp(), {ok, P1, {FK1, LK1}, _Bloom1} = - NewFunS(?TEST_AREA, "level1_src", 1, KVL1, 6000, native), + NewFunS(?TEST_AREA, "level1_src", 1, KVL1, 6000, {0, native}), {ok, P2, {FK2, LK2}, _Bloom2} = - NewFunS(?TEST_AREA, "level2_src", 2, KVL2, 3000, native), + NewFunS(?TEST_AREA, "level2_src", 2, KVL2, 3000, {0, native}), ExpFK1 = element(1, lists:nth(1, KVL1)), ExpLK1 = element(1, lists:last(KVL1)), ExpFK2 = element(1, lists:nth(1, KVL2)), @@ -3935,7 +4037,7 @@ merge_tester(NewFunS, NewFunM) -> FK2 }], NewR = - NewFunM(?TEST_AREA, "level2_merge", ML1, ML2, false, 2, N * 2, native), + NewFunM(?TEST_AREA, "level2_merge", ML1, ML2, false, 2, N * 2, {0, native}), {ok, P3, {{Rem1, Rem2}, FK3, LK3}, _Bloom3} = NewR, ?assertMatch([], Rem1), ?assertMatch([], Rem2), @@ -3973,7 +4075,7 @@ simple_persisted_range_tester(SSTNewFun) -> [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), + SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), {0, native}), {o, B, K, null} = LastKey, SK1 = {o, B, K, 0}, @@ -4015,7 +4117,7 @@ simple_persisted_rangesegfilter_tester(SSTNewFun) -> [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), + SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), {0, native}), SK1 = element(1, lists:nth(124, KVList1)), SK2 = element(1, lists:nth(126, KVList1)), @@ -4107,7 +4209,8 @@ additional_range_test() -> lists:seq(?NOLOOK_SLOTSIZE + Gap + 1, 2 * ?NOLOOK_SLOTSIZE + Gap)), {ok, P1, {{Rem1, Rem2}, SK, EK}, _Bloom1} = - testsst_new(?TEST_AREA, "range1_src", IK1, IK2, false, 1, 9999, native), + testsst_new( + ?TEST_AREA, "range1_src", IK1, IK2, false, 1, 9999, {0, native}), ?assertMatch([], Rem1), ?assertMatch([], Rem2), ?assertMatch(SK, element(1, lists:nth(1, IK1))), @@ -4167,7 +4270,7 @@ simple_switchcache_tester() -> [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, OpenP4, {FirstKey, LastKey}, _Bloom1} = - testsst_new(RP, Filename, 4, KVList1, length(KVList1), native), + testsst_new(RP, Filename, 4, KVList1, length(KVList1), {0, native}), lists:foreach(fun({K, V}) -> ?assertMatch({K, V}, sst_get(OpenP4, K)) end, @@ -4187,8 +4290,8 @@ simple_switchcache_tester() -> end, KVList1), ok = sst_close(OpenP4), - OptsSST = #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + OptsSST = + #sst_options{press_method=native, log_options=leveled_log:get_opts()}, {ok, OpenP5, {FirstKey, LastKey}, _Bloom2} = sst_open(RP, Filename ++ ".sst", OptsSST, 5), lists:foreach(fun({K, V}) -> @@ -4229,7 +4332,7 @@ simple_persisted_slotsize_tester(SSTNewFun) -> [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), native), + SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), {0, native}), lists:foreach(fun({K, V}) -> ?assertMatch({K, V}, 
sst_get(Pid, K)) end, @@ -4247,7 +4350,7 @@ reader_hibernate_tester() -> [{FirstKey, FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - testsst_new(RP, Filename, 1, KVList1, length(KVList1), native), + testsst_new(RP, Filename, 1, KVList1, length(KVList1), {0, native}), ?assertMatch({FirstKey, FV}, sst_get(Pid, FirstKey)), SQN = leveled_codec:strip_to_seqonly({FirstKey, FV}), ?assertMatch( @@ -4267,7 +4370,7 @@ delete_pending_tester() -> [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - testsst_new(RP, Filename, 1, KVList1, length(KVList1), native), + testsst_new(RP, Filename, 1, KVList1, length(KVList1), {0, native}), timer:sleep(2000), leveled_sst:sst_setfordelete(Pid, false), timer:sleep(?DELETE_TIMEOUT + 1000), @@ -4280,7 +4383,7 @@ fetch_status_test() -> [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = - testsst_new(RP, Filename, 1, KVList1, length(KVList1), native), + testsst_new(RP, Filename, 1, KVList1, length(KVList1), {0, native}), {status, Pid, {module, gen_statem}, SItemL} = sys:get_status(Pid), {data,[{"State", {reader, S}}]} = lists:nth(3, lists:nth(5, SItemL)), RS = S#state.read_state, @@ -4312,7 +4415,7 @@ simple_persisted_tester(SSTNewFun) -> [{FirstKey, _FV}|_Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, Bloom} = - SSTNewFun(RP, Filename, Level, KVList1, length(KVList1), native), + SSTNewFun(RP, Filename, Level, KVList1, length(KVList1), {0, native}), B0 = check_binary_references(Pid), @@ -4416,8 +4519,8 @@ simple_persisted_tester(SSTNewFun) -> ok = sst_close(Pid), io:format(user, "Reopen SST file~n", []), - OptsSST = #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + OptsSST = + #sst_options{press_method=native, log_options=leveled_log:get_opts()}, {ok, OpenP, {FirstKey, LastKey}, Bloom} = sst_open(RP, Filename ++ ".sst", OptsSST, Level), @@ -4514,10 +4617,13 @@ nonsense_coverage_test() -> hashmatching_bytreesize_test() -> B = <<"Bucket">>, - V = leveled_head:riak_metadata_to_binary(term_to_binary([{"actor1", 1}]), - <<1:32/integer, - 0:32/integer, - 0:32/integer>>), + V = + leveled_head:riak_metadata_to_binary( + term_to_binary([{"actor1", 1}]), + <<1:32/integer, + 0:32/integer, + 0:32/integer>> + ), GenKeyFun = fun(X) -> LK = @@ -4534,7 +4640,7 @@ hashmatching_bytreesize_test() -> KVL = lists:map(GenKeyFun, lists:seq(1, 128)), {{PosBinIndex1, _FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, KVL}, native, ?INDEX_MODDATE, no_timing), + lookup, {forward, KVL}, {0, native}, ?INDEX_MODDATE, no_timing), check_segment_match(PosBinIndex1, KVL, small), check_segment_match(PosBinIndex1, KVL, medium). @@ -4575,13 +4681,19 @@ stop_whenstarter_stopped_testto() -> end, ?assertMatch(false, lists:foldl(TestFun, true, [10000, 2000, 2000, 2000])). -corrupted_block_range_test() -> - corrupted_block_rangetester(native, 100), - corrupted_block_rangetester(lz4, 100), - corrupted_block_rangetester(zstd, 100), - corrupted_block_rangetester(none, 100). +corrupted_block_range_v0_test() -> + corrupted_block_rangetester({0, native}, 100), + corrupted_block_rangetester({0, lz4}, 100), + corrupted_block_rangetester({0, zstd}, 100), + corrupted_block_rangetester({0, none}, 100). 
+ +corrupted_block_range_v1_test() -> + corrupted_block_rangetester({1, native}, 100), + corrupted_block_rangetester({1, lz4}, 100), + corrupted_block_rangetester({1, zstd}, 100), + corrupted_block_rangetester({1, none}, 100). -corrupted_block_rangetester(PressMethod, TestCount) -> +corrupted_block_rangetester(BlockMethod, TestCount) -> N = 100, KVL1 = lists:ukeysort(1, generate_randomkeys(1, N, 1, 2)), RandomRangesFun = @@ -4593,11 +4705,21 @@ corrupted_block_rangetester(PressMethod, TestCount) -> {SK, EK} end, RandomRanges = lists:map(RandomRangesFun, lists:seq(1, TestCount)), - B1 = serialise_block(lists:sublist(KVL1, 1, 20), PressMethod), - B2 = serialise_block(lists:sublist(KVL1, 21, 20), PressMethod), - MidBlock = serialise_block(lists:sublist(KVL1, 41, 20), PressMethod), - B4 = serialise_block(lists:sublist(KVL1, 61, 20), PressMethod), - B5 = serialise_block(lists:sublist(KVL1, 81, 20), PressMethod), + B1 = + leveled_sstblock:serialise_block( + no_lookup, BlockMethod, lists:sublist(KVL1, 1, 20)), + B2 = + leveled_sstblock:serialise_block( + no_lookup, BlockMethod, lists:sublist(KVL1, 21, 20)), + MidBlock = + leveled_sstblock:serialise_block( + no_lookup, BlockMethod, lists:sublist(KVL1, 41, 20)), + B4 = + leveled_sstblock:serialise_block( + no_lookup, BlockMethod, lists:sublist(KVL1, 61, 20)), + B5 = + leveled_sstblock:serialise_block( + no_lookup, BlockMethod, lists:sublist(KVL1, 81, 20)), CorruptBlockFun = fun(Block) -> case rand:uniform(10) < 2 of @@ -4614,25 +4736,31 @@ corrupted_block_rangetester(PressMethod, TestCount) -> lists:map(CorruptBlockFun, [B1, B2, MidBlock, B4, B5]), BR = blocks_required( - {SK, EK}, CB1, CB2, CBMid, CB4, CB5, PressMethod), + {SK, EK}, CB1, CB2, CBMid, CB4, CB5, BlockMethod), ?assertMatch(true, length(BR) =< 100), lists:foreach(fun({_K, _V}) -> ok end, BR) end, lists:foreach(CheckFun, RandomRanges). -corrupted_block_fetch_test() -> - corrupted_block_fetch_tester(native), - corrupted_block_fetch_tester(lz4), - corrupted_block_fetch_tester(zstd), - corrupted_block_fetch_tester(none). +corrupted_block_fetch_v0_test() -> + corrupted_block_fetch_tester({0, native}), + corrupted_block_fetch_tester({0, lz4}), + corrupted_block_fetch_tester({0, zstd}), + corrupted_block_fetch_tester({0, none}). + +corrupted_block_fetch_v1_test() -> + corrupted_block_fetch_tester({1, native}), + corrupted_block_fetch_tester({1, lz4}), + corrupted_block_fetch_tester({1, zstd}), + corrupted_block_fetch_tester({1, none}). 
-corrupted_block_fetch_tester(PressMethod) -> +corrupted_block_fetch_tester(BlockMethod) -> KC = 120, KVL1 = lists:ukeysort(1, generate_randomkeys(1, KC, 1, 2)), {{Header, SlotBin, _HashL, _LastKey}, _BT} = generate_binary_slot( - lookup, {forward, KVL1}, PressMethod, false, no_timing), + lookup, {forward, KVL1}, BlockMethod, false, no_timing), < BlockLengths, byte_size(PosBinIndex), LK, - PressMethod, + BlockMethod, false ), case R of @@ -5458,16 +5586,16 @@ blocks_required_test() -> lists:map(fun(I) -> {IdxKey(I), IdxValue(I)} end, lists:seq(65, 70)) ++ lists:map(fun(I) -> {StdKey(I), MetaValue(I)} end, lists:seq(1, 8)), - B1 = serialise_block(Block1L, native), - B2 = serialise_block(Block2L, native), - B3 = serialise_block(MidBlockL, native), - B4 = serialise_block(Block4L, native), - B5 = serialise_block(Block5L, native), - Empty = serialise_block([], native), + B1 = leveled_sstblock:serialise_block(no_lookup, {0, native}, Block1L), + B2 = leveled_sstblock:serialise_block(no_lookup, {0, native}, Block2L), + B3 = leveled_sstblock:serialise_block(no_lookup, {0, native}, MidBlockL), + B4 = leveled_sstblock:serialise_block(no_lookup, {0, native}, Block4L), + B5 = leveled_sstblock:serialise_block(no_lookup, {0, native}, Block5L), + Empty = leveled_sstblock:serialise_block(no_lookup, {0, native}, []), TestFun = fun(SK, EK, Exp) -> - KVL = blocks_required({SK, EK}, B1, B2, B3, B4, B5, native), + KVL = blocks_required({SK, EK}, B1, B2, B3, B4, B5, {0, native}), io:format( "Length KVL ~w First ~p Last ~p~n", [length(KVL), hd(KVL), lists:last(KVL)]), @@ -5493,31 +5621,31 @@ blocks_required_test() -> blocks_required( {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, B2, Empty, B4, B5, native), + B1, B2, Empty, B4, B5, {0, native}), ?assertMatch(52, length(KVL1)), KVL2 = blocks_required( {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, B2, Empty, Empty, Empty, native), + B1, B2, Empty, Empty, Empty, {0, native}), ?assertMatch(30, length(KVL2)), KVL3 = blocks_required( {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, Empty, Empty, Empty, Empty, native), + B1, Empty, Empty, Empty, Empty, {0, native}), ?assertMatch(14, length(KVL3)), KVL4 = blocks_required( {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, Empty, B3, B4, B5, native), + B1, Empty, B3, B4, B5, {0, native}), ?assertMatch(52, length(KVL4)), KVL5 = blocks_required( {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, B2, B3, Empty, B5, native), + B1, B2, B3, Empty, B5, {0, native}), ?assertMatch(52, length(KVL5)) . diff --git a/src/leveled_sstblock.erl b/src/leveled_sstblock.erl new file mode 100644 index 00000000..c5bac1e6 --- /dev/null +++ b/src/leveled_sstblock.erl @@ -0,0 +1,808 @@ +%% -------- SST Block Functions --------- +%% +%% Functions to serialise and then fetch from those serialised blocks, i.e. +%% - serialise_block/3 +%% - get_all/2 deserialise returning all +%% - get_topandtail/2 return only the first and last elements, as well as a +%% function to return the remainder, so that deserialisation of the remainder +%% may be avoided on inspection of top and tail +%% - get_nth/3 deserialise enough of the block to return just the nth item +%% +%% The fetch functions may be optimised for the block type to minimise the +%% work required to fetch the required amount of deserialised data. 
+%% +%% Standard block sizes are +%% -define(LOOK_BLOCKSIZE, {24, 32}). +%% -define(NOLOOK_BLOCKSIZE, {56, 32}). +%% +%% Requirement to serialise LOOK_BLOCKS to optimise for picking the nth value +%% Requirement to serialise NOLOOK_BLOCKS to optimise for picking the first and +%% last values + +-module(leveled_sstblock). + +-define(MAX_SUBBLOCK_SIZE, 1 bsl 16). +-define(BLOCK_TYPE0, 0). + % Block is just a list of terms +-define(BLOCK_TYPE1, 1). + % Lookup block divided into 4 blocks of 6 + % 24 KV blocks only +-define(BLOCK_TYPE2, 2). + % Lookup block divided into 4 blocks of 8 + % 32 KV blocks only +-define(BLOCK_TYPE3, 3). + % Nolookup block with first/last terms at head +-define(BLOCK_TYPE4, 4). + % Nolookup block with first/last terms at head, and block split into L/M/R + % 56 KV blocks only +-define(COMPRESSION_FACTOR, 1). + % When using native compression - how hard should the compression code + % try to reduce the size of the compressed output. 1 Is to imply minimal + % effort, 6 is default in OTP: + % https://www.erlang.org/doc/man/erlang.html#term_to_binary-2 +-define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_FACTOR}]). + +-type block_type() :: + ?BLOCK_TYPE0|?BLOCK_TYPE1|?BLOCK_TYPE2|?BLOCK_TYPE3|?BLOCK_TYPE4. +-type range_filter() :: + all|{leveled_codec:ledger_key(), leveled_codec:ledger_key()}. +-type top_and_tail() :: + { + leveled_codec:ledger_key()|not_present, + leveled_codec:ledger_key()|not_present, + fun((range_filter()) -> list(leveled_codec:ledger_kv())) + }. + +-export( + [ + serialise_block/3, + get_all/2, + get_topandtail/2, + get_nth/3 + ] +). + +%%%============================================================================ +%%% API +%%%============================================================================ + + +-spec serialise_block( + lookup|no_lookup, + {leveled_sst:block_version(), leveled_sst:press_method()}, + list(leveled_codec:ledger_kv())) -> + binary(). 
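For orientation, a minimal sketch of how the exported functions described above are expected to compose. The 24-entry key list, block version 1 and zstd compression are assumptions chosen for the example, and example_roundtrip/1 is not part of the module:

example_roundtrip(KVL24) when length(KVL24) == 24 ->
    BlockMethod = {1, zstd},
    Block = leveled_sstblock:serialise_block(lookup, BlockMethod, KVL24),
    %% Only the sub-block containing the 7th entry is converted back to terms
    Nth = leveled_sstblock:get_nth(7, Block, BlockMethod),
    %% get_topandtail/2 exposes the first and last keys without a full
    %% deserialise; the returned fun fetches the remainder on demand
    {FirstKey, LastKey, FetchFun} =
        leveled_sstblock:get_topandtail(Block, BlockMethod),
    All = FetchFun(all),
    All = leveled_sstblock:get_all(Block, BlockMethod),
    {FirstKey, LastKey, Nth, All}.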
+serialise_block( + lookup, + {1, PressMethod}, + [ A1, A2, A3, A4, A5, A6, + B1, B2, B3, B4, B5, B6, + C1, C2, C3, C4, C5, C6, + D1, D2, D3, D4, D5, D6 + ] = TL +) when PressMethod == lz4; PressMethod == zstd -> + ABn = term_to_binary([A1, A2, A3, A4, A5, A6]), + BBn = term_to_binary([B1, B2, B3, B4, B5, B6]), + CBn = term_to_binary([C1, C2, C3, C4, C5, C6]), + DBn = term_to_binary([D1, D2, D3, D4, D5, D6]), + case {byte_size(ABn), byte_size(BBn), byte_size(CBn), byte_size(DBn)} of + {ASz, BSz, CSz, DSz} + when + ASz < ?MAX_SUBBLOCK_SIZE, + BSz < ?MAX_SUBBLOCK_SIZE, + CSz < ?MAX_SUBBLOCK_SIZE, + DSz < ?MAX_SUBBLOCK_SIZE -> + BlockBin = + << + ASz:16/integer, + BSz:16/integer, + CSz:16/integer, + DSz:16/integer, + ABn/binary, + BBn/binary, + CBn/binary, + DBn/binary + >>, + crc_validate_bin( + << + (compress_block(BlockBin, PressMethod))/binary, + (?BLOCK_TYPE1):8/integer + >> + ); + _ -> + serialise_block_aslist(PressMethod, TL) + end; +serialise_block( + lookup, + {1, PressMethod}, + [ + A1, A2, A3, A4, A5, A6, A7, A8, + B1, B2, B3, B4, B5, B6, B7, B8, + C1, C2, C3, C4, C5, C6, C7, C8, + D1, D2, D3, D4, D5, D6, D7, D8 + ] = TL +) when PressMethod == lz4; PressMethod == zstd -> + ABn = term_to_binary([A1, A2, A3, A4, A5, A6, A7, A8]), + BBn = term_to_binary([B1, B2, B3, B4, B5, B6, B7, B8]), + CBn = term_to_binary([C1, C2, C3, C4, C5, C6, C7, C8]), + DBn = term_to_binary([D1, D2, D3, D4, D5, D6, D7, D8]), + case {byte_size(ABn), byte_size(BBn), byte_size(CBn), byte_size(DBn)} of + {ASz, BSz, CSz, DSz} + when + ASz < ?MAX_SUBBLOCK_SIZE, + BSz < ?MAX_SUBBLOCK_SIZE, + CSz < ?MAX_SUBBLOCK_SIZE, + DSz < ?MAX_SUBBLOCK_SIZE -> + BlockBin = + << + ASz:16/integer, + BSz:16/integer, + CSz:16/integer, + DSz:16/integer, + ABn/binary, + BBn/binary, + CBn/binary, + DBn/binary + >>, + crc_validate_bin( + << + (compress_block(BlockBin, PressMethod))/binary, + (?BLOCK_TYPE2):8/integer + >> + ); + _ -> + serialise_block_aslist(PressMethod, TL) + end; +serialise_block( + no_lookup, + {1, PressMethod}, + [ + L1, L2, L3, L4, L5, L6, L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24, + M1, M2, M3, M4, M5, M6, M7, M8, + R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24 + ] = TermList +) + when + PressMethod == zstd; PressMethod == lz4 -> + LBn = + term_to_binary( + [ + L1, L2, L3, L4, L5, L6, L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24 + ] + ), + MBn = term_to_binary([M1, M2, M3, M4, M5, M6, M7, M8]), + RBn = + term_to_binary( + [ + R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24 + ] + ), + TTBn = term_to_binary({element(1, L1), element(1, R24)}), + case {byte_size(LBn), byte_size(MBn), byte_size(RBn), byte_size(TTBn)} of + {LSz, MSz, RSz, TTSz} + when + LSz < ?MAX_SUBBLOCK_SIZE, + MSz < ?MAX_SUBBLOCK_SIZE, + RSz < ?MAX_SUBBLOCK_SIZE, + TTSz < ?MAX_SUBBLOCK_SIZE + -> + CompressedBin = + compress_block( + << + LSz:16/integer, + MSz:16/integer, + RSz:16/integer, + LBn/binary, + MBn/binary, + RBn/binary + >>, + PressMethod + ), + + crc_validate_bin( + << + TTSz:16/integer, + TTBn/binary, + CompressedBin/binary, + (?BLOCK_TYPE4):8/integer + >> + ); + _ -> + serialise_block_aslist(PressMethod, TermList) + end; +serialise_block(no_lookup, {1, PressMethod}, TermList) + when + length(TermList) > 2, + PressMethod == zstd; PressMethod == lz4 -> + TopTail = + term_to_binary( + { + element(1, hd(TermList)), + 
element(1, lists:last(TermList)) + } + ), + AllBin = compress_block(term_to_binary(TermList), PressMethod), + case byte_size(TopTail) of + TTSz when TTSz < ?MAX_SUBBLOCK_SIZE -> + crc_validate_bin( + << + TTSz:16/integer, + TopTail/binary, + AllBin/binary, + (?BLOCK_TYPE3):8/integer + >> + ); + _ -> + serialise_block_aslist(PressMethod, TermList) + end; +serialise_block(_, {1, PressMethod}, TermList) -> + serialise_block_aslist(PressMethod, TermList); +serialise_block(_, {0, PressMethod}, TermList) -> + serialise_block(TermList, PressMethod). + +-spec get_all( + binary(), leveled_sst:block_method()) -> + list(leveled_codec:ledger_kv()). +get_all(Block, {1, PressMethod}) -> + ExtractFun = + fun(CheckedBlock) -> + get_all_block(CheckedBlock, PressMethod) + end, + check_block(Block, [], ExtractFun); +get_all(Block, {0, PressMethod}) -> + ExtractFun = + fun(CheckedBlock) -> + deserialise_checkedblock(CheckedBlock, PressMethod) + end, + check_block(Block, [], ExtractFun). + +-spec get_topandtail( + binary(), leveled_sst:block_method()) -> top_and_tail(). +get_topandtail(Block, {0, PressMethod}) -> + ExtractFun = + fun(CheckedBlock) -> + TL = deserialise_checkedblock(CheckedBlock, PressMethod), + { + element(1, hd(TL)), + element(1, lists:last(TL)), + fun(_) -> TL end + } + end, + check_block( + Block, + {not_present, not_present, fun(_) -> [] end}, + ExtractFun + ); +get_topandtail(Block, {1, PressMethod}) -> + ExtractFun = + fun(CheckedBlock) -> + get_topandtail_block(CheckedBlock, PressMethod) + end, + check_block( + Block, + {not_present, not_present, fun(_) -> [] end}, + ExtractFun + ). + +-spec get_nth( + pos_integer(), binary(), leveled_sst:block_method()) -> + leveled_codec:ledger_kv()|not_present. +get_nth(N, Block, {1, PressMethod}) -> + ExtractFun = + fun(CheckedBlock) -> + get_nth_item(N, CheckedBlock, PressMethod) + end, + check_block(Block, not_present, ExtractFun); +get_nth(N, Block, {0, PressMethod}) -> + ExtractFun = + fun(CheckedBlock) -> + lists:nth( + N, + deserialise_checkedblock(CheckedBlock, PressMethod) + ) + end, + check_block(Block, not_present, ExtractFun). + +%%%============================================================================ +%%% General internal functions - v1 +%%%============================================================================ + +-spec crc_validate_bin(binary()) -> binary(). +crc_validate_bin(Bin) -> + CRC32 = leveled_sst:hmac(Bin), + <>. + +-spec serialise_block_aslist( + leveled_sst:press_method(), list(leveled_codec:ledger_kv())) + -> binary(). +serialise_block_aslist(PM, TermList) when PM == lz4; PM == zstd -> + CompressedBin = + << + (compress_block(term_to_binary(TermList), PM))/binary, + (?BLOCK_TYPE0):8/integer + >>, + crc_validate_bin(CompressedBin); +serialise_block_aslist(native, TermList) -> + CompressedBin = + << + (term_to_binary(TermList, ?BINARY_SETTINGS))/binary, + ?BLOCK_TYPE0:8/integer + >>, + crc_validate_bin(CompressedBin); +serialise_block_aslist(none, TermList) -> + UncompressedBin = + <<(term_to_binary(TermList))/binary, ?BLOCK_TYPE0:8/integer >>, + crc_validate_bin(UncompressedBin). + +-spec compress_block(binary(), lz4|zstd) -> binary(). +compress_block(BlockBin, lz4) -> + {ok, Bin} = lz4:pack(BlockBin), + Bin; +compress_block(BlockBin, zstd) -> + zstd:compress(BlockBin). + +-spec decompress_block(binary(), lz4|zstd) -> binary(). 
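Every serialised block is framed with a trailing checksum before it is returned: crc_validate_bin/1 appends the leveled_sst:hmac/1 of the payload, and check_block/3 strips and verifies it before running the extract fun. A minimal sketch of that framing, with illustrative helper names and an error atom that are not part of the module, and with the suffix position inferred from check_block/3's size arithmetic:

frame_with_crc(PayloadBin) ->
    CRC32 = leveled_sst:hmac(PayloadBin),
    <<PayloadBin/binary, CRC32:32/integer>>.

unframe_with_crc(Block) when byte_size(Block) > 4 ->
    PayloadSize = byte_size(Block) - 4,
    <<PayloadBin:PayloadSize/binary, CRC32:32/integer>> = Block,
    %% Recompute the hmac over the payload and compare with the stored value
    case leveled_sst:hmac(PayloadBin) of
        CRC32 -> {ok, PayloadBin};
        _ -> {error, crc_wonky}
    end.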
+decompress_block(BlockBin, lz4) -> + {ok, Bin} = lz4:unpack(BlockBin), + Bin; +decompress_block(BlockBin, zstd) -> + case zstd:decompress(BlockBin) of + DeflateBin when is_binary(DeflateBin) -> + DeflateBin + end. + +-spec + check_block + (binary(), list(), fun((binary()) -> list(leveled_codec:ledger_kv()))) + -> list(leveled_codec:ledger_kv()); + (binary(), not_present, fun((binary()) -> leveled_codec:ledger_kv())) + -> leveled_codec:ledger_kv()|not_present; + (binary(), top_and_tail(), fun((binary()) -> top_and_tail())) + -> top_and_tail(). +check_block(Block, Default, ExtractFun) when byte_size(Block) > 4 -> + BinS = byte_size(Block) - 4, + <> = Block, + try + CRC32 = leveled_sst:hmac(TermBin), + ExtractFun(TermBin) + catch + _Exception:Reason -> + leveled_log:log(sst15, [Reason]), + Default + end; +check_block(_Block, Default, _ExtractFun) -> + Default. + +%%%============================================================================ +%%% Block-type specific cases - v1 +%%%============================================================================ + +-spec get_topandtail_block( + binary(), leveled_sst:press_method()) -> top_and_tail(). +get_topandtail_block(CheckedBlock, PressMethod) -> + CheckedSize = byte_size(CheckedBlock), + <> = CheckedBlock, + get_topandtail_block(Type, TypedBlock, PressMethod). + +-spec get_topandtail_block( + block_type(), binary(), leveled_sst:press_method()) -> + top_and_tail(). +get_topandtail_block(Type, TypedBlock, PM) when Type == ?BLOCK_TYPE3 -> + <> = TypedBlock, + {Top, Tail} = binary_to_term(TopTail), + { + Top, + Tail, + fun(_) -> get_all_block(?BLOCK_TYPE3, TypedBlock, PM) end + }; +get_topandtail_block(Type, TypedBlock, PM) + when + Type == ?BLOCK_TYPE4 andalso + (PM == lz4 orelse PM == zstd) -> + << + TTSz:16/integer, + TopTail:TTSz/binary, + CompressedBin/binary + >> = TypedBlock, + {Top, Tail} = binary_to_term(TopTail), + FetchFun = + fun(Range) -> + << + LSz:16/integer, + MSz:16/integer, + RSz:16/integer, + LBn:LSz/binary, + MBn:MSz/binary, + RBn:RSz/binary + >> = decompress_block(CompressedBin, PM), + [M1, M2, M3, M4, M5, M6, M7, M8] = binary_to_term(MBn), + BlockNeeds = + case Range of + all -> + all_blocks; + {SK, EK} -> + leveled_sst:filterby_midblock( + { + element(1, M1), + element(1, M8) + }, + {SK, EK} + ) + end, + case BlockNeeds of + lt_mid -> + binary_to_term(LBn); + le_mid -> + [ + L1, L2, L3, L4, L5, L6, + L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, + L19, L20, L21, L22, L23, L24 + ] = binary_to_term(LBn), + [ + L1, L2, L3, L4, L5, L6, + L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, + L19, L20, L21, L22, L23, L24, + M1, M2, M3, M4, M5, M6, M7, M8 + ]; + mid_only -> + [M1, M2, M3, M4, M5, M6, M7, M8]; + ge_mid -> + [ + R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, + R19, R20, R21, R22, R23, R24 + ] = binary_to_term(RBn), + [ + M1, M2, M3, M4, M5, M6, M7, M8, + R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, + R19, R20, R21, R22, R23, R24 + ]; + gt_mid -> + binary_to_term(RBn); + _ -> + [ + L1, L2, L3, L4, L5, L6, + L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, + L19, L20, L21, L22, L23, L24 + ] = binary_to_term(LBn), + [ + R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, + R19, R20, R21, R22, R23, R24 + ] = binary_to_term(RBn), + [ + L1, L2, L3, L4, L5, L6, + L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, + L19, L20, L21, L22, L23, L24, + M1, M2, M3, M4, M5, M6, M7, 
M8, + R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, + R19, R20, R21, R22, R23, R24 + ] + end + end, + {Top, Tail, FetchFun}; +get_topandtail_block(Type, TypedBlock, PM) -> + TL = get_all_block(Type, TypedBlock, PM), + {element(1, hd(TL)), element(1, lists:last(TL)), fun(_) -> TL end}. + +-spec get_nth_item( + pos_integer(), binary(), leveled_sst:press_method()) -> + leveled_codec:ledger_kv(). +get_nth_item(N, CheckedBlock, PressMethod) -> + CheckedSize = byte_size(CheckedBlock), + <> = CheckedBlock, + get_nth_item(Type, N, TypedBlock, PressMethod). + +-spec get_nth_item( + block_type(), pos_integer(), binary(), leveled_sst:press_method()) -> + leveled_codec:ledger_kv(). +get_nth_item(Type, N, TypedBlock, PM) + when Type == ?BLOCK_TYPE0, (PM == zstd orelse PM == lz4) -> + lists:nth(N, deserialise_checkedblock(TypedBlock, PM)); +get_nth_item(Type, N, TypedBlock, _PM) when Type == ?BLOCK_TYPE0 -> + lists:nth(N, binary_to_term(TypedBlock)); +get_nth_item(Type, N, TypedBlock, PressMethod) + when + (Type == ?BLOCK_TYPE1 orelse Type == ?BLOCK_TYPE2 ) andalso + (PressMethod == lz4 orelse PressMethod == zstd) -> + Width = case Type of ?BLOCK_TYPE1 -> 6; ?BLOCK_TYPE2 -> 8 end, + << + ASz:16/integer, + BSz:16/integer, + CSz:16/integer, + DSz:16/integer, + ABn:ASz/binary, + BBn:BSz/binary, + CBn:CSz/binary, + DBn:DSz/binary + >> = decompress_block(TypedBlock, PressMethod), + case N of + N when N =< Width -> + lists:nth(N, binary_to_term(ABn)); + N when N =< (2 * Width) -> + lists:nth(N - Width, binary_to_term(BBn)); + N when N =< (3 * Width) -> + lists:nth(N - (2 * Width), binary_to_term(CBn)); + N -> + lists:nth(N - (3 * Width), binary_to_term(DBn)) + end. + +-spec get_all_block( + binary(), leveled_sst:press_method()) -> + list(leveled_codec:ledger_kv()). +get_all_block(CheckedBlock, PressMethod) -> + CheckedSize = byte_size(CheckedBlock), + <> = CheckedBlock, + get_all_block(Type, TypedBlock, PressMethod). + +-spec get_all_block( + block_type(), binary(), leveled_sst:press_method()) -> + list(leveled_codec:ledger_kv()). 
+get_all_block(Type, TypedBlock, PM) + when Type == ?BLOCK_TYPE0, (PM == zstd orelse PM == lz4) -> + deserialise_checkedblock(TypedBlock, PM); +get_all_block(Type, TypedBlock, _PM) when Type == ?BLOCK_TYPE0 -> + binary_to_term(TypedBlock); +get_all_block(Type, TypedBlock, PM) when Type == ?BLOCK_TYPE3 -> + << + TTSz:16/integer, + _TopTail:TTSz/binary, + AllBin/binary + >> = TypedBlock, + get_all_block(?BLOCK_TYPE0, AllBin, PM); +get_all_block(Type, TypedBlock, PM) + when + Type == ?BLOCK_TYPE4 andalso + (PM == lz4 orelse PM == zstd) -> + << + TTSz:16/integer, + _TopTail:TTSz/binary, + CompressedBin/binary + >> = TypedBlock, + << + LSz:16/integer, + MSz:16/integer, + RSz:16/integer, + LBn:LSz/binary, + MBn:MSz/binary, + RBn:RSz/binary + >> = decompress_block(CompressedBin, PM), + [ + L1, L2, L3, L4, L5, L6, L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24 + ] = binary_to_term(LBn), + [ + M1, M2, M3, M4, M5, M6, M7, M8 + ] = binary_to_term(MBn), + [ + R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24 + ] = binary_to_term(RBn), + [ + L1, L2, L3, L4, L5, L6, L7, L8, L9, L10, L11, L12, + L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24, + M1, M2, M3, M4, M5, M6, M7, M8, + R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, + R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24 + ]; +get_all_block(Type, TypedBlock, PM) + when + Type == ?BLOCK_TYPE1, + (PM == lz4 orelse PM == zstd) -> + << + ASz:16/integer, + BSz:16/integer, + CSz:16/integer, + DSz:16/integer, + ABn:ASz/binary, + BBn:BSz/binary, + CBn:CSz/binary, + DBn:DSz/binary + >> = decompress_block(TypedBlock, PM), + [A1, A2, A3, A4, A5, A6] = binary_to_term(ABn), + [B1, B2, B3, B4, B5, B6] = binary_to_term(BBn), + [C1, C2, C3, C4, C5, C6] = binary_to_term(CBn), + [D1, D2, D3, D4, D5, D6] = binary_to_term(DBn), + [ + A1, A2, A3, A4, A5, A6, + B1, B2, B3, B4, B5, B6, + C1, C2, C3, C4, C5, C6, + D1, D2, D3, D4, D5, D6 + ]; +get_all_block(Type, TypedBlock, PM) + when + Type == ?BLOCK_TYPE2, + (PM == lz4 orelse PM == zstd) -> + << + ASz:16/integer, + BSz:16/integer, + CSz:16/integer, + DSz:16/integer, + ABn:ASz/binary, + BBn:BSz/binary, + CBn:CSz/binary, + DBn:DSz/binary + >> = decompress_block(TypedBlock, PM), + [A1, A2, A3, A4, A5, A6, A7, A8] = binary_to_term(ABn), + [B1, B2, B3, B4, B5, B6, B7, B8] = binary_to_term(BBn), + [C1, C2, C3, C4, C5, C6, C7, C8] = binary_to_term(CBn), + [D1, D2, D3, D4, D5, D6, D7, D8] = binary_to_term(DBn), + [ + A1, A2, A3, A4, A5, A6, A7, A8, + B1, B2, B3, B4, B5, B6, B7, B8, + C1, C2, C3, C4, C5, C6, C7, C8, + D1, D2, D3, D4, D5, D6, D7, D8 + ]. + +%%%============================================================================ +%%% Internal functions - v0 +%%%============================================================================ + +deserialise_checkedblock(Bin, lz4) when is_binary(Bin) -> + case lz4:unpack(Bin) of + {ok, Bin0} when is_binary(Bin0) -> + binary_to_term(Bin0) + end; +deserialise_checkedblock(Bin, zstd) when is_binary(Bin) -> + case zstd:decompress(Bin) of + Bin0 when is_binary(Bin0) -> + binary_to_term(Bin0) + end; +deserialise_checkedblock(Bin, _Other) when is_binary(Bin) -> + % native or none can be treated the same + binary_to_term(Bin). + +-spec serialise_block(any(), leveled_sst:press_method()) -> binary(). +%% @doc +%% Convert term to binary +%% Function split out to make it easier to experiment with different +%% compression methods. 
Also, perhaps standardise applictaion of CRC +%% checks +serialise_block(Term, lz4) -> + {ok, Bin} = lz4:pack(term_to_binary(Term)), + CRC32 = leveled_sst:hmac(Bin), + <>; +serialise_block(Term, native) -> + Bin = term_to_binary(Term, ?BINARY_SETTINGS), + CRC32 = leveled_sst:hmac(Bin), + <>; +serialise_block(Term, zstd) -> + Bin = zstd:compress(term_to_binary(Term)), + CRC32 = leveled_sst:hmac(Bin), + <>; +serialise_block(Term, none) -> + Bin = term_to_binary(Term), + CRC32 = leveled_sst:hmac(Bin), + <>. + +%%%============================================================================ +%%% eunit tests +%%%============================================================================ + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). +-include("leveled.hrl"). + +v1_block_test() -> + v1_block_tester(lookup, {1, zstd}, 24), + v1_block_tester(lookup, {1, zstd}, 25), + v1_block_tester(lookup, {1, native}, 32), + v1_block_tester(lookup, {1, native}, 31), + v1_block_tester(no_lookup, {1, zstd}, 24). + +v1_bigblock_test() -> + BigBlob = crypto:strong_rand_bytes(16384), + {MegaSec, Sec, MicroSec} = os:timestamp(), + MetaBin = + << + MegaSec:32/integer, + Sec:32/integer, + MicroSec:32/integer, + BigBlob/binary + >>, + MetaLen = byte_size(MetaBin), + SibMetaBin = + << + 1:32/integer, + 0:32/integer, + MetaLen:32/integer, + MetaBin/binary + >>, + v1_block_tester(lookup, {1, zstd}, 24, SibMetaBin), + v1_block_tester(lookup, {1, zstd}, 32, SibMetaBin), + v1_block_tester(lookup, {1, zstd}, 25, SibMetaBin), + v1_block_tester(lookup, {1, native}, 32, SibMetaBin), + v1_block_tester(lookup, {1, native}, 31, SibMetaBin), + v1_block_tester(no_lookup, {1, zstd}, 24, SibMetaBin). + +v1_nolookup_bigtail_test() -> + BigBlob = crypto:strong_rand_bytes(1024), + BigBucket = base64:encode(crypto:strong_rand_bytes(65536)), + {MegaSec, Sec, MicroSec} = os:timestamp(), + MetaBin = + << + MegaSec:32/integer, + Sec:32/integer, + MicroSec:32/integer, + BigBlob/binary + >>, + MetaLen = byte_size(MetaBin), + SibMetaBin = + << + 1:32/integer, + 0:32/integer, + MetaLen:32/integer, + MetaBin/binary + >>, + v1_block_tester(no_lookup, {1, zstd}, 24, SibMetaBin, BigBucket), + v1_block_tester(no_lookup, {1, zstd}, 56, SibMetaBin, BigBucket). + +v1_block_tester(Lookup, BlockMethod, BlockSize) -> + v1_block_tester( + Lookup, + BlockMethod, + BlockSize, + <<1:32/integer, 0:32/integer, 0:32/integer>> + ). + +v1_block_tester(Lookup, BlockMethod, BlockSize, SibMetaBin) -> + v1_block_tester(Lookup, BlockMethod, BlockSize, SibMetaBin, <<"Bucket">>). 
+ +v1_block_tester(Lookup, BlockMethod, BlockSize, SibMetaBin, B) -> + V = + leveled_head:riak_metadata_to_binary( + term_to_binary([{"actor1", 1}]), + SibMetaBin + ), + GenKeyFun = + fun(X) -> + LK = + {?RIAK_TAG, + B, + list_to_binary("Key" ++ integer_to_list(X)), + null}, + LKV = + leveled_codec:generate_ledgerkv( + LK, X, V, byte_size(V), infinity), + {_Bucket, _Key, MetaValue, _Hashes, _LastMods} = LKV, + {LK, MetaValue} + end, + KVL = lists:map(GenKeyFun, lists:seq(1, BlockSize)), + Block = serialise_block(Lookup, BlockMethod, KVL), + case Lookup of + lookup -> + LKV1 = get_nth(1, Block, BlockMethod), + ?assertMatch(LKV1, hd(KVL)), + LKV6 = get_nth(6, Block, BlockMethod), + ?assertMatch(LKV6, lists:nth(6, KVL)), + LKV7 = get_nth(7, Block, BlockMethod), + ?assertMatch(LKV7, lists:nth(7, KVL)), + LKV24 = get_nth(24, Block, BlockMethod), + ?assertMatch(LKV24, lists:nth(24, KVL)); + no_lookup -> + ok + end, + {Top, Tail, AllFun} = get_topandtail(Block, BlockMethod), + ?assertMatch(Top, element(1, hd(KVL))), + ?assertMatch(Tail, element(1, lists:last(KVL))), + ?assertMatch(KVL, AllFun(all)), + ?assertMatch(KVL, get_all(Block, BlockMethod)). + + + +-endif. \ No newline at end of file diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 15cfde26..9bb74c26 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -45,8 +45,8 @@ from_orderedset(Table, Type) -> from_orderedlist(ets:tab2list(Table), Type, ?SKIP_WIDTH). --spec from_orderedset(ets:tab(), tree_type(), integer()|auto) - -> leveled_tree(). +-spec from_orderedset( + ets:tab(), tree_type(), integer()|auto) -> leveled_tree(). %% @doc %% Convert an ETS table of Keys and Values (of table type ordered_set) into a %% leveled_tree of the given type. The SkipWidth is an integer representing @@ -63,8 +63,8 @@ from_orderedset(Table, Type, SkipWidth) -> from_orderedlist(OrderedList, Type) -> from_orderedlist(OrderedList, Type, ?SKIP_WIDTH). --spec from_orderedlist(list(tuple()), tree_type(), integer()|auto) - -> leveled_tree(). +-spec from_orderedlist( + list(tuple()), tree_type(), integer()|auto) -> leveled_tree(). %% @doc %% Convert a list of Keys and Values (of table type ordered_set) into a %% leveled_tree of the given type. The SkipWidth is an integer representing @@ -114,7 +114,11 @@ match(Key, {skpl, _L, SkipList}) -> SL0 = skpl_getsublist(Key, SkipList), lookup_match(Key, SL0). --spec search(tuple()|integer(), leveled_tree(), fun()) -> none|tuple(). +-spec search( + tuple()|integer(), + leveled_tree(), + fun((leveled_pmanifest:manifest_entry()) -> leveled_codec:object_key())) + -> none|tuple(). %% @doc %% Search is used when the tree is a manifest of key ranges and it is necessary %% to find a rnage which may contain the key. The StartKeyFun is used if the @@ -162,10 +166,10 @@ search(Key, {skpl, _L, SkipList}, StartKeyFun) -> none end. --spec match_range(tuple()|integer()|all, - tuple()|integer()|all, - leveled_tree()) - -> list(). +-spec match_range( + tuple()|integer()|all, + tuple()|integer()|all, + leveled_tree()) -> list(). %% @doc %% Return a range of value between trees from a tree associated with an %% exact match for the given key. This assumes the tree contains the actual @@ -181,11 +185,11 @@ match_range(StartRange, EndRange, Tree) -> end, match_range(StartRange, EndRange, Tree, EndRangeFun). --spec match_range(tuple()|integer()|all, - tuple()|integer()|all, - leveled_tree(), - fun()) - -> list(). 
+-spec match_range( + tuple()|integer()|all, + tuple()|integer()|all, + leveled_tree(), + fun((term(), term(), term()) -> boolean())) -> list(). %% @doc %% As match_range/3 but a function can be passed to be used when comparing the %5 EndKey with a key in the tree (such as leveled_codec:endkey_passed), where @@ -197,11 +201,12 @@ match_range(StartRange, EndRange, {idxt, _L, Tree}, EndRangeFun) -> match_range(StartRange, EndRange, {skpl, _L, SkipList}, EndRangeFun) -> skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun). --spec search_range(tuple()|integer()|all, - tuple()|integer()|all, - leveled_tree(), - fun()) - -> list(). +-spec search_range( + tuple()|integer()|all, + tuple()|integer()|all, + leveled_tree(), + fun((leveled_pmanifest:manifest_entry()) -> leveled_codec:object_key())) + -> list(). %% @doc %% Extract a range from a tree, with search used when the tree is a manifest %% of key ranges and it is necessary to find a rnage which may encapsulate the @@ -429,42 +434,29 @@ idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) -> end end. +skplfold_range([], _StartRange, _EndRange, Acc) -> + Acc; +skplfold_range([{K, _SL}|Rest], StartRange, EndRange, Acc) when StartRange > K -> + skplfold_range(Rest, StartRange, EndRange, Acc); +skplfold_range([{K, SL}|Rest], StartRange, EndRange, Acc) -> + case leveled_codec:endkey_passed(EndRange, K) of + true -> + [SL|Acc]; + false -> + skplfold_range(Rest, StartRange, EndRange, [SL|Acc]) + end. + skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) -> - FoldFun = - fun({K, SL}, {PassedStart, PassedEnd, Acc}) -> - case {PassedStart, PassedEnd} of - {false, false} -> - case StartRange > K of - true -> - {PassedStart, PassedEnd, Acc}; - false -> - case leveled_codec:endkey_passed(EndRange, K) of - true -> - {true, true, [SL|Acc]}; - false -> - {true, false, [SL|Acc]} - end - end; - {true, false} -> - case leveled_codec:endkey_passed(EndRange, K) of - true -> - {true, true, [SL|Acc]}; - false -> - {true, false, [SL|Acc]} - end; - {true, true} -> - {PassedStart, PassedEnd, Acc} - end - end, - Lv1List = lists:reverse(element(3, - lists:foldl(FoldFun, - {false, false, []}, - SkipList))), - Lv0List = lists:reverse(element(3, - lists:foldl(FoldFun, - {false, false, []}, - lists:append(Lv1List)))), + Lv1List = + lists:reverse( + skplfold_range(SkipList, StartRange, EndRange, []) + ), + Lv0List = + lists:reverse( + skplfold_range( + lists:append(Lv1List), StartRange, EndRange, []) + ), BeforeFun = fun({K, _V}) -> K < StartRange @@ -479,20 +471,20 @@ skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) -> end end, - case length(Lv0List) of - 0 -> + case Lv0List of + [] -> []; - 1 -> - RHS = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)), + [SingleList] -> + RHS = lists:dropwhile(BeforeFun, SingleList), lists:takewhile(AfterFun, RHS); - 2 -> - RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)), - LHSofRHL = lists:takewhile(AfterFun, lists:last(Lv0List)), + [LHList, RHList] -> + RHSofLHL = lists:dropwhile(BeforeFun, LHList), + LHSofRHL = lists:takewhile(AfterFun, RHList), RHSofLHL ++ LHSofRHL; - L -> - RHSofLHL = lists:dropwhile(BeforeFun, lists:nth(1, Lv0List)), - LHSofRHL = lists:takewhile(AfterFun, lists:last(Lv0List)), - MidLists = lists:sublist(Lv0List, 2, L - 2), + [LHL|Rest] -> + RHSofLHL = lists:dropwhile(BeforeFun, LHL), + LHSofRHL = lists:takewhile(AfterFun, lists:last(Rest)), + MidLists = lists:sublist(Rest, length(Rest) - 1), lists:append([RHSofLHL] ++ MidLists ++ [LHSofRHL]) 
end. diff --git a/src/leveled_util.erl b/src/leveled_util.erl index 630e9f42..b6d45676 100644 --- a/src/leveled_util.erl +++ b/src/leveled_util.erl @@ -1,6 +1,6 @@ %% -------- Utility Functions --------- %% -%% Generally helpful funtions within leveled +%% Generally helpful functions within leveled %% -module(leveled_util). diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 855aad8e..07beabd2 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -637,6 +637,47 @@ query_count(_Config) -> Index1Count = count_termsonindex( BucketBin, <<"idx1_bin">>, Book1, ?KEY_ONLY), + + TermCountFun = + fun(_B, {T, _K}, Acc) -> + Cnt = maps:get(T, Acc, 0), + maps:put(T, Cnt, Acc) + end, + + {async, TermRunner} = + leveled_bookie:book_returnfolder( + Book1, + { + index_query, + BucketBin, + {TermCountFun, maps:new()}, + {<<"idx1_bin">>, <<"0">>, <<"|">>}, + {true, undefined} + } + ), + + TermCounts = TermRunner(), + io:format("TermCounts ~0p", [TermCounts]), + lists:foreach( + fun(T) -> + {async, TR0} = + leveled_bookie:book_returnfolder( + Book1, + { + index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, T, <>}, + ?KEY_ONLY + } + ), + ResultsForTerm = length(TR0()), + io:format("Results for term ~w ~w", [T, ResultsForTerm]), + maps:get(T, TermCounts) == ResultsForTerm + end, + maps:keys(TermCounts) + ), + ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start( diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl index 59d746fd..0a4bc32f 100644 --- a/test/end_to_end/perf_SUITE.erl +++ b/test/end_to_end/perf_SUITE.erl @@ -363,6 +363,16 @@ memory_usage() -> profile_app(Pids, ProfiledFun, P) -> + MinTime = + case P of + P when P == query; P == mini_query -> + 100000; + P when P == head; P == load -> + 200000; + _ -> + 150000 + end, + eprof:start(), eprof:start_profiling(Pids), @@ -370,7 +380,7 @@ profile_app(Pids, ProfiledFun, P) -> eprof:stop_profiling(), eprof:log(atom_to_list(P) ++ ".log"), - eprof:analyze(total, [{filter, [{time, 160000}]}]), + eprof:analyze(total, [{filter, [{time, MinTime}]}]), eprof:stop(), {ok, Analysis} = file:read_file(atom_to_list(P) ++ ".log"), io:format(user, "~n~s~n", [Analysis]) diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 5b35970b..8bff06b3 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -48,10 +48,11 @@ end_per_suite(Config) -> replace_everything(_Config) -> % See https://github.com/martinsumner/leveled/issues/389 - % Also replaces previous test which was checking the comapction process + % Also replaces previous test which was checking the compaction process % respects the journal object count passed at startup RootPath = testutil:reset_filestructure(), BackupPath = testutil:reset_filestructure("backupRE"), + JournalPath = filename:join(RootPath, "journal/journal_files"), CompPath = filename:join(RootPath, "journal/journal_files/post_compact"), SmallJournalCount = 7000, StdJournalCount = 20000, @@ -74,13 +75,13 @@ replace_everything(_Config) -> {KSpcL2, V2} = testutil:put_altered_indexed_objects(Book1, BKT, KSpcL1), ok = testutil:check_indexed_objects(Book1, BKT, KSpcL2, V2), - compact_and_wait(Book1, 1000), - compact_and_wait(Book1, 1000), + {ok, FileList0} = file:list_dir(JournalPath), + io:format( + "Number of journal files before compaction ~w~n", + [length(FileList0)] + ), {ok, FileList1} = file:list_dir(CompPath), - 
io:format("Number of files after compaction ~w~n", [length(FileList1)]), - compact_and_wait(Book1, 1000), - {ok, FileList2} = file:list_dir(CompPath), - io:format("Number of files after compaction ~w~n", [length(FileList2)]), + FileList2 = check_compaction(Book1, CompPath), true = FileList1 =< FileList2, %% There will normally be 5 journal files after 50K write then alter %% That may be two files with entirely altered objects - which will be @@ -92,10 +93,14 @@ replace_everything(_Config) -> %% is randomisation in both the scoring and the journal size (due to %% jittering of parameters). compact_and_wait(Book1, 1000), - {ok, FileList3} = file:list_dir(CompPath), - io:format("Number of files after compaction ~w~n", [length(FileList3)]), - %% By the third compaction there should be no further changes - true = FileList2 == FileList3, + {ok, FileList3a} = file:list_dir(CompPath), + io:format("Number of files after compaction ~w~n", [length(FileList3a)]), + compact_and_wait(Book1, 1000), + {ok, FileList3b} = file:list_dir(CompPath), + io:format("Number of files after compaction ~w~n", [length(FileList3b)]), + %% By the fourth compaction there should be no further changes + true = FileList3a == FileList3b, + true = 0 < FileList3b, {async, BackupFun} = leveled_bookie:book_hotbackup(Book1), ok = BackupFun(BackupPath), @@ -302,6 +307,17 @@ recovery_with_samekeyupdates(_Config) -> testutil:reset_filestructure(BackupPath), testutil:reset_filestructure(). +check_compaction(Book, CompPath) -> + compact_and_wait(Book, 1000), + {ok, FileList} = file:list_dir(CompPath), + io:format("Number of files after compaction ~w~n", [length(FileList)]), + case FileList > 0 of + true -> + FileList; + _ -> + check_compaction(Book, CompPath) + end. + same_key_rotation_withindexes(_Config) -> % If we have the same key - but the indexes change. Do we consistently % recalc the indexes correctly, even when the key exists multiple times diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 2446b785..c753be16 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -6,6 +6,7 @@ -export([ test_large_lsm_merge/1, basic_riak/1, + block_version_change/1, fetchclocks_modifiedbetween/1, crossbucket_aae/1, handoff/1, @@ -21,6 +22,7 @@ suite() -> [{timetrap, {hours, 2}}]. all() -> [ basic_riak, + block_version_change, fetchclocks_modifiedbetween, crossbucket_aae, handoff, @@ -192,14 +194,188 @@ lsm_merge_tester(LoopsPerBucket) -> ok = leveled_bookie:book_destroy(Bookie2). 
+block_version_change(_Config) -> + KeyCount = 40000, + Bucket = {<<"Type0">>, <<"B0">>}, + IndexCount = 8, + + RootPath = testutil:reset_filestructure("blockVerion"), + StartOpts1 = + [ + {root_path, RootPath}, + {max_pencillercachesize, 12000}, + {block_version, 0}, + {sync_strategy, testutil:sync_strategy()}, + {database_id, 32}, + {stats_logfrequency, 5}, + {stats_probability, 80} + ], + {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), + + IndexGenFun = + fun(ListID) -> + fun() -> + RandInt = rand:uniform(IndexCount), + ID = integer_to_list(ListID), + [ + { + add, + list_to_binary("integer" ++ ID ++ "_int"), + RandInt + }, + { + add, + list_to_binary("binary" ++ ID ++ "_bin"), + <> + } + ] + end + end, + + ObjList1 = + testutil:generate_objects( + KeyCount, + {fixed_binary, 1}, [], + crypto:strong_rand_bytes(512), + IndexGenFun(1), + Bucket + ), + testutil:riakload(Bookie1, ObjList1), + + SubList1 = lists:sublist(lists:ukeysort(1, ObjList1), 1000), + ok = testutil:check_forlist(Bookie1, SubList1), + + FoldKeysFun = fun(_B, K, Acc) -> [K|Acc] end, + IntIndexFold = + fun(Idx, Book) -> + fun(IC, CountAcc) -> + ID = integer_to_list(Idx), + Index = list_to_binary("integer" ++ ID ++ "_int"), + {async, R} = + leveled_bookie:book_indexfold( + Book, + {Bucket, <<>>}, + {FoldKeysFun, []}, + {Index, IC, IC}, + {true, undefined} + ), + KTL = R(), + CountAcc + length(KTL) + end + end, + BinIndexFold = + fun(Idx, Book) -> + fun(IC, CountAcc) -> + ID = integer_to_list(Idx), + Index = list_to_binary("binary" ++ ID ++ "_bin"), + {async, R} = + leveled_bookie:book_indexfold( + Book, + {Bucket, <<>>}, + {FoldKeysFun, []}, + {Index, <>, <>}, + {true, undefined} + ), + KTL = R(), + CountAcc + length(KTL) + end + end, + + CheckIndices = + fun(Bookie, ObjList, Idx) -> + SWA = os:timestamp(), + TotalIntIndexEntries = + lists:foldl( + IntIndexFold(Idx, Bookie), + 0, + lists:seq(1, IndexCount) + ), + io:format( + "~w queries returned count=~w in ~w ms~n", + [ + IndexCount, + TotalIntIndexEntries, + timer:now_diff(os:timestamp(), SWA) div 1000 + ] + ), + true = TotalIntIndexEntries == length(ObjList1), + SWB = os:timestamp(), + TotalBinIndexEntries = + lists:foldl( + BinIndexFold(Idx, Bookie), + 0, + lists:seq(1, IndexCount) + ), + io:format( + "~w queries returned count=~w in ~w ms~n", + [ + IndexCount, + TotalBinIndexEntries, + timer:now_diff(os:timestamp(), SWB) div 1000 + ] + ), + true = TotalBinIndexEntries == length(ObjList) + end, + + CheckIndices(Bookie1, ObjList1, 1), + + ok = leveled_bookie:book_close(Bookie1), + + StartOpts2 = lists:ukeysort(1, [{block_version, 1}|StartOpts1]), + {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), + + ObjList2 = + testutil:generate_objects( + KeyCount, + {fixed_binary, KeyCount + 1}, [], + crypto:strong_rand_bytes(512), + IndexGenFun(2), + Bucket + ), + testutil:riakload(Bookie2, ObjList2), + + SubList2 = lists:sublist(lists:ukeysort(1, ObjList2), 1000), + ok = testutil:check_forlist(Bookie2, SubList1), + ok = testutil:check_forlist(Bookie2, SubList2), + + CheckIndices(Bookie2, ObjList1, 1), + CheckIndices(Bookie2, ObjList2, 2), + + ok = leveled_bookie:book_close(Bookie2), + + {ok, Bookie3} = leveled_bookie:book_start(StartOpts1), + + ObjList3 = + testutil:generate_objects( + KeyCount, + {fixed_binary, KeyCount + KeyCount + 1}, [], + crypto:strong_rand_bytes(512), + IndexGenFun(3), + Bucket + ), + testutil:riakload(Bookie3, ObjList3), + + SubList3 = lists:sublist(lists:ukeysort(1, ObjList3), 1000), + ok = testutil:check_forlist(Bookie3, SubList1), + ok = 
testutil:check_forlist(Bookie3, SubList2), + ok = testutil:check_forlist(Bookie3, SubList3), + + CheckIndices(Bookie3, ObjList1, 1), + CheckIndices(Bookie3, ObjList2, 2), + CheckIndices(Bookie3, ObjList3, 3), + + ok = leveled_bookie:book_destroy(Bookie3). + basic_riak(_Config) -> basic_riak_tester(<<"B0">>, 640000), basic_riak_tester({<<"Type0">>, <<"B0">>}, 80000). basic_riak_tester(Bucket, KeyCount) -> % Key Count should be > 10K and divisible by 5 - io:format("Basic riak test with Bucket ~w KeyCount ~w~n", - [Bucket, KeyCount]), + io:format( + "Basic riak test with Bucket ~w KeyCount ~w~n", + [Bucket, KeyCount] + ), IndexCount = 20, RootPath = testutil:reset_filestructure("basicRiak"), From 5e4c472344a5b5dc19a5a4c6cb10d011e22c08cc Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 7 Aug 2025 16:00:47 +0100 Subject: [PATCH 2/4] Filter Expression support (#440) Add extended query support: - Query terms can be split into a map of projected attributes using an evaluation expression - Terms can be filtered based on the map of projected attributes using a filter expression - Query results cna be combined using a set expression Co-authored-by: Thomas Arts --- .gitignore | 4 + README.md | 2 +- include/leveled.hrl | 6 + rebar.config | 6 +- src/leveled_bookie.erl | 248 ++++--- src/leveled_codec.erl | 61 +- src/leveled_eval.erl | 1029 +++++++++++++++++++++++++++ src/leveled_evallexer.xrl | 54 ++ src/leveled_evalparser.yrl | 84 +++ src/leveled_filter.erl | 710 ++++++++++++++++++ src/leveled_filterlexer.xrl | 51 ++ src/leveled_filterparser.yrl | 53 ++ src/leveled_runner.erl | 19 +- src/leveled_setop.erl | 170 +++++ src/leveled_setoplexer.xrl | 21 + src/leveled_setopparser.yrl | 28 + src/leveled_tree.erl | 18 +- src/leveled_util.erl | 36 +- test/end_to_end/basic_SUITE.erl | 7 +- test/end_to_end/iterator_SUITE.erl | 514 ++++++++++++- test/end_to_end/perf_SUITE.erl | 81 ++- test/end_to_end/recovery_SUITE.erl | 135 ++-- test/end_to_end/riak_SUITE.erl | 78 +- test/end_to_end/testutil.erl | 29 +- test/property/evallang_eqc.erl | 161 +++++ test/property/filterlang_eqc.erl | 111 +++ test/property/leveled_simpleeqc.erl | 5 +- test/property/leveled_statemeqc.erl | 4 +- test/property/setoplang_eqc.erl | 105 +++ 29 files changed, 3556 insertions(+), 274 deletions(-) create mode 100644 src/leveled_eval.erl create mode 100644 src/leveled_evallexer.xrl create mode 100644 src/leveled_evalparser.yrl create mode 100644 src/leveled_filter.erl create mode 100644 src/leveled_filterlexer.xrl create mode 100644 src/leveled_filterparser.yrl create mode 100644 src/leveled_setop.erl create mode 100644 src/leveled_setoplexer.xrl create mode 100644 src/leveled_setopparser.yrl create mode 100644 test/property/evallang_eqc.erl create mode 100644 test/property/filterlang_eqc.erl create mode 100644 test/property/setoplang_eqc.erl diff --git a/.gitignore b/.gitignore index 667578a3..8e691596 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,8 @@ cover cover_* .eqc-info leveled_data/* +compile_commands.json +*parser.erl +*lexer.erl elp +current_counterexample.eqc diff --git a/README.md b/README.md index f5b8efae..7dc7898a 100644 --- a/README.md +++ b/README.md @@ -86,4 +86,4 @@ To have rebar3 execute the full set of tests, run: For those with a Quickcheck license, property-based tests can also be run using: -```./rebar3 as eqc do eunit --module=leveled_simpleeqc, eunit --module=leveled_statemeqc``` +```./rebar3 as eqc do eunit``` diff --git a/include/leveled.hrl b/include/leveled.hrl index 4c5fe810..43512e08 100644 --- 
a/include/leveled.hrl +++ b/include/leveled.hrl @@ -72,6 +72,12 @@ %% Inker key type used for tombstones %%%============================================================================ +%%%============================================================================ +%%% Test +%%%============================================================================ + +-define(EQC_TIME_BUDGET, 120). + %%%============================================================================ %%% Helper Function %%%============================================================================ diff --git a/rebar.config b/rebar.config index c9696cad..a3794cc3 100644 --- a/rebar.config +++ b/rebar.config @@ -2,11 +2,13 @@ {xref_checks, [undefined_function_calls,undefined_functions, - locals_not_used, deprecated_function_calls, deprecated_functions]}. {cover_excl_mods, - [testutil, + [leveled_filterlexer, leveled_filterparser, + leveled_evallexer, leveled_evalparser, + leveled_setoplexer, leveled_setopparser, + testutil, appdefined_SUITE, basic_SUITE, iterator_SUITE, perf_SUITE, recovery_SUITE, riak_SUITE, tictac_SUITE]}. diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 372afd5f..545110c5 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -82,6 +82,7 @@ -export([ book_returnfolder/2, book_indexfold/5, + book_multiindexfold/5, book_bucketlist/4, book_keylist/3, book_keylist/4, @@ -707,23 +708,24 @@ book_returnfolder(Pid, RunnerType) -> %% be interrupted by a throw, which will be forwarded to the worker (whilst %% still closing down the snapshot). This may be used, for example, to %% curtail a fold in the application at max_results --spec book_indexfold(pid(), - Constraint:: {Bucket, StartKey}, - FoldAccT :: {FoldFun, Acc}, - Range :: {IndexField, Start, End}, - TermHandling :: {ReturnTerms, TermRegex}) -> - {async, Runner::fun(() -> dynamic())} - when Bucket::term(), - Key :: term(), - StartKey::term(), - FoldFun::fun((Bucket, Key | {IndexVal, Key}, Acc) -> Acc), - Acc::dynamic(), - IndexField::term(), - IndexVal::term(), - Start::IndexVal, - End::IndexVal, - ReturnTerms::boolean(), - TermRegex :: leveled_codec:regular_expression(). +-spec book_indexfold( + pid(), + Constraint:: {Bucket, StartKey}, + FoldAccT :: {FoldFun, Acc}, + Range :: {IndexField, Start, End}, + TermHandling :: {ReturnTerms, TermExpression}) -> + {async, Runner::fun(() -> dynamic())} + when Bucket::term(), + Key :: term(), + StartKey::term(), + FoldFun::fun((Bucket, Key | {IndexVal, Key}, Acc) -> Acc), + Acc::dynamic(), + IndexField::term(), + IndexVal::term(), + Start::IndexVal, + End::IndexVal, + ReturnTerms::boolean()|binary(), + TermExpression :: leveled_codec:term_expression(). book_indexfold(Pid, Constraint, FoldAccT, Range, TermHandling) when is_tuple(Constraint) -> @@ -739,6 +741,26 @@ book_indexfold(Pid, Bucket, FoldAccT, Range, TermHandling) -> leveled_log:log(b0019, [Bucket]), book_indexfold(Pid, {Bucket, null}, FoldAccT, Range, TermHandling). +-type query() + :: {binary(), binary(), binary(), leveled_codec:term_expression()}. +-type combo_fun() + :: fun((list(sets:set(leveled_codec:key()))) + -> sets:set(leveled_codec:key())). + +-spec book_multiindexfold( + pid(), + leveled_codec:key(), + { + fun((leveled_codec:key(), leveled_codec:key(), term()) -> term()), + term() + }, + list({non_neg_integer(), query()}), + combo_fun()) + -> {async, fun(() -> term())}. 
+book_multiindexfold(Pid, Bucket, FoldAccT, Queries, ComboFun) -> + RunnerType = + {multi_index_query, Bucket, FoldAccT, Queries, ComboFun}, + book_returnfolder(Pid, RunnerType). %% @doc list buckets. Folds over the ledger only. Given a `Tag' folds %% over the keyspace calling `FoldFun' from `FoldAccT' for each @@ -840,7 +862,7 @@ book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT) -> StartKey :: Key, EndKey :: Key, Key :: term(), - TermRegex :: leveled_codec:regular_expression(), + TermRegex :: leveled_codec:term_expression(), Runner :: fun(() -> Acc). book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT, TermRegex) -> RunnerType = {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}, @@ -2029,22 +2051,53 @@ snaptype_by_presence(false) -> %% Get an {async, Runner} for a given fold type. Fold types have different %% tuple inputs get_runner(State, {index_query, Constraint, FoldAccT, Range, TermHandling}) -> - {IdxFld, StartT, EndT} = Range, - {Bucket, ObjKey0} = - case Constraint of - {B, SK} -> - {B, SK}; - B -> - {B, null} - end, - StartKey = - leveled_codec:to_querykey(Bucket, ObjKey0, ?IDX_TAG, IdxFld, StartT), - EndKey = - leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, IdxFld, EndT), + {StartKey, EndKey} = index_range(Constraint, Range), SnapFun = return_snapfun(State, ledger, {StartKey, EndKey}, false, false), - leveled_runner:index_query(SnapFun, - {StartKey, EndKey, TermHandling}, - FoldAccT); + leveled_runner:index_query( + SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT); +get_runner( + State, + {multi_index_query, Bucket, FoldAccT, Queries, ComboFun}) -> + {FoldFun, InitAcc} = FoldAccT, + KeyFolder = fun(_B, K, Acc) -> [K|Acc] end, + QueryRunners = + lists:map( + fun({SetId, {IdxFld, StartTerm, EndTerm, Expr}}) -> + {SK, EK} = + index_range( + {Bucket, null}, {IdxFld, StartTerm, EndTerm}), + SnapFun = + return_snapfun(State, ledger, {SK, EK}, false, true), + {async, Runner} = + leveled_runner:index_query( + SnapFun, {SK, EK, {false, Expr}}, {KeyFolder, []} + ), + {SetId, Runner} + end, + Queries + ), + OverallRunner = + fun() -> + FinalSet = + ComboFun( + maps:from_list( + lists:map( + fun({SetId, R}) -> + case R() of + KLR when is_list(KLR) -> + {SetId, sets:from_list(KLR)} + end + end, + QueryRunners) + ) + ), + lists:foldl( + fun(K, Acc) -> FoldFun(Bucket, K, Acc) end, + InitAcc, + sets:to_list(FinalSet) + ) + end, + {async, OverallRunner}; get_runner(State, {keylist, Tag, FoldAccT}) -> SnapFun = return_snapfun(State, ledger, no_lookup, true, true), leveled_runner:bucketkey_query(SnapFun, Tag, null, FoldAccT); @@ -2054,13 +2107,14 @@ get_runner(State, {keylist, Tag, Bucket, FoldAccT}) -> get_runner(State, {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}) -> SnapFun = return_snapfun(State, ledger, no_lookup, true, true), leveled_runner:bucketkey_query( - SnapFun, Tag, Bucket, KeyRange, FoldAccT, TermRegex); + SnapFun, Tag, Bucket, KeyRange, FoldAccT, TermRegex); %% Set of runners for object or metadata folds -get_runner(State, - {foldheads_allkeys, - Tag, FoldFun, - JournalCheck, SnapPreFold, SegmentList, - LastModRange, MaxObjectCount}) -> +get_runner( + State, + {foldheads_allkeys, + Tag, FoldFun, + JournalCheck, SnapPreFold, SegmentList, + LastModRange, MaxObjectCount}) -> SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, no_lookup, true, SnapPreFold), leveled_runner:foldheads_allkeys( @@ -2072,25 +2126,30 @@ get_runner(State, LastModRange, MaxObjectCount ); -get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) 
-> - get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}); -get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}) -> - SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold), - leveled_runner:foldobjects_allkeys(SnapFun, Tag, FoldFun, key_order); -get_runner(State, - {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, sqn_order}) -> - SnapFun = return_snapfun(State, store, undefined, true, SnapPreFold), - leveled_runner:foldobjects_allkeys(SnapFun, Tag, FoldFun, sqn_order); -get_runner(State, - {foldheads_bybucket, - Tag, - BucketList, bucket_list, - FoldFun, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}) -> +get_runner(State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) -> + get_runner( + State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}); +get_runner(State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, Order}) -> + case Order of + key_order -> + SnapFun = + return_snapfun(State, store, no_lookup, true, SnapPreFold), + leveled_runner:foldobjects_allkeys( + SnapFun, Tag, FoldFun, key_order); + sqn_order -> + SnapFun = + return_snapfun(State, store, undefined, true, SnapPreFold), + leveled_runner:foldobjects_allkeys( + SnapFun, Tag, FoldFun, sqn_order) + end; +get_runner( + State, + {foldheads_bybucket, + Tag, + BucketList, bucket_list, + FoldFun, + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}) -> KeyRangeFun = fun(Bucket) -> {StartKey, EndKey, _} = return_ledger_keyrange(Tag, Bucket, all), @@ -2108,18 +2167,19 @@ get_runner(State, LastModRange, MaxObjectCount ); -get_runner(State, - {foldheads_bybucket, - Tag, - Bucket, KeyRange, - FoldFun, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}) -> +get_runner( + State, + {foldheads_bybucket, + Tag, + Bucket, KeyRange, + FoldFun, + JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}) -> {StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange), SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, SnapQ, true, SnapPreFold), leveled_runner:foldheads_bybucket( - SnapFun, + SnapFun, Tag, [{StartKey, EndKey}], FoldFun, @@ -2128,23 +2188,25 @@ get_runner(State, LastModRange, MaxObjectCount ); -get_runner(State, - {foldobjects_bybucket, - Tag, Bucket, KeyRange, - FoldFun, - SnapPreFold}) -> +get_runner( + State, + {foldobjects_bybucket, + Tag, Bucket, KeyRange, + FoldFun, + SnapPreFold}) -> {StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange), SnapFun = return_snapfun(State, store, SnapQ, true, SnapPreFold), leveled_runner:foldobjects_bybucket( - SnapFun, Tag, [{StartKey, EndKey}], FoldFun); -get_runner(State, - {foldobjects_byindex, - Tag, Bucket, {Field, FromTerm, ToTerm}, - FoldObjectsFun, - SnapPreFold}) -> + SnapFun, Tag, [{StartKey, EndKey}], FoldFun); +get_runner( + State, + {foldobjects_byindex, + Tag, Bucket, {Field, FromTerm, ToTerm}, + FoldObjectsFun, + SnapPreFold}) -> SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold), leveled_runner:foldobjects_byindex( - SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm},FoldObjectsFun); + SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldObjectsFun); get_runner(State, {bucket_list, Tag, FoldAccT}) -> {FoldBucketsFun, Acc} = FoldAccT, SnapFun = return_snapfun(State, ledger, no_lookup, false, false), @@ -2158,6 +2220,21 @@ get_runner(State, DeprecatedQuery) -> get_deprecatedrunner(State, DeprecatedQuery). 
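To make the shape of the new multi-index query concrete, the sketch below shows how a caller might intersect two index ranges. It is illustrative only: the bookie pid, bucket, index fields and set identifiers are assumed, and each query's term expression is left as undefined so no per-entry filtering is applied.

    %% Illustrative sketch, not part of the change itself
    Queries =
        [{set_a, {<<"surname_bin">>, <<"SM">>, <<"SN">>, undefined}},
         {set_b, {<<"postcode_bin">>, <<"LS1">>, <<"LS2">>, undefined}}],
    Intersect =
        fun(SetsBySetId) -> sets:intersection(maps:values(SetsBySetId)) end,
    FoldAccT = {fun(_Bucket, Key, Acc) -> [Key|Acc] end, []},
    {async, Folder} =
        leveled_bookie:book_multiindexfold(
            Bookie, <<"Bucket">>, FoldAccT, Queries, Intersect),
    MatchingKeys = Folder().

The ComboFun receives a map of set id to sets:set() and must return a single set; using sets:union/1 in its place would give an OR of the queries rather than an AND.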
+index_range(Constraint, Range) -> + {IdxFld, StartT, EndT} = Range, + {Bucket, ObjKey0} = + case Constraint of + {B, SK} -> + {B, SK}; + B -> + {B, null} + end, + StartKey = + leveled_codec:to_querykey(Bucket, ObjKey0, ?IDX_TAG, IdxFld, StartT), + EndKey = + leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, IdxFld, EndT), + {StartKey, EndKey}. + -spec get_deprecatedrunner(book_state(), tuple()) -> {async, fun(() -> term())}. %% @doc @@ -2838,17 +2915,16 @@ ttl_test() -> KeyList = IndexFolder(), ?assertMatch(20, length(KeyList)), - {ok, Regex} = re:compile("f8"), + {ok, Regex} = leveled_util:regex_compile("f8"), {async, IndexFolderTR} = book_returnfolder( Bookie1, - { - index_query, - <<"Bucket">>, - {FoldKeysFun, []}, - {<<"idx1_bin">>, <<"f8">>, <<"f9">>}, - {true, Regex}} - ), + {index_query, + <<"Bucket">>, + {FoldKeysFun, []}, + {<<"idx1_bin">>, <<"f8">>, <<"f9">>}, + {true, Regex}} + ), TermKeyList = IndexFolderTR(), ?assertMatch(10, length(TermKeyList)), diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index ead95a41..354520a4 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -130,10 +130,17 @@ {index_specs(), infinity|integer()}. % {KeyChanges, TTL} -type maybe_lookup() :: lookup|no_lookup. --type regular_expression() :: - {re_pattern, term(), term(), term(), term()}|undefined. - % first element must be re_pattern, but tuple may change legnth with - % versions +-type actual_regex() :: + {re_pattern, term(), term(), term(), term()}. +-type capture_value() :: binary()|integer(). +-type query_filter_fun() :: + fun((#{binary() => capture_value()}) -> boolean()). +-type query_eval_fun() :: + fun((binary(), binary()) -> #{binary() => capture_value()}). +-type query_expression() :: + {query, query_eval_fun(), query_filter_fun()}. +-type term_expression() :: + actual_regex()|undefined|query_expression(). -type value_fetcher() :: {fun((pid(), leveled_codec:journal_key()) -> any()), @@ -176,7 +183,8 @@ maybe_lookup/0, last_moddate/0, lastmod_range/0, - regular_expression/0, + term_expression/0, + actual_regex/0, value_fetcher/0, proxy_object/0, slimmed_key/0, @@ -311,7 +319,7 @@ maybe_accumulate( maybe_accumulate(T, Acc, Count, Filter, AccFun). -spec accumulate_index( - {boolean(), undefined|leveled_runner:mp()}, + {boolean()|binary(), term_expression()}, leveled_runner:fold_keys_fun()) -> leveled_penciller:pclacc_fun(). accumulate_index({false, undefined}, FoldKeysFun) -> @@ -326,11 +334,21 @@ accumulate_index({true, undefined}, FoldKeysFun) -> when IdxValue =/= null, ObjKey =/= null -> FoldKeysFun(Bucket, {IdxValue, ObjKey}, Acc) end; +accumulate_index( + {AddTerm, {query, EvalFun, FilterFun}}, FoldKeysFun) -> + fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) + when is_binary(ObjKey) -> + CptMap = EvalFun(IdxValue, ObjKey), + check_captured_terms( + CptMap, + FilterFun, AddTerm, FoldKeysFun, + Bucket, IdxValue, ObjKey, + Acc) + end; accumulate_index({AddTerm, TermRegex}, FoldKeysFun) -> - fun( - {?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) - when IdxValue =/= null, ObjKey =/= null -> - case re:run(IdxValue, TermRegex) of + fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) + when IdxValue =/= null, ObjKey =/= null, ?IS_DEF(TermRegex) -> + case leveled_util:regex_run(IdxValue, TermRegex, []) of nomatch -> Acc; _ -> @@ -343,6 +361,29 @@ accumulate_index({AddTerm, TermRegex}, FoldKeysFun) -> end end. 
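A minimal sketch of the contract behind the new query expression: EvalFun maps an index term and object key to a capture map, FilterFun accepts or rejects that map, and the AddTerm element picks what is accumulated. The funs below are hand-rolled stand-ins, not the leveled_eval/leveled_filter builders added elsewhere in this change, and the index term format is assumed.

    EvalFun =
        fun(IdxValue, _ObjKey) ->
            [Fn, Dob] = binary:split(IdxValue, <<"|">>),
            #{<<"fn">> => Fn, <<"dob">> => Dob}
        end,
    FilterFun =
        fun(CptMap) -> maps:get(<<"dob">>, CptMap, <<>>) >= <<"19800101">> end,
    Query = {query, EvalFun, FilterFun}.
    %% For an entry with term <<"SMITH|19861216">> and key <<"K1">>:
    %%   {false, Query}     accumulates <<"K1">>
    %%   {true, Query}      accumulates {<<"SMITH|19861216">>, <<"K1">>}
    %%   {<<"dob">>, Query} accumulates {<<"19861216">>, <<"K1">>}
    %% Entries failing FilterFun, or missing the named capture, are skipped.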
+check_captured_terms( + CptMap, FilterFun, AddTerm, FoldKeysFun, B, IdxValue, ObjKey, Acc) -> + case FilterFun(CptMap) of + true -> + case AddTerm of + true -> + FoldKeysFun(B, {IdxValue, ObjKey}, Acc); + false -> + FoldKeysFun(B, ObjKey, Acc); + CptKey when is_binary(CptKey) -> + case maps:get(CptKey, CptMap, undefined) of + undefined -> + Acc; + CptValue -> + FoldKeysFun(B, {CptValue, ObjKey}, Acc) + end + end; + false -> + Acc + end. + + + -spec key_dominates(ledger_kv(), ledger_kv()) -> boolean(). %% @doc %% When comparing two keys in the ledger need to find if one key comes before diff --git a/src/leveled_eval.erl b/src/leveled_eval.erl new file mode 100644 index 00000000..7186055b --- /dev/null +++ b/src/leveled_eval.erl @@ -0,0 +1,1029 @@ +%% -------- Eval Functions --------- +%% +%% Support for different eval expressions within leveled +%% + +-module(leveled_eval). + +-export([generate_eval_function/2]). + +%%%============================================================================ +%%% External API +%%%============================================================================ + +-spec generate_eval_function( + string(), + map()) -> fun((binary(), binary()) -> map())|{error, term()}. +generate_eval_function(EvalString, Substitutions) -> + try + {ok, ParsedEval} = generate_eval_expression(EvalString, Substitutions), + fun(Term, Key) -> + apply_eval(ParsedEval, Term, Key, maps:new()) + end + catch + error:{badmatch, {error, Error, _LN}} -> + {error, Error}; + error:{badmatch, {error, Error}} -> + {error, Error} + end. + +%%%============================================================================ +%%% Internal functions +%%%============================================================================ + +generate_eval_expression(EvalString, Substitutions) -> + CodePointList = unicode:characters_to_list(EvalString), + {ok, Tokens, _EndLine} = leveled_evallexer:string(CodePointList), + case leveled_filter:substitute_items(Tokens, Substitutions, []) of + {error, Error} -> + {error, Error}; + UpdTokens -> + leveled_evalparser:parse(UpdTokens) + end. 
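As a usage sketch (values illustrative), the generated function is applied once per term/key pair and returns the accumulated attribute map; a step that cannot be applied leaves the map unchanged:

    EvalFun =
        leveled_eval:generate_eval_function(
            "delim($term, \"|\", ($fn, $dob, $pc)) | to_integer($dob, $dob)",
            maps:new()),
    AttrMap = EvalFun(<<"SMITH|19861216|LS1 4BT">>, <<"Key0001">>).
    %% AttrMap = #{<<"fn">> => <<"SMITH">>, <<"dob">> => 19861216,
    %%             <<"pc">> => <<"LS1 4BT">>}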
+ +apply_eval({eval, Eval}, Term, Key, AttrMap) -> + apply_eval(Eval, Term, Key, AttrMap); +apply_eval({'PIPE', Eval1, 'INTO', Eval2}, Term, Key, AttrMap) -> + apply_eval(Eval2, Term, Key, apply_eval(Eval1, Term, Key, AttrMap)); +apply_eval({ + delim, {identifier, _, InKey}, {string, _, Delim}, ExpKeys}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSplit when is_binary(TermToSplit) -> + CP = + case get({compile_pattern, Delim}) of + undefined -> + NewDelimCP = compile_delim(Delim), + put({compile_pattern, Delim}, NewDelimCP), + NewDelimCP; + DelimCP -> + DelimCP + end, + delim(TermToSplit, CP, AttrMap, ExpKeys); + _ -> + AttrMap + end; +apply_eval( + {join, InKeys, {string, _, Delim}, {identifier, _, OutKey}}, + _Term, _Key, AttrMap) -> + NewTerm = + unicode:characters_to_binary( + lists:join( + Delim, + lists:filter( + fun(V) -> is_binary(V) end, + lists:map( + fun(InKey) -> maps:get(InKey, AttrMap, <<"">>) end, + InKeys + ) + ) + ) + ), + maps:put(OutKey, NewTerm, AttrMap); +apply_eval( + { + split, + {identifier, _, InKey}, + {string, _, Splitter}, + {identifier, _, OutKey} + }, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSplit when is_binary(TermToSplit) -> + CP = + case get({compile_pattern, Splitter}) of + undefined -> + NewSplitCP = compile_delim(Splitter), + put({compile_pattern, Splitter}, NewSplitCP), + NewSplitCP; + SplitCP -> + SplitCP + end, + TermList = binary:split(TermToSplit, CP, [global, trim_all]), + maps:put(OutKey, TermList, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {slice, {identifier, _, InKey}, WidthAttr, {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + Width = element(3, WidthAttr), + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSlice when is_binary(TermToSlice) -> + TermCount = string:length(TermToSlice) div Width, + TermList = + lists:map( + fun(S) -> string:slice(TermToSlice, S, Width) end, + lists:map( + fun(I) -> Width * I end, + lists:seq(0, TermCount - 1))), + maps:put(OutKey, TermList, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {index, + {identifier, _, InKey}, + StartAtr, LengthAttr, + {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + Start = element(3, StartAtr), + Length = element(3, LengthAttr), + case term_to_process(InKey, Term, Key, AttrMap) of + TermToIndex when is_binary(TermToIndex) -> + case string:length(TermToIndex) of + L when L >= (Start + Length) -> + maps:put( + OutKey, + string:slice(TermToIndex, Start, Length), + AttrMap + ); + _ -> + AttrMap + end; + _ -> + AttrMap + end; +apply_eval( + {kvsplit, + {identifier, _, InKey}, + {string, _, DelimPair}, {string, _, DelimKV}}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToSplit when is_binary(TermToSplit) -> + lists:foldl( + fun(S, AccMap) -> + case string:split(S, DelimKV, all) of + [K, V] -> + maps:put(K, V, AccMap); + _ -> + AccMap + end + end, + AttrMap, + string:split(TermToSplit, DelimPair, all) + ); + _ -> + AttrMap + end; +apply_eval( + {to_integer, {identifier, _, InKey}, {identifier, _, OutKey}}, + Term, Key, AttrMap) -> + case term_to_process(InKey, Term, Key, AttrMap) of + TermToConvert when is_binary(TermToConvert) -> + case string:to_integer(TermToConvert) of + {I, _Rest} when is_integer(I) -> + maps:put(OutKey, I, AttrMap); + _ -> + AttrMap + end; + AlreadyInteger when is_integer(AlreadyInteger) -> + maps:put(OutKey, AlreadyInteger, AttrMap); + _ -> + AttrMap + end; +apply_eval( + {to_string, {identifier, _, InKey}, 
{identifier, _, OutKey}},
+    Term, Key, AttrMap) ->
+    case term_to_process(InKey, Term, Key, AttrMap) of
+        TermToConvert when is_integer(TermToConvert) ->
+            maps:put(
+                OutKey,
+                list_to_binary(integer_to_list(TermToConvert)),
+                AttrMap
+            );
+        AlreadyString when is_binary(AlreadyString) ->
+            maps:put(OutKey, AlreadyString, AttrMap);
+        _ ->
+            AttrMap
+    end;
+apply_eval(
+    {map, InID, Comparator, MapList, Default, OutID},
+    Term, Key, AttrMap) ->
+    {identifier, _, InKey} = InID,
+    {identifier, _, OutKey} = OutID,
+    TermToCompare = term_to_process(InKey, Term, Key, AttrMap),
+    F = reverse_compare_mapping(element(2, Comparator), TermToCompare),
+    case lists:dropwhile(F, MapList) of
+        [] ->
+            maps:put(OutKey, element(3, Default), AttrMap);
+        [{mapping, _T, Assignment}|_Rest] ->
+            maps:put(OutKey, element(3, Assignment), AttrMap)
+    end;
+apply_eval(
+    {MathOp, OperandX, OperandY, {identifier, _, OutKey}},
+    _Term, _Key, AttrMap)
+    when MathOp == add; MathOp == subtract ->
+    X = maybe_fetch_operand(OperandX, AttrMap),
+    Y = maybe_fetch_operand(OperandY, AttrMap),
+    case MathOp of
+        add when is_integer(X), is_integer(Y) ->
+            maps:put(OutKey, X + Y, AttrMap);
+        subtract when is_integer(X), is_integer(Y) ->
+            maps:put(OutKey, X - Y, AttrMap);
+        _ ->
+            AttrMap
+    end;
+apply_eval(
+    {regex, {identifier, _, InKey}, CompiledRE, ExpKeys},
+    Term, Key, AttrMap) ->
+    ExpectedKeyLength = length(ExpKeys),
+    Opts = [{capture, all_but_first, binary}],
+    case term_to_process(InKey, Term, Key, AttrMap) of
+        TermToCapture when is_binary(TermToCapture) ->
+            case leveled_util:regex_run(TermToCapture, CompiledRE, Opts) of
+                {match, CptTerms} when length(CptTerms) == ExpectedKeyLength ->
+                    CptMap = maps:from_list(lists:zip(ExpKeys, CptTerms)),
+                    maps:merge(AttrMap, CptMap);
+                _ ->
+                    AttrMap
+            end;
+        _ ->
+            AttrMap
+    end.
+
+maybe_fetch_operand({identifier, _, ID}, AttrMap) ->
+    maps:get(ID, AttrMap, 0);
+maybe_fetch_operand(Op, _AttrMap) ->
+    element(3, Op).
+
+term_to_process(<<"term">>, Term, _Key, _AttrMap) ->
+    Term;
+term_to_process(<<"key">>, _Term, Key, _AttrMap) ->
+    Key;
+term_to_process(AttrKey, _Term, _Key, AttrMap) ->
+    maps:get(AttrKey, AttrMap, not_found).
+
+reverse_compare_mapping('<', Term) ->
+    fun({mapping, T, _A}) -> Term >= element(3, T) end;
+reverse_compare_mapping('<=', Term) ->
+    fun({mapping, T, _A}) -> Term > element(3, T) end;
+reverse_compare_mapping('>', Term) ->
+    fun({mapping, T, _A}) -> Term =< element(3, T) end;
+reverse_compare_mapping('>=', Term) ->
+    fun({mapping, T, _A}) -> Term < element(3, T) end;
+reverse_compare_mapping('=', Term) ->
+    fun({mapping, T, _A}) -> Term =/= element(3, T) end.
+
+-spec delim(binary(), binary:cp(), map(), list(string())) -> map().
+delim(_Rem, _CP, AttrMap, []) ->
+    AttrMap;
+delim(Term, CP, AttrMap, [Key|Rest]) ->
+    case binary:match(Term, CP) of
+        nomatch ->
+            maps:put(Key, Term, AttrMap);
+        {0, Length} ->
+            <<_Delim:Length/binary, Rem/binary>> = Term,
+            delim(Rem, CP, AttrMap, Rest);
+        {Pos, Length} ->
+            <<Part:Pos/binary, _Delim:Length/binary, Rem/binary>> = Term,
+            delim(
+                Rem,
+                CP,
+                maps:put(Key, Part, AttrMap),
+                Rest
+            )
+    end.
+
+-spec compile_delim(string()) -> binary:cp().
+compile_delim(Delim) ->
+    case unicode:characters_to_binary(Delim) of
+        DelimBin when is_binary(DelimBin) ->
+            binary:compile_pattern(DelimBin)
+    end.
+
+%%%============================================================================
+%%% Test
+%%%============================================================================
+
+-ifdef(TEST).
+
+-include_lib("eunit/include/eunit.hrl").
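The comparator semantics of the map step are the least obvious part of the above: reverse_compare_mapping/2 drops every mapping whose threshold the value has already passed, so the first surviving mapping supplies the assignment, and the default is used only when none survive. A small sketch, mirroring the generation example in the tests below (values illustrative):

    GenFun =
        leveled_eval:generate_eval_function(
            "index($term, 0, 4, $yob) |"
            " map($yob, <, ((\"1980\", \"GenX\"), (\"1997\", \"Millenial\")),"
            " \"GenZ\", $gen)",
            maps:new()),
    #{<<"gen">> := <<"GenX">>} = GenFun(<<"19761216">>, <<"K1">>),
    #{<<"gen">> := <<"Millenial">>} = GenFun(<<"19861216">>, <<"K1">>),
    #{<<"gen">> := <<"GenZ">>} = GenFun(<<"20051216">>, <<"K1">>).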
+ +delim_test() -> + Term1 = <<"SOMEONE|19901223|20240405|TedBob|LS1_4BT">>, + Delim = "|", + CompiledDelim = compile_delim(Delim), + Result1 = + delim( + Term1, + CompiledDelim, + #{}, + ["$fn", "$dob", "$dod", "$gns", "$pcs"] + ), + ExpMap1 = + #{ + "$fn" => <<"SOMEONE">>, + "$dob" => <<"19901223">>, + "$dod" => <<"20240405">>, + "$gns" => <<"TedBob">>, + "$pcs" => <<"LS1_4BT">> + }, + ExpResult1 = lists:sort(maps:to_list(ExpMap1)), + ?assertMatch( + ExpResult1, + lists:sort(maps:to_list(Result1)) + ), + Term2 = <<"SOMEONE|19901223|20240405|TedBob">>, + Result2 = + delim( + Term2, + CompiledDelim, + #{}, + ["$fn", "$dob", "$dod", "$gns", "$pcs"] + ), + ExpMap2 = + #{ + "$fn" => <<"SOMEONE">>, + "$dob" => <<"19901223">>, + "$dod" => <<"20240405">>, + "$gns" => <<"TedBob">> + }, + ExpResult2 = lists:sort(maps:to_list(ExpMap2)), + ?assertMatch( + ExpResult2, + lists:sort(maps:to_list(Result2)) + ), + Term3 = <<"SOMEONE|19901223||TedBob">>, + Result3 = + delim( + Term3, + CompiledDelim, + #{}, + ["$fn", "$dob", "$dod", "$gns", "$pcs"] + ), + ExpMap3 = + #{ + "$fn" => <<"SOMEONE">>, + "$dob" => <<"19901223">>, + "$gns" => <<"TedBob">> + }, + ExpResult3 = lists:sort(maps:to_list(ExpMap3)), + ?assertMatch( + ExpResult3, + lists:sort(maps:to_list(Result3)) + ), + Term4 = <<"SOMEONE|19901223|20240405|TedBob|LS1_4BT|">>, + Result4 = + delim( + Term4, + CompiledDelim, + #{}, + ["$fn", "$dob", "$dod", "$gns", "$pcs"] + ), + ?assertMatch( + ExpResult1, + lists:sort(maps:to_list(Result4)) + ) + . + +basic_compile_pattern_test() -> + % Check nothing happens unexpected with caching in process dictionary + EvalString1 = "delim($term, :delim1, ($fn, $dob, $dod, $gns, $pcs))", + EvalString2 = "split($gns, :delim2, $gnl)", + T1 = <<"SOMEONE|19901223|20240405|#Ted#Bob|LS1_4BT">>, + Fun1 = + generate_eval_function( + EvalString1 ++ "|" ++ EvalString2, + #{<<"delim1">> => <<"|">>, <<"delim2">> => <<"#">>} + ), + true = is_function(Fun1, 2), + + M1 = Fun1(T1, <<"K1">>), + GNL1 = maps:get(<<"gnl">>, M1), + ?assertMatch([<<"Ted">>, <<"Bob">>], GNL1), + T2 = <<"SOMEONE#19901223#20240405#|Ted|Bob#LS1_4BT">>, + Fun2 = + generate_eval_function( + EvalString1 ++ "|" ++ EvalString2, + #{<<"delim1">> => <<"#">>, <<"delim2">> => <<"|">>} + ), + true = is_function(Fun2, 2), + + M2 = Fun2(T2, <<"K1">>), + GNL2 = maps:get(<<"gnl">>, M2), + ?assertMatch([<<"Ted">>, <<"Bob">>], GNL2), + M3 = Fun2(T2, <<"K1">>), + GNL3 = maps:get(<<"gnl">>, M3), + ?assertMatch([<<"Ted">>, <<"Bob">>], GNL3), + M4 = Fun1(T1, <<"K1">>), + GNL4 = maps:get(<<"gnl">>, M4), + ?assertMatch([<<"Ted">>, <<"Bob">>], GNL4) + . + + +parse_error_test() -> + Q1 = "delm($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", + Q2 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, pcs))", + Q3 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs)))", + Q4 = "delim($term, $fn, ($fn, $dob, $dod, $gns, $pcs))", + ?assertMatch({error, _E1}, generate_eval_function(Q1, maps:new())), + ?assertMatch({error, _E2}, generate_eval_function(Q2, maps:new())), + ?assertMatch({error, _E3}, generate_eval_function(Q3, maps:new())), + ?assertMatch({error, _E4}, generate_eval_function(Q4, maps:new())), + + Q5 = "begins_with($fn, :prefix)", + ?assertMatch( + {error, _E5A}, + generate_eval_function(Q5, #{"prefix" => <<"ÅßE"/utf8>>}) + ), + ?assertMatch( + {error, _E5B}, + generate_eval_function(Q5, #{<<"prefx">> => <<"ÅßE"/utf8>>}) + ). 
+ + +basic_test() -> + EvalString1 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", + EvalString2 = "delim($gns, \"#\", ($gn1, $gn2, $gn3))", + + EvalString3 = EvalString1 ++ " | " ++ EvalString2, + {ok, Tokens3, _EndLine3} = leveled_evallexer:string(EvalString3), + {ok, ParsedExp3} = leveled_evalparser:parse(Tokens3), + EvalOut3 = + apply_eval( + ParsedExp3, + <<"SMITH|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut3)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut3)), + ?assertMatch(undefined, maps:get(<<"dod">>, EvalOut3, undefined)), + ?assertMatch(<<"Willow#Mia">>, maps:get(<<"gns">>, EvalOut3)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut3)), + ?assertMatch(<<"Willow">>, maps:get(<<"gn1">>, EvalOut3)), + ?assertMatch(<<"Mia">>, maps:get(<<"gn2">>, EvalOut3)), + ?assertNot(maps:is_key(<<"gn3">>, EvalOut3)), + + + EvalString4 = EvalString3 ++ " | join(($dob, $fn), \"|\", $dobfn)", + {ok, Tokens4, _EndLine4} = leveled_evallexer:string(EvalString4), + {ok, ParsedExp4} = leveled_evalparser:parse(Tokens4), + EvalOut4 = + apply_eval( + ParsedExp4, + <<"SMITH|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut4)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut4)), + ?assertMatch(undefined, maps:get(<<"dod">>, EvalOut4, undefined)), + ?assertMatch(<<"Willow#Mia">>, maps:get(<<"gns">>, EvalOut4)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut4)), + ?assertMatch(<<"Willow">>, maps:get(<<"gn1">>, EvalOut4)), + ?assertMatch(<<"Mia">>, maps:get(<<"gn2">>, EvalOut4)), + ?assertNot(maps:is_key(<<"gn3">>, EvalOut4)), + ?assertMatch(<<"19861216|SMITH">>, maps:get(<<"dobfn">>, EvalOut4)), + + + EvalString5 = EvalString4 ++ " | index($dob, 0, 4, $yob) | to_integer($yob, $yob)", + {ok, Tokens5, _EndLine5} = leveled_evallexer:string(EvalString5), + {ok, ParsedExp5} = leveled_evalparser:parse(Tokens5), + EvalOut5 = + apply_eval( + ParsedExp5, + <<"SMITH|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut5)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut5)), + ?assertMatch(undefined, maps:get(<<"dod">>, EvalOut5, undefined)), + ?assertMatch(<<"Willow#Mia">>, maps:get(<<"gns">>, EvalOut5)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut5)), + ?assertMatch(<<"Willow">>, maps:get(<<"gn1">>, EvalOut5)), + ?assertMatch(<<"Mia">>, maps:get(<<"gn2">>, EvalOut5)), + ?assertNot(maps:is_key(<<"gn3">>, EvalOut5)), + ?assertMatch(<<"19861216|SMITH">>, maps:get(<<"dobfn">>, EvalOut5)), + ?assertMatch(1986, maps:get(<<"yob">>, EvalOut5)), + + EvalString6 = EvalString1 ++ " | slice($gns, 2, $gns)", + {ok, Tokens6, _EndLine6} = leveled_evallexer:string(EvalString6), + {ok, ParsedExp6} = leveled_evalparser:parse(Tokens6), + EvalOut6 = + apply_eval( + ParsedExp6, + <<"SMITH|19861216||MAN1Ve|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut6)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut6)), + ?assertMatch(undefined, maps:get(<<"dod">>, EvalOut6, undefined)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut6)), + ?assertMatch([<<"MA">>, <<"N1">>, <<"Ve">>], maps:get(<<"gns">>, EvalOut6)), + + EvalOut7 = + apply_eval( + ParsedExp6, + <<"SMITH|19861216||MAN1VeZ|LS1 4BT#LS8 1ZZ">>, + 
<<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut7)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut7)), + ?assertMatch(undefined, maps:get(<<"dod">>, EvalOut7, undefined)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut7)), + ?assertMatch([<<"MA">>, <<"N1">>, <<"Ve">>], maps:get(<<"gns">>, EvalOut7)), + + EvalString8 = EvalString1 ++ " | split($gns, \"#\", $gns)", + {ok, Tokens8, _EndLine8} = leveled_evallexer:string(EvalString8), + {ok, ParsedExp8} = leveled_evalparser:parse(Tokens8), + EvalOut8 = + apply_eval( + ParsedExp8, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut8)), + ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut8)), + ?assertMatch(undefined, maps:get(<<"dod">>, EvalOut8, undefined)), + ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut8)), + ?assertMatch([<<"Willow">>, <<"Mia">>, <<"Vera">>], maps:get(<<"gns">>, EvalOut8)), + + EvalString9 = + "delim($term, \"|\", ($name, $height, $weight, $pick)) |" + " to_integer($height, $height) |" + " to_integer($weight, $weight) |" + " to_integer($pick, $pick) |" + " delim($key, \"|\", ($team, $number)) |" + " index($team, 0, 9, $doh)", + {ok, Tokens9, _EndLine9} = leveled_evallexer:string(EvalString9), + {ok, ParsedExp9} = leveled_evalparser:parse(Tokens9), + EvalOut9 = + apply_eval( + ParsedExp9, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name">>, EvalOut9)), + ?assertMatch(224, maps:get(<<"height">>, EvalOut9)), + ?assertMatch(95, maps:get(<<"weight">>, EvalOut9)), + ?assertMatch(<<"#1">>, maps:get(<<"pick">>, EvalOut9)), + % Not changes as not starting with integer + ?assertMatch(<<"SPURS">>, maps:get(<<"team">>, EvalOut9)), + ?assertMatch(<<"00001">>, maps:get(<<"number">>, EvalOut9)), + ?assertNot(maps:is_key(<<"doh">>, EvalOut9)), + + %% Age at 30 April 2024 + EvalString10 = + EvalString5 ++ + " | index($dob, 4, 4, $birthday)" + " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)" + " | subtract($yoc, $yob, $age)" + " | add($age, 1, $age_next)" + " | to_string($age, $age)" + , + {ok, Tokens10, _EndLine10} = leveled_evallexer:string(EvalString10), + {ok, ParsedExp10} = leveled_evalparser:parse(Tokens10), + EvalOut10A = + apply_eval( + ParsedExp10, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"37">>, maps:get(<<"age">>, EvalOut10A)), + ?assertMatch(38, maps:get(<<"age_next">>, EvalOut10A)), + EvalOut10B = + apply_eval( + ParsedExp10, + <<"SMITH|19860216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"38">>, maps:get(<<"age">>, EvalOut10B)), + EvalString10F = + EvalString1 ++ + " | index($dob, 0, 4, $yob)" + " | index($dob, 4, 4, $birthday)" + " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)" + " | subtract($yoc, $yob, $age)" + % yob has not been converted to an integer, + % so the age will not be set + " | to_string($age, $age)" + , + {ok, Tokens10F, _EndLine10F} = leveled_evallexer:string(EvalString10F), + {ok, ParsedExp10F} = leveled_evalparser:parse(Tokens10F), + EvalOut10F = + apply_eval( + ParsedExp10F, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertNot(maps:is_key(<<"age">>, EvalOut10F)), + + EvalString11A = + EvalString1 ++ + " | map($dob, <, " + "((\"1946\", \"Silent\"), (\"1966\", 
\"Boomer\")," + "(\"1980\", \"GenX\"), (\"1997\", \"Millenial\")), \"GenZ\"," + " $generation)", + {ok, Tokens11A, _EndLine11A} = leveled_evallexer:string(EvalString11A), + {ok, ParsedExp11A} = leveled_evalparser:parse(Tokens11A), + EvalOut11A = + apply_eval( + ParsedExp11A, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11A)), + EvalString11B = + EvalString1 ++ + " | map($dob, <=, " + "((\"1945\", \"Silent\"), (\"1965\", \"Boomer\")," + "(\"1979\", \"GenX\"), (\"1996\", \"Millenial\")), \"GenZ\"," + " $generation)", + {ok, Tokens11B, _EndLine11B} = leveled_evallexer:string(EvalString11B), + {ok, ParsedExp11B} = leveled_evalparser:parse(Tokens11B), + EvalOut11B = + apply_eval( + ParsedExp11B, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11B)), + EvalString11C = + EvalString1 ++ + " | map($dob, >, " + "((\"1996\", \"GenZ\"), (\"1979\", \"Millenial\")," + "(\"1965\", \"GenX\"), (\"1945\", \"Boomer\")), \"Silent\"," + " $generation)", + {ok, Tokens11C, _EndLine11C} = leveled_evallexer:string(EvalString11C), + {ok, ParsedExp11C} = leveled_evalparser:parse(Tokens11C), + EvalOut11C = + apply_eval( + ParsedExp11C, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11C)), + EvalString11D = + EvalString1 ++ + " | map($dob, >=, " + "((\"1997\", \"GenZ\"), (\"1980\", \"Millenial\")," + "(\"1966\", \"GenX\"), (\"1946\", \"Boomer\")), \"Silent\"," + " $generation)", + {ok, Tokens11D, _EndLine11D} = leveled_evallexer:string(EvalString11D), + {ok, ParsedExp11D} = leveled_evalparser:parse(Tokens11D), + EvalOut11D = + apply_eval( + ParsedExp11D, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), + ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11D)), + + EvalString12 = + "kvsplit($term, \"|\", \"=\") | index($term, 0, 12, $ts) |" + " to_integer($ts, $ts) |" + " to_integer($DEBUG, $DEBUG) |" + " to_integer($INFO, $INFO) |" + " to_integer($WARN, $WARN) |" + " to_integer($ERROR, $ERROR) |" + " to_integer($CRITICAL, $CRITICAL) |" + " add($DEBUG, $INFO, $TOTAL) |" + " add($TOTAL, $WARN, $TOTAL) |" + " add($TOTAL, $ERROR, $TOTAL) |" + " add($TOTAL, $CRITICAL, $TOTAL)" + , + {ok, Tokens12, _EndLine12} = leveled_evallexer:string(EvalString12), + {ok, ParsedExp12} = leveled_evalparser:parse(Tokens12), + EvalOut12 = + apply_eval( + ParsedExp12, + <<"063881703147|DEBUG=804|INFO=186|WARN=10">>, + <<"ABC1233">>, + maps:new() + ), + ?assertMatch(63881703147, maps:get(<<"ts">>, EvalOut12)), + ?assertMatch(1000, maps:get(<<"TOTAL">>, EvalOut12)), + ?assertNot(maps:is_key(<<"CRITICAL">>, EvalOut12)), + + EvalString13 = + "kvsplit($term, \"|\", \":\") |" + " map($cup_year, =, " + "((\"1965\", \"bad\"), (\"1970\", \"bad\"), " + "(\"1972\", \"good\"), (\"1974\", \"bad\")), " + "\"indifferent\", $cup_happy) ", + {ok, Tokens13, _EndLine13} = leveled_evallexer:string(EvalString13), + {ok, ParsedExp13} = leveled_evalparser:parse(Tokens13), + EvalOut13A = + apply_eval(ParsedExp13, <<"cup_year:1972">>, <<"ABC1">>, maps:new()), + ?assertMatch(<<"good">>, maps:get(<<"cup_happy">>, EvalOut13A)), + EvalOut13B = + apply_eval(ParsedExp13, <<"cup_year:1970">>, <<"ABC1">>, maps:new()), + ?assertMatch(<<"bad">>, maps:get(<<"cup_happy">>, 
EvalOut13B)),
+    EvalOut13C =
+        apply_eval(ParsedExp13, <<"cup_year:2024">>, <<"ABC1">>, maps:new()),
+    ?assertMatch(<<"indifferent">>, maps:get(<<"cup_happy">>, EvalOut13C)),
+
+    ExtractRegex =
+        "(?P<fn>[^\\|]*)\\|(?P<dob>[0-9]{8})\\|(?P<dod>[0-9]{0,8})\\|"
+        "(?P<gns>[^\\|]*)\\|(?P<pcs>[^\\|]*)|.",
+    ok =
+        check_regex_eval(
+            "regex($term, :regex, pcre, ($fn, $dob, $dod, $gns, $pcs))",
+            ExtractRegex
+        ),
+    ok =
+        check_regex_eval(
+            "regex($term, :regex, ($fn, $dob, $dod, $gns, $pcs))",
+            ExtractRegex
+        )
+    .
+
+unicode_test() ->
+    EvalString1 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))",
+    EvalString2 = "delim($gns, \"#\", ($gn1, $gn2, $gn3))",
+
+    EvalString3 = EvalString1 ++ " | " ++ EvalString2,
+    {ok, Tokens3, _EndLine3} = leveled_evallexer:string(EvalString3),
+    {ok, ParsedExp3} = leveled_evalparser:parse(Tokens3),
+
+    EvalOutUnicode0 =
+        apply_eval(
+            ParsedExp3,
+            <<"ÅßERG|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ"/utf8>>,
+            % Note index terms will have to be unicode_binary() type
+            % for this to work; a latin-1 binary of
+            % <<"ÅßERG|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">> will fail to
+            % match - use unicode:characters_to_binary(B, latin1, utf8) to
+            % convert
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"ÅßERG"/utf8>>, maps:get(<<"fn">>, EvalOutUnicode0)),
+    FE19 = "begins_with($fn, :prefix)",
+    {ok, Filter19} =
+        leveled_filter:generate_filter_expression(
+            FE19,
+            #{<<"prefix">> => <<"ÅßE"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter19,
+            EvalOutUnicode0
+        )
+    ),
+
+    EvalString4 = EvalString1 ++ "| slice($gns, 2, $gns)",
+    {ok, Tokens4, _EndLine4} = leveled_evallexer:string(EvalString4),
+    {ok, ParsedExp4} = leveled_evalparser:parse(Tokens4),
+    EvalOutUnicode1 =
+        apply_eval(
+            ParsedExp4,
+            <<"ÅßERG|19861216||Åbß0Ca|LS1 4BT#LS8 1ZZ"/utf8>>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    FE20 = ":gsc_check IN $gns",
+    {ok, Filter20} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"Åb"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter20,
+            EvalOutUnicode1
+        )
+    ),
+    {ok, Filter21} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"ß0"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter21,
+            EvalOutUnicode1
+        )
+    ),
+    {ok, Filter22} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"Ca">>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter22,
+            EvalOutUnicode1
+        )
+    ),
+    {ok, Filter23} =
+        leveled_filter:generate_filter_expression(
+            FE20,
+            #{<<"gsc_check">> => <<"Ca"/utf8>>}
+        ),
+    ?assert(
+        leveled_filter:apply_filter(
+            Filter23,
+            EvalOutUnicode1
+        )
+    )
+    .
+
+
+check_regex_eval(EvalString14, ExtractRegex) ->
+    {ok, ParsedExp14} =
+        generate_eval_expression(
+            EvalString14,
+            #{<<"regex">> => list_to_binary(ExtractRegex)}
+        ),
+    EvalOut14 =
+        apply_eval(
+            ParsedExp14,
+            <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>,
+            <<"9000000001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"SMITH">>, maps:get(<<"fn">>, EvalOut14)),
+    ok.
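One point worth drawing out from the regex step used above: captures are taken positionally ({capture, all_but_first, binary}) and zipped against the identifier list, so group names in the pattern are documentation only, and a mismatch between the capture count and the identifier count leaves the attribute map untouched. A minimal sketch with an assumed pattern:

    RegexEval =
        leveled_eval:generate_eval_function(
            "regex($term, :re, ($height, $weight))",
            #{<<"re">> => <<"([0-9]+)cm\\|([0-9]+)kg">>}),
    #{<<"height">> := <<"224">>, <<"weight">> := <<"95">>} =
        RegexEval(<<"224cm|95kg">>, <<"K1">>).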
+ +bad_type_test() -> + EvalString9 = + "delim($term, \"|\", ($name, $height, $weight, $pick)) |" + " to_integer($height, $height) |" + " to_integer($weight, $weight) |" + " to_integer($pick, $pick) |" + " delim($key, \"|\", ($team, $number)) |" + " index($team, 0, 9, $doh)", + {ok, Tokens9, _EndLine9} = leveled_evallexer:string(EvalString9), + {ok, ParsedExp9} = leveled_evalparser:parse(Tokens9), + EvalOut9 = + apply_eval( + ParsedExp9, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name">>, EvalOut9)), + ?assertMatch(224, maps:get(<<"height">>, EvalOut9)), + ?assertMatch(95, maps:get(<<"weight">>, EvalOut9)), + ?assertMatch(<<"#1">>, maps:get(<<"pick">>, EvalOut9)), + % Not changes as not starting with integer + ?assertMatch(<<"SPURS">>, maps:get(<<"team">>, EvalOut9)), + ?assertMatch(<<"00001">>, maps:get(<<"number">>, EvalOut9)), + ?assertNot(maps:is_key(<<"doh">>, EvalOut9)), + + EvalStringF1 = EvalString9 ++ " | delim($height, \"|\", ($foo, $bar))", + {ok, TokensF1, _EndLineF1} = leveled_evallexer:string(EvalStringF1), + {ok, ParsedExpF1} = leveled_evalparser:parse(TokensF1), + EvalOutF1 = + apply_eval( + ParsedExpF1, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertNot(maps:is_key(<<"foo">>, EvalOutF1)), + ?assertNot(maps:is_key(<<"bar">>, EvalOutF1)), + ?assertMatch(224, maps:get(<<"height">>, EvalOutF1)), + + EvalStringF2 = EvalString9 ++ " | split($height, \"|\", $foo)", + {ok, TokensF2, _EndLineF2} = leveled_evallexer:string(EvalStringF2), + {ok, ParsedExpF2} = leveled_evalparser:parse(TokensF2), + EvalOutF2 = + apply_eval( + ParsedExpF2, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertNot(maps:is_key(<<"foo">>, EvalOutF2)), + ?assertMatch(224, maps:get(<<"height">>, EvalOutF2)), + + EvalStringF3 = EvalString9 ++ " | slice($height, 1, $foo)", + {ok, TokensF3, _EndLineF3} = leveled_evallexer:string(EvalStringF3), + {ok, ParsedExpF3} = leveled_evalparser:parse(TokensF3), + EvalOutF3 = + apply_eval( + ParsedExpF3, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertNot(maps:is_key(<<"foo">>, EvalOutF3)), + ?assertMatch(224, maps:get(<<"height">>, EvalOutF3)), + + EvalStringF4 = EvalString9 ++ " | index($height, 1, 1, $foo)", + {ok, TokensF4, _EndLineF4} = leveled_evallexer:string(EvalStringF4), + {ok, ParsedExpF4} = leveled_evalparser:parse(TokensF4), + EvalOutF4 = + apply_eval( + ParsedExpF4, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertNot(maps:is_key(<<"foo">>, EvalOutF4)), + ?assertMatch(224, maps:get(<<"height">>, EvalOutF4)), + + EvalStringF5 = EvalString9 ++ " | kvsplit($height, \"|\", \"#\")", + {ok, TokensF5, _EndLineF5} = leveled_evallexer:string(EvalStringF5), + {ok, ParsedExpF5} = leveled_evalparser:parse(TokensF5), + EvalOutF5 = + apply_eval( + ParsedExpF5, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertNot(maps:is_key(<<"foo">>, EvalOutF5)), + ?assertMatch(224, maps:get(<<"height">>, EvalOutF5)), + + EvalStringF6 = EvalString9 ++ " | to_integer($height, $height_int)", + {ok, TokensF6, _EndLineF6} = leveled_evallexer:string(EvalStringF6), + {ok, ParsedExpF6} = leveled_evalparser:parse(TokensF6), + EvalOutF6 = + apply_eval( + ParsedExpF6, + <<"WEMBANYAMA|224cm|95kg|#1">>, + <<"SPURS|00001">>, + maps:new() + ), + ?assertMatch(224, maps:get(<<"height">>, EvalOutF6)), + ?assertMatch(224, maps:get(<<"height_int">>, EvalOutF6)), + + 
EvalStringF7 = EvalString9 ++ " | to_string($name, $name_str)",
+    {ok, TokensF7, _EndLineF7} = leveled_evallexer:string(EvalStringF7),
+    {ok, ParsedExpF7} = leveled_evalparser:parse(TokensF7),
+    EvalOutF7 =
+        apply_eval(
+            ParsedExpF7,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name">>, EvalOutF7)),
+    ?assertMatch(<<"WEMBANYAMA">>, maps:get(<<"name_str">>, EvalOutF7)),
+
+    EvalStringF8 =
+        EvalString9 ++
+        " | regex($height, :regex, ($height_int)) |"
+        " to_integer($height_int, $height_int)",
+
+    {ok, ParsedExpF8} =
+        generate_eval_expression(
+            EvalStringF8,
+            #{<<"regex">> => list_to_binary("(?P<height_int>[0-9]+)")}
+        ),
+    EvalOutF8 =
+        apply_eval(
+            ParsedExpF8,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertNot(maps:is_key(<<"height_int">>, EvalOutF8)),
+
+    EvalStringF9 =
+        EvalString9 ++
+        " | to_string($height, $height)"
+        " | regex($height, :regex, ($height_int)) |"
+        " to_integer($height_int, $height_int)",
+
+    {ok, ParsedExpF9} =
+        generate_eval_expression(
+            EvalStringF9,
+            #{<<"regex">> => list_to_binary("(?P<height_int>[0-9]+)")}
+        ),
+    EvalOutF9 =
+        apply_eval(
+            ParsedExpF9,
+            <<"WEMBANYAMA|224cm|95kg|#1">>,
+            <<"SPURS|00001">>,
+            maps:new()
+        ),
+    ?assertMatch(224, maps:get(<<"height_int">>, EvalOutF9))
+    .
+
+
+generate_test() ->
+    EvalString13 =
+        "kvsplit($term, \"|\", \":\") |"
+        " map($cup_year, =, "
+            "((\"1965\", \"bad\"), (\"1970\", \"bad\"), "
+            "(:clarke, \"good\"), (\"1974\", \"bad\")), "
+            "\"indifferent\", $cup_happy) ",
+    {ok, ParsedExp13} =
+        generate_eval_expression(EvalString13, #{<<"clarke">> => <<"1972">>}),
+    EvalOut13A =
+        apply_eval(ParsedExp13, <<"cup_year:1972">>, <<"ABC1">>, maps:new()),
+    ?assertMatch(<<"good">>, maps:get(<<"cup_happy">>, EvalOut13A)),
+    ?assertMatch(
+        {error, "Substitution <<\"clarke\">> not found"},
+        generate_eval_expression(EvalString13, maps:new())
+    ).
+
+-endif.
\ No newline at end of file
diff --git a/src/leveled_evallexer.xrl b/src/leveled_evallexer.xrl
new file mode 100644
index 00000000..f0016d03
--- /dev/null
+++ b/src/leveled_evallexer.xrl
@@ -0,0 +1,54 @@
+%% Lexer for eval expressions
+
+Definitions.
+WhiteSpace = ([\t\f\v\r\n\s]+)
+
+Rules.
+
+{WhiteSpace} : skip_token.
+
+\( : {token, {'(', TokenLine}}.
+\) : {token, {')', TokenLine}}.
+, : {token, {',', TokenLine}}.
+\| : {token, {'PIPE', TokenLine}}.
+
+delim : {token, {delim, TokenLine}}.
+join : {token, {join, TokenLine}}.
+split : {token, {split, TokenLine}}.
+slice : {token, {slice, TokenLine}}.
+index : {token, {index, TokenLine}}.
+kvsplit : {token, {kvsplit, TokenLine}}.
+regex : {token, {regex, TokenLine}}.
+to_integer : {token, {to_integer, TokenLine}}.
+to_string : {token, {to_string, TokenLine}}.
+add : {token, {add, TokenLine}}.
+subtract : {token, {subtract, TokenLine}}.
+map : {token, {map, TokenLine}}.
+pcre : {token, {pcre, TokenLine}}.
+
+= : {token, {comparator, '=', TokenLine}}.
+< : {token, {comparator, '<', TokenLine}}.
+> : {token, {comparator, '>', TokenLine}}.
+<= : {token, {comparator, '<=', TokenLine}}.
+>= : {token, {comparator, '>=', TokenLine}}.
+
+\$[a-zA-Z_][a-zA-Z_0-9]* : {token, {identifier, TokenLine, strip_identifier(TokenChars)}}.
+\:[a-zA-Z_][a-zA-Z_0-9]* : {token, {substitution, TokenLine, strip_substitution(TokenChars)}}.
+[1-9][0-9]* : {token, {pos_integer, TokenLine, list_to_integer(TokenChars)}}.
+0 : {token, {zero, TokenLine, list_to_integer(TokenChars)}}.
+\-[0-9]+ : {token, {neg_integer, TokenLine, list_to_integer(TokenChars)}}.
+\"[^"]+\" : {token, {string, TokenLine, strip_string(TokenChars)}}. %" + +Erlang code. + +strip_string(TokenChars) -> + unicode:characters_to_binary(lists:droplast(tl(TokenChars))). + +strip_identifier(TokenChars) -> + [36|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). + +strip_substitution(TokenChars) -> + [58|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). + diff --git a/src/leveled_evalparser.yrl b/src/leveled_evalparser.yrl new file mode 100644 index 00000000..ef4c01e2 --- /dev/null +++ b/src/leveled_evalparser.yrl @@ -0,0 +1,84 @@ +%% Grammar for eval expressions + +Nonterminals +top_level eval +operand math_operand +integer non_neg_integer +regex_method +mapping mappings mappings_list +identifiers identifier_list. + +Terminals +'(' ')' ',' +identifier string +pos_integer neg_integer zero +comparator +'PIPE' +delim join split slice index kvsplit regex map +add subtract +to_integer to_string +pcre. + +Rootsymbol top_level. + +top_level -> eval: {eval, '$1'}. + +eval -> eval 'PIPE' eval : {'PIPE', '$1', 'INTO', '$3'}. +eval -> delim '(' identifier ',' string ',' identifier_list ')' : {delim, '$3', '$5', '$7'}. +eval -> join '(' identifier_list ',' string ',' identifier ')' : {join, '$3', '$5', '$7'}. +eval -> split '(' identifier ',' string ',' identifier ')' : {split, '$3', '$5', '$7'}. +eval -> slice '(' identifier ',' pos_integer ',' identifier ')' : {slice, '$3', '$5', '$7'}. +eval -> index '(' identifier ',' non_neg_integer ',' pos_integer ',' 'identifier' ')' : {index, '$3', '$5', '$7', '$9'}. +eval -> kvsplit '(' identifier ',' string ',' string ')' : {kvsplit, '$3', '$5', '$7'}. +eval -> regex '(' identifier ',' string ',' regex_method ',' identifier_list ')' : {regex, '$3', re_compile('$5', '$7'), '$9'}. +eval -> regex '(' identifier ',' string ',' identifier_list ')' : {regex, '$3', re_compile('$5'), '$7'}. +eval -> map '(' identifier ',' comparator ',' mappings_list ',' operand ',' identifier ')' : {map, '$3', '$5', '$7', '$9', '$11'}. +eval -> to_integer '(' identifier ',' identifier ')' : {to_integer, '$3', '$5'}. +eval -> to_string '(' identifier ',' identifier ')' : {to_string, '$3', '$5'}. +eval -> subtract '(' math_operand ',' math_operand ',' identifier ')' : {subtract, '$3', '$5', '$7'}. +eval -> add '(' math_operand ',' math_operand ',' identifier ')' : {add, '$3', '$5', '$7'}. + +mappings_list -> '(' mappings ')' : '$2'. + +mappings -> mapping ',' mappings : ['$1' | '$3']. +mappings -> mapping : ['$1']. + +mapping -> '(' operand ',' operand ')' : {mapping, '$2', '$4'}. + +non_neg_integer -> pos_integer : '$1'. +non_neg_integer -> zero : '$1'. + +integer -> non_neg_integer : '$1'. +integer -> neg_integer : '$1'. + +operand -> string : '$1'. +operand -> integer : '$1'. + +math_operand -> integer : '$1'. +math_operand -> identifier : '$1'. + +regex_method -> pcre : '$1'. + +identifier_list -> '(' identifiers ')' : strip_ids('$2'). + +identifiers -> identifier ',' identifiers : ['$1' | '$3']. +identifiers -> identifier : ['$1']. + +Endsymbol '$end'. + +Right 100 'PIPE'. + +Erlang code. + +strip_ids(IDL) -> + lists:map( + fun(ID) -> element(3, ID) end, + lists:flatten(IDL) + ). + +re_compile(RegexStr) -> + re_compile(RegexStr, {pcre, element(2, RegexStr)}). + +re_compile({string, _LN, Regex}, Method) -> + {ok, CRE} = leveled_util:regex_compile(Regex, element(1, Method)), + CRE. 
\ No newline at end of file diff --git a/src/leveled_filter.erl b/src/leveled_filter.erl new file mode 100644 index 00000000..0b38d3b3 --- /dev/null +++ b/src/leveled_filter.erl @@ -0,0 +1,710 @@ +%% -------- Filter Functions --------- +%% +%% Support for different filter expressions within leveled +%% + +-module(leveled_filter). + +-export( + [ + generate_filter_function/2, + generate_filter_expression/2, + apply_filter/2, + substitute_items/3 + ]). + +%%%============================================================================ +%%% External API +%%%============================================================================ + +-spec generate_filter_function( + string(), map()) -> fun((map()) -> boolean())|{error, term()}. +generate_filter_function(FilterString, Substitutions) -> + try + {ok, ParsedFilter} = + generate_filter_expression(FilterString, Substitutions), + fun(AttrMap) -> + apply_filter(ParsedFilter, AttrMap) + end + catch + error:{badmatch, {error, Error, _LN}} -> + {error, Error}; + error:{badmatch, {error, Error}} -> + {error, Error} + end. + + +%%%============================================================================ +%%% Internal functions +%%%============================================================================ + +apply_filter({condition, Condition}, AttrMap) -> + apply_filter(Condition, AttrMap); +apply_filter({'OR', P1, P2}, AttrMap) -> + apply_filter(P1, AttrMap) orelse apply_filter(P2, AttrMap); +apply_filter({'AND', P1, P2}, AttrMap) -> + apply_filter(P1, AttrMap) andalso apply_filter(P2, AttrMap); +apply_filter({'NOT', P1}, AttrMap) -> + not apply_filter(P1, AttrMap); +apply_filter({'BETWEEN', {identifier, _, ID}, CmpA, CmpB}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_integer(V) -> + apply_filter({'BETWEEN', {integer, 0, V}, CmpA, CmpB}, AttrMap); + V when is_binary(V) -> + apply_filter({'BETWEEN', {string, 0, V}, CmpA, CmpB}, AttrMap); + _ -> + false + end; +apply_filter( + {'BETWEEN', {Type, _, V0}, {Type, _, VL}, {Type, _, VH}}, _) + when VL =< VH -> + V0 >= VL andalso V0 =< VH; +apply_filter( + {'BETWEEN', {integer, TL0, I0}, {identifier, _, ID}, CmpB}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_integer(V) -> + apply_filter( + {'BETWEEN', {integer, TL0, I0}, {integer, 0, V}, CmpB}, + AttrMap + ); + _ -> + false + end; +apply_filter( + {'BETWEEN', + {integer, TL0, I0}, {integer, TLL, IL}, {identifier, _, ID} + }, + AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_integer(V) -> + apply_filter( + {'BETWEEN', + {integer, TL0, I0}, {integer, TLL, IL}, {integer, 0, V} + }, + AttrMap + ); + _ -> + false + end; +apply_filter( + {'BETWEEN', {string, TL0, S0}, {identifier, _, ID}, CmpB}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + apply_filter( + {'BETWEEN', {string, TL0, S0}, {string, 0, V}, CmpB}, AttrMap); + _ -> + false + end; +apply_filter( + {'BETWEEN', + {string, TL0, S0}, {string, TLL, SL}, {identifier, _, ID} + }, + AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + apply_filter( + {'BETWEEN', + {string, TL0, S0}, {string, TLL, SL}, {string, 0, V} + }, + AttrMap + ); + _ -> + false + end; +apply_filter({'BETWEEN', _, _, _}, _) -> + false; +apply_filter({'IN', {string, _, TestString}, {identifier, _, ID}}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + CheckList when is_list(CheckList) -> + lists:member(TestString, CheckList); + _ -> + false + end; +apply_filter( + {'IN', {identifier, _, ID}, CheckList}, 
AttrMap) + when is_list(CheckList) -> + case maps:get(ID, AttrMap, notfound) of + notfound -> + false; + V -> + lists:member(V, lists:map(fun(C) -> element(3, C) end, CheckList)) + end; +apply_filter({{comparator, Cmp, TLC}, {identifier, _ , ID}, CmpB}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + notfound -> + false; + V when is_integer(V) -> + apply_filter( + {{comparator, Cmp, TLC}, {integer, 0, V}, CmpB}, AttrMap + ); + V when is_binary(V) -> + apply_filter( + {{comparator, Cmp, TLC}, {string, 0, V}, CmpB}, AttrMap + ) + end; +apply_filter({{comparator, Cmp, TLC}, CmpA, {identifier, _, ID}}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + notfound -> + false; + V when is_integer(V) -> + apply_filter( + {{comparator, Cmp, TLC}, CmpA, {integer, 0, V}}, AttrMap + ); + V when is_binary(V) -> + apply_filter( + {{comparator, Cmp, TLC}, CmpA, {string, 0, V}}, AttrMap + ) + end; +apply_filter({{comparator, Cmp, _}, {Type, _, TL}, {Type, _, TR}}, _AttrMap) -> + compare(Cmp, TL, TR); +apply_filter({{comparator, _, _}, _, _}, _AttrMap) -> + false; +apply_filter({contains, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + case string:find(V, SubStr) of + nomatch -> + false; + _ -> + true + end; + _ -> + false + end; +apply_filter( + {begins_with, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + case string:prefix(V, SubStr) of + nomatch -> + false; + _ -> + true + end; + _ -> + false + end; +apply_filter( + {ends_with, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) -> +case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + case string:prefix(string:reverse(V), string:reverse(SubStr)) of + nomatch -> + false; + _ -> + true + end; + _ -> + false +end; +apply_filter({attribute_exists, {identifier, _, ID}}, AttrMap) -> + maps:is_key(ID, AttrMap); +apply_filter({attribute_not_exists, {identifier, _, ID}}, AttrMap) -> + not maps:is_key(ID, AttrMap); +apply_filter({attribute_empty, {identifier, _, ID}}, AttrMap) -> + case maps:get(ID, AttrMap, notfound) of + <<>> -> + true; + _ -> + false + end. + +generate_filter_expression(FilterString, Substitutions) -> + String = unicode:characters_to_list(FilterString), + {ok, Tokens, _EndLine} = leveled_filterlexer:string(String), + case substitute_items(Tokens, Substitutions, []) of + {error, Error} -> + {error, Error}; + UpdTokens -> + leveled_filterparser:parse(UpdTokens) + end. + +substitute_items([], _Subs, UpdTokens) -> + lists:reverse(UpdTokens); +substitute_items([{substitution, LN, ID}|Rest], Subs, UpdTokens) -> + case maps:get(ID, Subs, notfound) of + notfound -> + {error, + lists:flatten( + io_lib:format("Substitution ~p not found", [ID]))}; + Value when is_binary(Value) -> + substitute_items( + Rest, Subs, [{string, LN, Value}|UpdTokens]); + Value when is_integer(Value) -> + substitute_items(Rest, Subs, [{integer, LN, Value}|UpdTokens]); + _UnexpectedValue -> + {error, + lists:flatten( + io_lib:format("Substitution ~p unexpected type", [ID]))} + end; +substitute_items([Token|Rest], Subs, UpdTokens) -> + substitute_items(Rest, Subs, [Token|UpdTokens]). + +compare('>', V, CmpA) -> V > CmpA; +compare('>=', V, CmpA) -> V >= CmpA; +compare('<', V, CmpA) -> V < CmpA; +compare('<=', V, CmpA) -> V =< CmpA; +compare('=', V, CmpA) -> V == CmpA; +compare('<>', V, CmpA) -> V =/= CmpA. 
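As a usage sketch (expression and attribute values illustrative), a filter is compiled once, with substitutions resolved up front, and then applied to attribute maps such as those produced by an eval function:

    FilterFun =
        leveled_filter:generate_filter_function(
            "($dob BETWEEN :low AND :high) AND attribute_exists($fn)",
            #{<<"low">> => <<"19800101">>, <<"high">> => <<"19891231">>}),
    true = FilterFun(#{<<"dob">> => <<"19861216">>, <<"fn">> => <<"SMITH">>}),
    false = FilterFun(#{<<"dob">> => <<"19761216">>, <<"fn">> => <<"SMITH">>}).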
+ + +%%%============================================================================ +%%% Test +%%%============================================================================ + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +parse_error_test() -> + FE1 = "($a BETWEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", + FE2 = "($a BETWEEN \"A\" AND \"A12\") ANDOR (($b >= \"30\") AND contains($c, :d))", + FE3 = "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d)))", + FE4 = "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", + SubsMissing = maps:from_list([{<<"a">>, <<"MA">>}]), + SubsWrongType = maps:from_list([{<<"d">>, "42"}]), + SubsCorrect = maps:from_list([{<<"d">>, <<"MA">>}]), + ?assertMatch( + {error, _E1}, + generate_filter_function(FE1, SubsCorrect) + ), + ?assertMatch( + {error, _E2}, + generate_filter_function(FE2, SubsCorrect) + ), + ?assertMatch( + {error, _E3}, + generate_filter_function(FE3, SubsCorrect) + ), + ?assertMatch( + {error, _E4A}, + generate_filter_function(FE4, SubsMissing) + ), + ?assertMatch( + {error, _E4B}, + generate_filter_function(FE4, SubsWrongType) + ). + +invalid_filterexpression_test() -> + FE1 = "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", + SubsMissing = maps:from_list([{<<"a">>, <<"MA">>}]), + ?assertMatch( + {error, "Substitution <<\"d\">> not found"}, + generate_filter_expression(FE1, SubsMissing) + ), + SubsWrongType = maps:from_list([{<<"d">>, "42"}]), + ?assertMatch( + {error, "Substitution <<\"d\">> unexpected type"}, + generate_filter_expression(FE1, SubsWrongType) + ), + SubsPresent = maps:from_list([{<<"d">>, <<"MA">>}]), + FE2 = "($a IN (\"A\", 12)) OR (($b >= \"30\") AND contains($c, :d))", + ?assertMatch( + {error, {1, leveled_filterparser,["syntax error before: ","12"]}}, + generate_filter_expression(FE2, SubsPresent) + ), + SubsWrongTypeForContains = maps:from_list([{<<"d">>, 42}]), + FE4 = "($a BETWEEN 12 AND 12) OR (($b >= \"30\") AND contains($c, :d))", + ?assertMatch( + {error, {1, leveled_filterparser, ["syntax error before: ","42"]}}, + generate_filter_expression(FE4, SubsWrongTypeForContains) + ). 
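Substitution handling can also be exercised in isolation: :name tokens are swapped for string or integer tokens before parsing, and an unknown name or an unsupported value type aborts with an error tuple rather than a partially substituted token list. For example (token line numbers illustrative):

    [{identifier, 1, <<"rt">>}, {comparator, '>', 1}, {integer, 1, 300}] =
        leveled_filter:substitute_items(
            [{identifier, 1, <<"rt">>},
             {comparator, '>', 1},
             {substitution, 1, <<"limit">>}],
            #{<<"limit">> => 300},
            []).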
+ +filterexpression_test() -> + FE1 = "($a BETWEEN \"A\" AND \"A12\") AND (($b >= 30) AND contains($c, :d))", + SubsPresent = maps:from_list([{<<"d">>, <<"MA">>}]), + {ok, Filter1} = generate_filter_expression(FE1, SubsPresent), + M1 = #{<<"a">> => <<"A11">>, <<"b">> => 100, <<"c">> => <<"CARTMAN">>}, + ?assert(apply_filter(Filter1, M1)), + % ok + + M2 = #{<<"a">> => <<"A11">>, <<"b">> => 10, <<"c">> => <<"CARTMAN">>}, + ?assertNot(apply_filter(Filter1, M2)), + % $b < 30 + + FE2 = "($a BETWEEN \"A\" AND \"A12\") AND (($b >= 30) OR contains($c, :d))", + {ok, Filter2} = generate_filter_expression(FE2, SubsPresent), + ?assert(apply_filter(Filter2, M2)), + % OR used so ($b >= 30) = false is ok + + FE3 = "($a BETWEEN \"A12\" AND \"A\") AND (($b >= 30) OR contains($c, :d))", + {ok, Filter3} = generate_filter_expression(FE3, SubsPresent), + ?assertNot(apply_filter(Filter3, M2)), + % swapping the low/high - not ok - between explicitly requires low/high + + M3 = #{<<"a">> => <<"A11">>, <<"b">> => <<"100">>, <<"c">> => <<"CARTMAN">>}, + ?assertNot(apply_filter(Filter1, M3)), + % substitution b is not an integer + M3A = #{<<"a">> => 11, <<"b">> => 100, <<"c">> => <<"CARTMAN">>}, + ?assertNot(apply_filter(Filter1, M3A)), + % substitution a is an integer + + FE4 = + "($dob BETWEEN \"19700101\" AND \"19791231\") " + "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))", + {ok, Filter4} = generate_filter_expression(FE4, maps:new()), + M4 = + #{ + <<"dob">> => <<"19751124">>, + <<"gns">> => <<"#Mia#Willow#Chloe">>, + <<"pcs">> => <<"#BD1 1DU#LS1 4BT">> + }, + ?assert(apply_filter(Filter4, M4)), + + FE5 = + "($dob >= \"19740301\" AND $dob <= \"19761030\")" + " OR ($dod > \"20200101\" AND $dod < \"20230101\")", + + {ok, Filter5} = generate_filter_expression(FE5, maps:new()), + F = fun(M) -> apply_filter(Filter5, M) end, + + M5 = maps:from_list([{<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20221216">>}]), + M6 = maps:from_list([{<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20191216">>}]), + M7 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20221216">>}]), + M8 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20191216">>}]), + M9 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20241216">>}]), + M10 = maps:new(), + ?assertMatch(true, F(M5)), + ?assertMatch(true, F(M6)), + ?assertMatch(true, F(M7)), + ?assertMatch(false, F(M8)), + ?assertMatch(false, F(M9)), + ?assertMatch(false, F(M10)), + + FE5A = + "($dob >= \"19740301\" AND $dob <= \"19761030\")" + " AND ($dod = \"20221216\")", + {ok, Filter5A} = generate_filter_expression(FE5A, maps:new()), + ?assert(apply_filter(Filter5A, M5)), + ?assertNot(apply_filter(Filter5A, M6)), + FE5B = + "$dob >= \"19740301\" AND $dob <= \"19761030\"" + " AND $dod = \"20221216\"", + {ok, Filter5B} = generate_filter_expression(FE5B, maps:new()), + ?assert(apply_filter(Filter5B, M5)), + ?assertNot(apply_filter(Filter5B, M6)), + + FE6 = + "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob <> \"19993112\"", + {ok, Filter6} = generate_filter_expression(FE6, maps:new()), + M11 = maps:from_list([{<<"dob">>, <<"19993112">>}]), + ?assertMatch(false, apply_filter(Filter6, M11)), + + FE7 = + "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob = \"19993112\"", + {ok, Filter7} = generate_filter_expression(FE7, maps:new()), + ?assert(apply_filter(Filter7, M11)), + + FE8 = "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob IN (\"19910301\", \"19910103\")", + {ok, Filter8} = 
generate_filter_expression(FE8, maps:new()), + ?assert(apply_filter(Filter8, #{<<"dob">> => <<"19910301">>})), + ?assert(apply_filter(Filter8, #{<<"dob">> => <<"19910103">>})), + ?assertNot(apply_filter(Filter8, #{<<"dob">> => <<"19910102">>})), + ?assertNot(apply_filter(Filter8, #{<<"gn">> => <<"Nikki">>})), + + FE9 = "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + " OR $dob IN (\"19910301\", \"19910103\")", + % Only match with a type match + {ok, Filter9} = generate_filter_expression(FE9, maps:new()), + ?assert(apply_filter(Filter9, #{<<"dob">> => <<"19910301">>})), + ?assert(apply_filter(Filter9, #{<<"dob">> => <<"19910103">>})), + ?assertNot(apply_filter(Filter9, #{<<"dob">> => <<"19910401">>})), + ?assertNot(apply_filter(Filter9, #{<<"dob">> => <<"19910104">>})), + + FE10 = "NOT contains($gn, \"MA\") AND " + "(NOT $dob IN (\"19910301\", \"19910103\"))", + {ok, Filter10} = generate_filter_expression(FE10, maps:new()), + ?assert( + apply_filter( + Filter10, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter10, + #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter10, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>})), + + FE11 = "NOT contains($gn, \"MA\") AND " + "NOT $dob IN (\"19910301\", \"19910103\")", + {ok, Filter11} = generate_filter_expression(FE11, maps:new()), + ?assert( + apply_filter( + Filter11, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter11, + #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>})), + ?assertNot( + apply_filter( + Filter11, + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>})), + + FE12 = "begins_with($gn, \"MA\") AND begins_with($fn, :fn)", + {ok, Filter12} = generate_filter_expression(FE12, #{<<"fn">> => <<"SU">>}), + ?assert( + apply_filter( + Filter12, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12, + #{<<"gn">> => <<"MITTY">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SIMMS">>})), + ?assertNot( + apply_filter( + Filter12, + #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>})), + + FE12E = "ends_with($gn, \"TY\") AND begins_with($fn, :fn)", + {ok, Filter12E} = generate_filter_expression(FE12E, #{<<"fn">> => <<"SU">>}), + ?assert( + apply_filter( + Filter12E, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12E, + #{<<"gn">> => <<"MATTI">>, <<"fn">> => <<"SUMMER">>})), + ?assertNot( + apply_filter( + Filter12E, + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SIMMS">>})), + ?assertNot( + apply_filter( + Filter12E, + #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>})), + + FE13 = "attribute_exists($dob) AND attribute_not_exists($consent) " + "AND attribute_empty($dod)", + {ok, Filter13} = generate_filter_expression(FE13, maps:new()), + ?assert( + apply_filter( + Filter13, + #{<<"dob">> => <<"19440812">>, <<"dod">> => <<>>})), + ?assertNot( + apply_filter( + Filter13, + #{<<"dod">> => <<>>})), + ?assertNot( + apply_filter( + Filter13, + #{<<"dob">> => <<"19440812">>, + <<"consent">> => <<>>, + <<"dod">> => <<>>})), + ?assertNot( + apply_filter( + Filter13, + #{<<"dob">> => <<"19440812">>, <<"dod">> => <<"20240213">>})), + + FE14 = "\"M1\" IN $gns", + {ok, Filter14} = generate_filter_expression(FE14, maps:new()), + ?assert( + apply_filter( + Filter14, + #{<<"gns">> => [<<"MA">>, <<"M1">>, <<"A0">>]})), + ?assertNot( + apply_filter( 
+ Filter14, + #{<<"gns">> => [<<"MA">>, <<"M2">>, <<"A0">>]})), + ?assertNot( + apply_filter( + Filter14, + #{<<"gns">> => <<"M1">>})), + + FE15 = + "(attribute_empty($dod) AND $dob < :date)" + "OR :date BETWEEN $dob AND $dod", + {ok, Filter15} = + generate_filter_expression(FE15, #{<<"date">> => <<"20200101">>}), + ?assert( + apply_filter( + Filter15, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<>>} + ) + ), + ?assert( + apply_filter( + Filter15, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<"20210105">>} + ) + ), + ?assertNot( + apply_filter( + Filter15, + #{<<"dob">> => <<"20210303">>, <<"dod">> => <<"20230105">>} + ) + ), + ?assertNot( + apply_filter( + Filter15, + #{<<"dob">> => <<"196900303">>, <<"dod">> => <<"19890105">>} + ) + ), + ?assertNot( + apply_filter( + Filter15, + #{<<"dob">> => 199900303, <<"dod">> => <<>>} + ) + ), + + FE15A = + "(attribute_empty($dod) AND :date > $dob)" + "OR :date BETWEEN $dob AND $dod", + {ok, Filter15A} = + generate_filter_expression(FE15A, #{<<"date">> => <<"20200101">>}), + ?assert( + apply_filter( + Filter15A, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<>>} + ) + ), + ?assertNot( + apply_filter( + Filter15A, + #{<<"dob">> => <<"202300303">>, <<"dod">> => <<>>} + ) + ), + ?assertNot( + apply_filter( + Filter15A, + #{<<"dob">> => <<"202300303">>} + ) + ), + ?assert( + apply_filter( + Filter15A, + #{<<"dob">> => <<"199900303">>, <<"dod">> => <<"20210105">>} + ) + ), + + FE16 = ":response_time BETWEEN $low_point AND $high_point", + {ok, Filter16} = + generate_filter_expression( + FE16, + #{<<"response_time">> => 346} + ), + ?assert( + apply_filter( + Filter16, + #{<<"low_point">> => 200, <<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 360, <<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 210, <<"high_point">> => 320} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => <<"200">>, <<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 200, <<"high_point">> => <<"420">>} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"high_point">> => 420} + ) + ), + ?assertNot( + apply_filter( + Filter16, + #{<<"low_point">> => 200} + ) + ), + + FE17 = ":response_time > $high_point", + {ok, Filter17} = + generate_filter_expression( + FE17, + #{<<"response_time">> => 350} + ), + ?assert( + apply_filter( + Filter17, + #{<<"high_point">> => 310} + ) + ), + ?assertNot( + apply_filter( + Filter17, + #{<<"high_point">> => <<"310">>} + ) + ), + ?assertNot( + apply_filter( + Filter17, + #{} + ) + ), + + FE18 = "$dod BETWEEN $dob AND :today", + {ok, Filter18} = + generate_filter_expression(FE18, #{<<"today">> => <<"20240520">>}), + ?assert( + apply_filter( + Filter18, + #{<<"dob">> => <<"19900505">>, <<"dod">> => <<"20231015">>} + ) + ), + ?assertNot( + apply_filter( + Filter18, + #{<<"dob">> => <<"19900505">>, <<"dod">> => <<"20261015">>} + ) + ), + ?assertNot( + apply_filter( + Filter18, + #{<<"dob">> => <<"19900505">>} + ) + ), + + FE19 = "begins_with($fn, :prefix)", + {ok, Filter19} = + generate_filter_expression(FE19, #{<<"prefix">> => <<"Åb"/utf8>>}), + ?assert( + apply_filter( + Filter19, + #{<<"fn">> => <<"Åberg"/utf8>>} + ) + ), + ?assertNot( + apply_filter( + Filter19, + #{<<"fn">> => <<"Aberg">>} + ) + ), + ?assertNot( + apply_filter( + Filter19, + #{<<"fn">> => <<"Aberg"/utf8>>} + ) + ) + + . + +-endif. 
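The unit tests above exercise the filter-expression language end to end. As a
minimal sketch of that flow, using only the generate_filter_expression/2 and
apply_filter/2 calls the tests themselves make (the expression, substitution
values and attribute maps below are illustrative, not part of the patch):

%% Parse once, with :prefix supplied via the substitution map, then apply the
%% resulting filter to attribute maps of binaries.
filter_sketch() ->
    FE = "begins_with($fn, :prefix) AND $dob BETWEEN \"19700101\" AND \"19791231\"",
    {ok, Filter} = generate_filter_expression(FE, #{<<"prefix">> => <<"SM">>}),
    true = apply_filter(Filter, #{<<"fn">> => <<"SMITH">>, <<"dob">> => <<"19750630">>}),
    false = apply_filter(Filter, #{<<"fn">> => <<"JONES">>, <<"dob">> => <<"19750630">>}).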
diff --git a/src/leveled_filterlexer.xrl b/src/leveled_filterlexer.xrl new file mode 100644 index 00000000..d85b51f9 --- /dev/null +++ b/src/leveled_filterlexer.xrl @@ -0,0 +1,51 @@ +%% Lexer for filter and conditional expressions +%% Author: Thomas Arts + +Definitions. +WhiteSpace = ([\t\f\v\r\n\s]+) + +Rules. + +{WhiteSpace} : skip_token. + +\( : {token, {'(', TokenLine}}. +\) : {token, {')', TokenLine}}. + +, : {token, {',', TokenLine}}. +NOT : {token, {'NOT', TokenLine}}. +AND : {token, {'AND', TokenLine}}. +OR : {token, {'OR', TokenLine}}. +BETWEEN : {token, {'BETWEEN', TokenLine}}. +IN : {token, {'IN', TokenLine}}. += : {token, {comparator, '=', TokenLine}}. +< : {token, {comparator, '<', TokenLine}}. +> : {token, {comparator, '>', TokenLine}}. +<> : {token, {comparator, '<>', TokenLine}}. +<= : {token, {comparator, '<=', TokenLine}}. +>= : {token, {comparator, '>=', TokenLine}}. + +contains : {token, {contains, TokenLine}}. +begins_with : {token, {begins_with, TokenLine}}. +ends_with : {token, {ends_with, TokenLine}}. +attribute_exists : {token, {attribute_exists, TokenLine}}. +attribute_not_exists : {token, {attribute_not_exists, TokenLine}}. +attribute_empty : {token, {attribute_empty, TokenLine}}. + +\$[a-zA-Z_][a-zA-Z_0-9]* : {token, {identifier, TokenLine, strip_identifier(TokenChars)}}. +\:[a-zA-Z_][a-zA-Z_0-9]* : {token, {substitution, TokenLine, strip_substitution(TokenChars)}}. +\-[0-9]+ : {token, {integer, TokenLine, list_to_integer(TokenChars)}}. +[0-9]+ : {token, {integer, TokenLine, list_to_integer(TokenChars)}}. +\"[^"]+\" : {token, {string, TokenLine, strip_string(TokenChars)}}. %" + +Erlang code. + +strip_string(TokenChars) -> + unicode:characters_to_binary(lists:droplast(tl(TokenChars))). + +strip_identifier(TokenChars) -> + [36|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). + +strip_substitution(TokenChars) -> + [58|StrippedChars] = TokenChars, + unicode:characters_to_binary(StrippedChars). diff --git a/src/leveled_filterparser.yrl b/src/leveled_filterparser.yrl new file mode 100644 index 00000000..4a94a798 --- /dev/null +++ b/src/leveled_filterparser.yrl @@ -0,0 +1,53 @@ +%% Grammar for filter expressions +%% Author: Thomas Arts + +Nonterminals +top_level condition operand str_list strings. + + +Terminals +'(' ')' comparator identifier string integer +',' +'NOT' 'AND' 'OR' 'IN' 'BETWEEN' +contains begins_with ends_with +attribute_exists attribute_not_exists attribute_empty. + + +Rootsymbol top_level. + +top_level -> condition: {condition, '$1'}. + +condition -> operand comparator operand : {'$2', '$1', '$3'}. +condition -> operand 'BETWEEN' operand 'AND' operand : {'BETWEEN', '$1', '$3', '$5'}. +condition -> identifier 'IN' str_list : {'IN', '$1', '$3'}. +condition -> string 'IN' identifier : {'IN', '$1', '$3'}. + +condition -> contains '(' identifier ',' string ')' : {contains, '$3', '$5'}. +condition -> begins_with '(' identifier ',' string ')' : {begins_with, '$3', '$5'}. +condition -> ends_with '(' identifier ',' string ')' : {ends_with, '$3', '$5'}. +condition -> attribute_exists '(' identifier ')' : {attribute_exists, '$3'}. +condition -> attribute_not_exists '(' identifier ')' : {attribute_not_exists, '$3'}. +condition -> attribute_empty '(' identifier ')' : {attribute_empty, '$3'}. + +condition -> condition 'AND' condition : {'AND', '$1', '$3'}. +condition -> condition 'OR' condition : {'OR', '$1', '$3'}. +condition -> 'NOT' condition : {'NOT', '$2'}. +condition -> '(' condition ')' : '$2'. + +operand -> identifier : '$1'. 
+operand -> integer : '$1'. +operand -> string : '$1'. + +str_list -> '(' strings ')' : '$2'. + +strings -> string ',' strings : ['$1' | '$3']. +strings -> string : ['$1']. + +Endsymbol '$end'. + +Right 200 'NOT'. +Nonassoc 200 comparator. +Left 150 'AND'. +Left 100 'OR'. + +Erlang code. diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index 43d96e1f..de6ad80e 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -61,7 +61,7 @@ -type objectacc_fun() :: fun((leveled_codec:object_key(), any(), foldacc()) -> foldacc()). -type mp() - :: {re_pattern, term(), term(), term(), term()}. + :: any(). -export_type([fold_keys_fun/0, mp/0]). @@ -128,12 +128,11 @@ bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) -> end, {async, Runner}. --spec index_query(snap_fun(), - {leveled_codec:ledger_key(), - leveled_codec:ledger_key(), - {boolean(), undefined|mp()}}, - {fold_keys_fun(), foldacc()}) - -> {async, runner_fun()}. +-spec index_query( + snap_fun(), + {leveled_codec:ledger_key(), leveled_codec:ledger_key(), + {boolean()|binary(), leveled_codec:term_expression()}}, + {fold_keys_fun(), foldacc()}) -> {async, runner_fun()}. %% @doc %% Secondary index query %% This has the special capability that it will expect a message to be thrown @@ -166,7 +165,7 @@ index_query(SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT) -> leveled_codec:key()|null, {leveled_codec:single_key()|null, leveled_codec:single_key()|null}, {fold_keys_fun(), foldacc()}, - leveled_codec:regular_expression()) + leveled_codec:term_expression()) -> {async, runner_fun()}. %% @doc %% Fold over all keys in `KeyRange' under tag (restricted to a given bucket) @@ -514,7 +513,7 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) -> %% the full object), or {true, CheckPresence} - in which case a proxy object %% will be created that if understood by the fold function will allow the fold %% function to work on the head of the object, and defer fetching the body in -%% case such a fetch is unecessary. +%% case such a fetch is unnecessary. foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList, LastModRange, MaxObjectCount) -> {FoldFun, InitAcc} = @@ -693,7 +692,7 @@ accumulate_keys(FoldKeysFun, undefined) -> accumulate_keys(FoldKeysFun, TermRegex) -> fun(Key, _Value, Acc) -> {B, K} = leveled_codec:from_ledgerkey(Key), - case re:run(K, TermRegex) of + case leveled_util:regex_run(K, TermRegex, []) of nomatch -> Acc; _ -> diff --git a/src/leveled_setop.erl b/src/leveled_setop.erl new file mode 100644 index 00000000..b4d70212 --- /dev/null +++ b/src/leveled_setop.erl @@ -0,0 +1,170 @@ +%% -------- Set Operations --------- +%% +%% Support for set operations (i.e on sets of keys) within leveled +%% + +-module(leveled_setop). + +-export([generate_setop_function/1]). + + +%%%============================================================================ +%%% External API +%%%============================================================================ + +-spec generate_setop_function( + string()) -> + fun((#{non_neg_integer() => sets:set(binary())}) + -> sets:set(binary()) + )| + {error, term()}. +generate_setop_function(EvalString) -> + try + {ok, ParsedEval} = generate_setop_expression(EvalString), + fun(MapOfSets) -> + apply_setop(ParsedEval, MapOfSets) + end + catch + error:{badmatch, {error, Error, _LN}} -> + {error, Error}; + error:{badmatch, {error, Error}} -> + {error, Error} + end. 
+ +%%%============================================================================ +%%% Internal functions +%%%============================================================================ + +generate_setop_expression(EvalString) -> + String = unicode:characters_to_list(EvalString), + {ok, Tokens, _EndLine} = leveled_setoplexer:string(String), + leveled_setopparser:parse(Tokens). + +apply_setop({setop, SetOp}, SetList) -> + apply_setop(SetOp, SetList); +apply_setop({set_id, _, SetID}, SetList) -> + get_set(SetID, SetList); +apply_setop( + {SetFunctionName, {set_id, _, SetIDa}, {set_id, _, SetIDb}}, + SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction(get_set(SetIDa, SetList), get_set(SetIDb, SetList)); +apply_setop( + {SetFunctionName, {set_id, _, SetIDa}, Condition}, + SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction(get_set(SetIDa, SetList), apply_setop(Condition, SetList)); +apply_setop( + {SetFunctionName, Condition, {set_id, _, SetIDb}}, + SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction(apply_setop(Condition, SetList), get_set(SetIDb, SetList)); +apply_setop({SetFunctionName, ConditionA, ConditionB}, SetList) -> + SetFunction = set_function(SetFunctionName), + SetFunction( + apply_setop(ConditionA, SetList), apply_setop(ConditionB, SetList) + ). + +set_function('UNION') -> + fun(A, B) -> sets:union(A, B) end; +set_function('INTERSECT') -> + fun(A, B) -> sets:intersection(A, B) end; +set_function('SUBTRACT') -> + fun(A, B) -> sets:subtract(A, B) end. + +%% Return empty set if index not present in given set +%% (That is, do not throw an error) +get_set(SetID, SetMap) -> + maps:get(SetID, SetMap, sets:new()). + + +%%%============================================================================ +%%% Test +%%%============================================================================ + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +generate_setop_function_noerror(S) -> + case generate_setop_function(S) of + F when is_function(F, 1) -> + F + end. + +parse_error_test() -> + Q1 = "($1 INTERSECT $2) XOR $3", + Q2 = "($1 INTERSECT $2) UNION ($3 INTERSEC$4)", + Q3 = "($S1 INTERSECT $2 INTERSECT $5) UNION ($3 INTERSECT $4)", + Q4 = "($1 INTERSECT $2 INTERSECT $5) UNION ($3 SUBTRACT $4) UNION ()", + ?assertMatch({error, _E1}, generate_setop_function(Q1)), + ?assertMatch({error, _E2}, generate_setop_function(Q2)), + ?assertMatch({error, _E3}, generate_setop_function(Q3)), + ?assertMatch({error, _E4}, generate_setop_function(Q4)). + + +parser_formal_test() -> + Q1 = "($1 INTERSECT $2) UNION $3", + Q2 = "($1 INTERSECT $2) UNION ($3 INTERSECT $4)", + Q3 = "($1 INTERSECT $2 INTERSECT $5) UNION ($3 INTERSECT $4)", + Q4 = "($1 INTERSECT $2 INTERSECT $5) UNION ($3 SUBTRACT $4)", + parser_tester(Q1, Q2, Q3, Q4). 
+ +parser_tester(Q1, Q2, Q3, Q4) -> + S1 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>]), + S2 = sets:from_list([<<"K3">>, <<"K4">>, <<"K5">>, <<"K6">>, <<"K7">>]), + S3 = sets:from_list([<<"K7">>, <<"K8">>, <<"K9">>]), + S4 = sets:from_list([<<"K7">>, <<"K9">>, <<"K0">>]), + S5 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>, <<"K8">>, <<"K9">>]), + + F1 = generate_setop_function_noerror(Q1), + F2 = generate_setop_function_noerror(Q2), + F3 = generate_setop_function_noerror(Q3), + F4 = generate_setop_function_noerror(Q4), + + R1 = + lists:sort( + sets:to_list(F1(#{1 => S1, 2 => S2, 3 => S3}) + ) + ), + R2 = + lists:sort( + sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3, 4 => S4}) + ) + ), + R3 = + lists:sort( + sets:to_list(F3(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5}) + ) + ), + R4 = + lists:sort( + sets:to_list(F4(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5}) + ) + ), + + ?assertMatch( + [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K8">>, <<"K9">>], R1), + ?assertMatch( + [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K9">>], R2), + ?assertMatch( + [<<"K3">>, <<"K7">>, <<"K9">>], R3), + ?assertMatch( + [<<"K3">>, <<"K8">>], R4). + +minimal_test() -> + S1 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>]), + F1 = generate_setop_function_noerror("$1"), + R1 = lists:sort(sets:to_list(F1(#{1 => S1}))), + ?assertMatch([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>], R1), + S2 = sets:from_list([<<"K3">>, <<"K4">>, <<"K5">>, <<"K6">>, <<"K7">>]), + S3 = sets:from_list([<<"K1">>, <<"K2">>]), + F2 = generate_setop_function_noerror("$1 INTERSECT ($2 UNION $3)"), + R2 = lists:sort(sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3}))), + ?assertMatch([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>], R2), + F3 = generate_setop_function_noerror("$1 INTERSECT ($2 UNION $2)"), + R3 = lists:sort(sets:to_list(F3(#{1 => S1, 2 => S2}))), + ?assertMatch([<<"K3">>, <<"K4">>, <<"K5">>], R3). + + +-endif. \ No newline at end of file diff --git a/src/leveled_setoplexer.xrl b/src/leveled_setoplexer.xrl new file mode 100644 index 00000000..1a94cdf5 --- /dev/null +++ b/src/leveled_setoplexer.xrl @@ -0,0 +1,21 @@ +Definitions. +WhiteSpace = ([\t\f\v\r\n\s]+) + +Rules. + +{WhiteSpace} : skip_token. + +\( : {token, {'(', TokenLine}}. +\) : {token, {')', TokenLine}}. + +UNION : {token, {'UNION', TokenLine}}. +INTERSECT : {token, {'INTERSECT', TokenLine}}. +SUBTRACT : {token, {'SUBTRACT', TokenLine}}. + +\$[1-9][0-9]* : {token, {set_id, TokenLine, strip_identifier(TokenChars)}}. + +Erlang code. + +strip_identifier(TokenChars) -> + [36|StrippedChars] = TokenChars, + list_to_integer(StrippedChars). \ No newline at end of file diff --git a/src/leveled_setopparser.yrl b/src/leveled_setopparser.yrl new file mode 100644 index 00000000..e261efc0 --- /dev/null +++ b/src/leveled_setopparser.yrl @@ -0,0 +1,28 @@ +%% Grammar for key set operations + +Nonterminals +top_level condition. + + +Terminals +'(' ')' set_id +'UNION' 'INTERSECT' 'SUBTRACT'. + + +Rootsymbol top_level. + +top_level -> condition: {setop, '$1'}. + +condition -> condition 'UNION' condition : {'UNION', '$1', '$3'}. +condition -> condition 'INTERSECT' condition : {'INTERSECT', '$1', '$3'}. +condition -> condition 'SUBTRACT' condition : {'SUBTRACT', '$1', '$3'}. +condition -> '(' condition ')' : '$2'. +condition -> set_id : '$1'. + +Endsymbol '$end'. + +Right 200 'SUBTRACT'. +Left 150 'INTERSECT'. +Left 100 'UNION'. + +Erlang code. 
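The set-operation expressions accepted by this grammar are exercised in the
leveled_setop tests above. A minimal sketch of that flow, using only the
generate_setop_function/1 API those tests call (the set contents below are
illustrative):

%% Compile the expression once, then apply the returned fun to a map of sets
%% keyed by the $n identifiers; identifiers absent from the map resolve to the
%% empty set rather than raising an error.
setop_sketch() ->
    SetOpFun = leveled_setop:generate_setop_function("$1 INTERSECT ($2 UNION $3)"),
    S1 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>]),
    S2 = sets:from_list([<<"K2">>]),
    S3 = sets:from_list([<<"K3">>, <<"K4">>]),
    %% Keys present in $1 and in at least one of $2 or $3
    [<<"K2">>, <<"K3">>] =
        lists:sort(sets:to_list(SetOpFun(#{1 => S1, 2 => S2, 3 => S3}))).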
diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 9bb74c26..22aa626f 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -942,15 +942,19 @@ empty_test() -> search_range_idx_test() -> Tree = {idxt,1, - {{[{{o_rkv,"Bucket1","Key1",null}, - {manifest_entry,{o_rkv,"Bucket","Key9083",null}, - {o_rkv,"Bucket1","Key1",null}, - "<0.320.0>","./16_1_6.sst", none}}]}, - {1,{{o_rkv,"Bucket1","Key1",null},1,nil,nil}}}}, + {{[{{o_rkv,<<"Bucket1">>,<<"Key1">>,null}, + leveled_pmanifest:new_entry( + {o_rkv, <<"Bucket">>, <<"Key9083">>, null}, + {o_rkv, <<"Bucket1">>, <<"Key1">>, null}, + list_to_pid("<0.320.0>"), + "./16_1_6.sst", + none + )}]}, + {1, {{o_rkv, <<"Bucket1">>, <<"Key1">>, null}, 1, nil, nil}}}}, R = search_range( - {o_rkv, "Bucket", null, null}, - {o_rkv, "Bucket", null, null}, + {o_rkv, <<"Bucket">>, null, null}, + {o_rkv, <<"Bucket">>, null, null}, Tree, fun leveled_pmanifest:entry_startkey/1 ), diff --git a/src/leveled_util.erl b/src/leveled_util.erl index b6d45676..4b21166e 100644 --- a/src/leveled_util.erl +++ b/src/leveled_util.erl @@ -11,9 +11,11 @@ integer_time/1, magic_hash/1, t2b/1, - safe_rename/4 - ] - ). + safe_rename/4, + regex_run/3, + regex_compile/1, + regex_compile/2 + ]). -define(WRITE_OPS, [binary, raw, read, write]). @@ -42,6 +44,34 @@ integer_time(TS) -> calendar:datetime_to_gregorian_seconds(DT). +-type match_option() :: + caseless | + {offset, non_neg_integer()} | + {capture, value_spec()} | + {capture, value_spec(), value_spec_type()}. +-type value_spec() :: + all | all_but_first | first | none | [value_id()]. +-type value_spec_type() :: binary. +-type value_id() :: string(). +-type match_index() :: {non_neg_integer(), non_neg_integer()}. + +-spec regex_run( + iodata(), leveled_codec:actual_regex(), list(match_option())) -> + match | + nomatch | + {match, list(match_index())} | + {match, list(binary())} | + {error, atom()}. +regex_run(Subject, CompiledPCRE, Opts) -> + re:run(Subject, CompiledPCRE, Opts). + +-spec regex_compile(iodata()) -> {ok, leveled_codec:actual_regex()}. +regex_compile(PlainRegex) -> + regex_compile(PlainRegex, pcre). + +regex_compile(PlainRegex, pcre) -> + re:compile(PlainRegex). + -spec magic_hash(any()) -> 0..16#FFFFFFFF. %% @doc %% Use DJ Bernstein magic hash function. Note, this is more expensive than diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 13beb8d4..5a4d4a3f 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -802,7 +802,12 @@ space_clear_ondelete(_Config) -> {ok, Book1} = leveled_bookie:book_start(StartOpts1), G2 = fun testutil:generate_compressibleobjects/2, testutil:load_objects( - 20000, [uuid, uuid, uuid, uuid], Book1, no_check, G2), + 20000, + [binary_uuid, binary_uuid, binary_uuid, binary_uuid], + Book1, + no_check, + G2 + ), FoldKeysFun = fun(B, K, Acc) -> [{B, K}|Acc] end, diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 07beabd2..19cd34dd 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -12,7 +12,10 @@ query_count/1, multibucket_fold/1, foldobjects_bybucket_range/1, - rotating_objects/1]). + rotating_objects/1, + capture_and_filter_terms/1, + complex_queries/1 + ]). all() -> [ expiring_indexes, @@ -22,7 +25,9 @@ all() -> [ query_count, multibucket_fold, rotating_objects, - foldobjects_bybucket_range + foldobjects_bybucket_range, + capture_and_filter_terms, + complex_queries ]. 
init_per_suite(Config) -> @@ -147,13 +152,6 @@ expiring_indexes(_Config) -> Bookie1, B0, K0, 5, <<"value">>, leveled_util:integer_now() + 10), timer:sleep(1000), {async, Folder2} = IndexFold(), - leveled_bookie:book_indexfold( - Bookie1, - B0, - {FoldFun, InitAcc}, - {<<"temp_int">>, 5, 8}, - {true, undefined} - ), QR2 = Folder2(), io:format("Query with additional entry length ~w~n", [length(QR2)]), true = lists:sort(QR2) == lists:sort([{5, B0, K0}|LoadedEntriesInRange]), @@ -481,7 +479,7 @@ small_load_with2i(_Config) -> IndexGen = testutil:get_randomindexes_generator(8), ObjL1 = testutil:generate_objects( - 10000, uuid, [], ObjectGen, IndexGen), + 10000, binary_uuid, [], ObjectGen, IndexGen), testutil:riakload(Bookie1, ObjL1), ChkList1 = lists:sublist(lists:sort(ObjL1), 100), testutil:check_forlist(Bookie1, ChkList1), @@ -569,8 +567,9 @@ small_load_with2i(_Config) -> lists:foldl(SumFromObjLFun, 0, ObjL1), ChkList1Total = lists:foldl(SumFromObjLFun, 0, ChkList1), - io:format("Total in original object list ~w and from removed list ~w~n", - [ObjL1Total, ChkList1Total]), + io:format( + "Total in original object list ~w and from removed list ~w~n", + [ObjL1Total, ChkList1Total]), Total1 = ObjL1Total - ChkList1Total, @@ -608,7 +607,7 @@ query_count(_Config) -> testutil:check_forobject(Book1, TestObject), lists:foreach( fun(_X) -> - V = testutil:get_compressiblevalue(), + V = <<"TestValue">>, Indexes = testutil:get_randomindexes_generator(8), SW = os:timestamp(), ObjL1 = @@ -727,12 +726,13 @@ query_count(_Config) -> Mia2000Count2 = lists:foldl( fun({Term, _Key}, Acc) -> - case re:run(Term, RegMia) of + case leveled_util:regex_run(Term, RegMia, []) of nomatch -> Acc; _ -> Acc + 1 - end end, + end + end, 0, Mia2KFolder2()), ok = case Mia2000Count2 of @@ -741,15 +741,24 @@ query_count(_Config) -> [Mia2000Count1]), ok end, - {ok, RxMia2K} = re:compile("^2000[0-9]+Mia"), + {ok, RxMia2K} = leveled_util:regex_compile("^2000[0-9]+Mia"), Query3 = {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, {false, RxMia2K}}, - {async, - Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), + {async, Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), Mia2000Count1 = length(Mia2KFolder3()), + {ok, RxMia2KPCRE} = re:compile("^2000[0-9]+Mia"), + Query3PCRE = + {index_query, + BucketBin, + {fun testutil:foldkeysfun/3, []}, + {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, + {false, RxMia2KPCRE}}, + {async, Mia2KFolder3PCRE} = + leveled_bookie:book_returnfolder(Book2, Query3PCRE), + Mia2000Count1 = length(Mia2KFolder3PCRE()), V9 = testutil:get_compressiblevalue(), Indexes9 = testutil:get_randomindexes_generator(8), @@ -881,6 +890,446 @@ query_count(_Config) -> testutil:reset_filestructure(). 
+capture_and_filter_terms(_Config) -> + RootPath = testutil:reset_filestructure(), + Bucket = {<<"Type1">>, <<"Bucket1">>}, + IdxName = <<"people_bin">>, + {ok, Book1} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + V1 = <<"V1">>, + IndexGen = + fun() -> + [{add, IdxName, list_to_binary(perf_SUITE:random_people_index())}] + end, + ObjL1 = + testutil:generate_objects( + 100000, binary_uuid, [], V1, IndexGen, Bucket), + testutil:riakload(Book1, ObjL1), + + StartDoB = <<"19740301">>, + EndDoB = <<"19761031">>, + + WillowLeedsFinder = + "[^\\|]*\\|[0-9]{8}\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|" + "[^\\|]*#LS[^\\|]*", + + SW0 = os:timestamp(), + {ok, WillowLeedsPCRE} = re:compile(WillowLeedsFinder), + + QueryPCRE0 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {true, WillowLeedsPCRE}}, + {async, Runner0} = leveled_bookie:book_returnfolder(Book1, QueryPCRE0), + Results0 = Runner0(), + BornMid70s0 = + lists:filtermap( + fun({IdxValue, Key}) -> + DoB = + list_to_binary( + lists:nth( + 2, + string:tokens(binary_to_list(IdxValue), "|") + ) + ), + case (DoB >= StartDoB) andalso (DoB =< EndDoB) of + true -> + {true, Key}; + false -> + false + end + end, + Results0 + ), + + SW1 = os:timestamp(), + + WillowLeedsExtractor = + "[^\\|]*\\|(?P[0-9]{8})\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|" + "[^\\|]*#LS[^\\|]*", + FilterFun1 = + fun(Captures) -> + DoB = maps:get(<<"dob">>, Captures, notfound), + (DoB >= StartDoB) andalso (DoB =< EndDoB) + end, + EvalFunPCRE = + leveled_eval:generate_eval_function( + "regex($term, :regex, pcre, ($dob))", + #{<<"regex">> => list_to_binary(WillowLeedsExtractor)} + ), + + QueryPCRE1 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {false, {query, EvalFunPCRE, FilterFun1}} + }, + {async, RunnerPCRE1} = leveled_bookie:book_returnfolder(Book1, QueryPCRE1), + BornMid70sPCRE1 = RunnerPCRE1(), + + SW2 = os:timestamp(), + + EvalFunRE2 = + leveled_eval:generate_eval_function( + "regex($term, :regex, pcre, ($dob))", + #{<<"regex">> => list_to_binary(WillowLeedsExtractor)} + ), + QueryRE2_2 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {false, {query, EvalFunRE2, FilterFun1}} + }, + {async, RunnerRE2_2} = leveled_bookie:book_returnfolder(Book1, QueryRE2_2), + BornMid70sRE2_2 = RunnerRE2_2(), + + SW3 = os:timestamp(), + + AllFun = fun(_) -> true end, + QueryRE2_3 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {<<"dob">>, {query, EvalFunRE2, AllFun}} + }, + {async, RunnerRE2_3} = leveled_bookie:book_returnfolder(Book1, QueryRE2_3), + Results3 = RunnerRE2_3(), + BornMid70sRE2_3 = + lists:filtermap( + fun({DoB, Key}) -> + case (DoB >= StartDoB) andalso (DoB =< EndDoB) of + true -> + {true, Key}; + false -> + false + end + end, + Results3 + ), + + SW4 = os:timestamp(), + + WillowLeedsDoubleExtractor = + "[^\\|]*\\|(?P[0-9]{8})\\|(?P[0-9]{0,8})\\|" + "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*", + EvalFunRE2_2 = + leveled_eval:generate_eval_function( + "regex($term, :regex, pcre, ($dob, $dod))", + #{<<"regex">> => list_to_binary(WillowLeedsDoubleExtractor)} + ), + + FilterFun2 = + fun(Captures) -> + DoB = maps:get(<<"dob">>, Captures, notfound), + (DoB >= StartDoB) andalso (DoB =< EndDoB) + end, + QueryRE2_4 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {false, 
{query, EvalFunRE2_2, FilterFun2}} + }, + {async, RunnerRE2_4} = leveled_bookie:book_returnfolder(Book1, QueryRE2_4), + BornMid70sRE2_4 = RunnerRE2_4(), + + SW5 = os:timestamp(), + + QueryRE2_5 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {true, {query, EvalFunRE2, FilterFun1}} + }, + {async, RunnerRE2_5} = leveled_bookie:book_returnfolder(Book1, QueryRE2_5), + {ok, WillowLeedsExtractorRE} = re:compile(WillowLeedsExtractor), + BornMid70sRE2_5 = + lists:filtermap( + fun({T, K}) -> + {match, _} = + leveled_util:regex_run(T, WillowLeedsExtractorRE, []), + {true, K} + end, + RunnerRE2_5()), + + SW8 = os:timestamp(), + + FilterExpression1 = "($dob BETWEEN \"19740301\" AND \"19761030\")", + FilterFun5 = + leveled_filter:generate_filter_function(FilterExpression1, maps:new()), + + QueryRE2_8 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {false, {query, EvalFunRE2, FilterFun5}} + }, + {async, RunnerRE2_8} = leveled_bookie:book_returnfolder(Book1, QueryRE2_8), + BornMid70sRE2_8 = RunnerRE2_8(), + + SW9 = os:timestamp(), + + PreFilterRE = + "[^\\|]*\\|(?P197[4-6]{1}[0-9]{4})\\|" + "[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|" + "[^\\|]*#LS[^\\|]*", + PreFilterEvalFun = + leveled_eval:generate_eval_function( + "regex($term, :regex, pcre, ($dob))", + #{<<"regex">> => list_to_binary(PreFilterRE)} + ), + + QueryRE2_9 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {false, {query, PreFilterEvalFun, FilterFun5}} + }, + {async, RunnerRE2_9} = leveled_bookie:book_returnfolder(Book1, QueryRE2_9), + BornMid70sRE2_9 = RunnerRE2_9(), + + SW10 = os:timestamp(), + + WillowLeedsExtractor = + "[^\\|]*\\|(?P[0-9]{8})\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|" + "[^\\|]*#LS[^\\|]*", + + FilterExpression2 = + "($dob BETWEEN \"19740301\" AND \"19761030\")" + "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))", + FilterFun6 = + leveled_filter:generate_filter_function(FilterExpression2, maps:new()), + EvalFun2 = + leveled_eval:generate_eval_function( + "delim($term, \"|\", ($surname, $dob, $dod, $gns, $pcs))", + maps:new() + ), + QueryRE2_10 = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {false, {query, EvalFun2, FilterFun6}} + }, + {async, RunnerRE2_10} = leveled_bookie:book_returnfolder(Book1, QueryRE2_10), + BornMid70sRE2_10 = RunnerRE2_10(), + + SW11 = os:timestamp(), + + true = length(BornMid70s0) > 0, + + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sPCRE1), + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_2), + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_3), + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_4), + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_5), + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_8), + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_9), + true = lists:sort(BornMid70s0) == lists:sort(BornMid70sRE2_10), + + maybe_log_toscreen( + "~nFilter outside took ~w ms~n", + [timer:now_diff(SW1, SW0) div 1000]), + maybe_log_toscreen( + "~nPCRE Capture filter inside took ~w ms~n", + [timer:now_diff(SW2, SW1) div 1000]), + maybe_log_toscreen( + "~nRE2 Capture filter inside took ~w ms~n", + [timer:now_diff(SW3, SW2) div 1000]), + maybe_log_toscreen( + "~nRE2 Capture filter outside took ~w ms~n", + [timer:now_diff(SW4, SW3) div 1000]), + maybe_log_toscreen( + "~nRE2 double-capture 
filter outside took ~w ms~n", + [timer:now_diff(SW5, SW4) div 1000]), + maybe_log_toscreen( + "~nRE2 single-capture filter with parsed filter expression took ~w ms~n", + [timer:now_diff(SW9, SW8) div 1000]), + maybe_log_toscreen( + "~nRE2 single-capture pre-filter with parsed query string took ~w ms~n", + [timer:now_diff(SW10, SW9) div 1000]), + maybe_log_toscreen( + "~nEval processed index with parsed filter expression took ~w ms~n", + [timer:now_diff(SW11, SW10) div 1000]), + + + QueryRE2_3_WrongCapture = + {index_query, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, + {<<"gns">>, {query, EvalFunRE2, FilterFun6}} + }, + {async, RunnerRE2_3_WC} = + leveled_bookie:book_returnfolder(Book1, QueryRE2_3_WrongCapture), + true = [] == RunnerRE2_3_WC(), + + ok = leveled_bookie:book_close(Book1), + + testutil:reset_filestructure(). + +maybe_log_toscreen(Log, Subs) -> + io:format( + % user, + Log, + Subs + ). + +complex_queries(_Config) -> + KeyCount = 200000, + RootPath = testutil:reset_filestructure(), + Bucket = {<<"Type1">>, <<"Bucket1">>}, + IdxGivenName = <<"given_bin">>, + IdxFamilyName = <<"family_bin">>, + IdxPostCode = <<"postcode_bin">>, + IdxFullData = <<"fulldata_bin">>, + {ok, Book1} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + V1 = <<"V1">>, + IndexGen = + fun() -> + DoB = perf_SUITE:get_random_dob(), + DoD = perf_SUITE:get_random_dod(), + FN = perf_SUITE:get_random_surname(), + GN1 = perf_SUITE:get_random_givenname(), + GN2 = perf_SUITE:get_random_givenname(), + GN3 = perf_SUITE:get_random_givenname(), + PC1 = perf_SUITE:get_random_postcode(), + PC2 = perf_SUITE:get_random_postcode(), + PC3 = perf_SUITE:get_random_postcode(), + FNIdx1 = set_index_term(FN, DoB, DoD), + GNIdx1 = set_index_term(GN1, DoB, DoD), + GNIdx2 = set_index_term(GN2, DoB, DoD), + GNIdx3 = set_index_term(GN3, DoB, DoD), + PCIdx1 = set_index_term(PC1, DoB, DoD), + PCIdx2 = set_index_term(PC2, DoB, DoD), + PCIdx3 = set_index_term(PC3, DoB, DoD), + FullIdx = + set_full_index_term( + FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3), + [ + {add, IdxFamilyName, FNIdx1}, + {add, IdxGivenName, GNIdx1}, + {add, IdxGivenName, GNIdx2}, + {add, IdxGivenName, GNIdx3}, + {add, IdxPostCode, PCIdx1}, + {add, IdxPostCode, PCIdx2}, + {add, IdxPostCode, PCIdx3}, + {add, IdxFullData, FullIdx} + ] + end, + ObjL1 = + testutil:generate_objects( + KeyCount, binary_uuid, [], V1, IndexGen, Bucket), + testutil:riakload(Book1, ObjL1), + + DoBLow = <<"19730930">>, + DobHigh = <<"19770301">>, + GivenName = <<"#Willow">>, + PostCode = <<"#LS8 ">>, + + %% Search for SM*, Leeds Postcode, bo3n in mid70s + FullIndexEvalFun = + leveled_eval:generate_eval_function( + "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", + maps:new()), + FilterString = + "($dob BETWEEN :doblow AND :dobhigh) AND (contains($gcs, :givenname) " + "OR contains($pcs, :postcode))", + FullIndexFilterFun = + leveled_filter:generate_filter_function( + FilterString, + #{<<"doblow">> => DoBLow, + <<"dobhigh">> => DobHigh, + <<"givenname">> => GivenName, + <<"postcode">> => PostCode + }), + {async, FullR0} = + leveled_bookie:book_indexfold( + Book1, + {Bucket, null}, + {fun testutil:foldkeysfun/3, []}, + {IdxFullData, <<"Sm">>, <<"Sm~">>}, + {false, {query, FullIndexEvalFun, FullIndexFilterFun}} + ), + STFull0 = os:system_time(millisecond), + FullKL0 = lists:sort(FullR0()), + print_query_results(STFull0, single_index, FullKL0), + + SplitIndexEvalFun = + leveled_eval:generate_eval_function( + "delim($term, 
\"|\", ($sk, $dob, $dod))", + maps:new()), + SplitIndexFilterFun = + leveled_filter:generate_filter_function( + "$dob BETWEEN :doblow AND :dobhigh", + #{<<"doblow">> => DoBLow, <<"dobhigh">> => DobHigh}), + Q1 = + {IdxFamilyName, + <<"Sm">>, <<"Sm~">>, + {query, SplitIndexEvalFun, SplitIndexFilterFun}}, + Q2 = + {IdxGivenName, + <<"Willow">>, <<"Willow#">>, + {query, SplitIndexEvalFun, SplitIndexFilterFun}}, + Q3 = + {IdxPostCode, + <<"LS8 ">>, <<"LS8#">>, + {query, SplitIndexEvalFun, SplitIndexFilterFun}}, + + ComboFun = + leveled_setop:generate_setop_function("$1 INTERSECT ($2 UNION $3)"), + + {async, SplitR0} = + leveled_bookie:book_multiindexfold( + Book1, + Bucket, + {fun testutil:foldkeysfun/3, []}, + [{1, Q1}, {2, Q2}, {3, Q3}], + ComboFun), + STSplit0 = os:system_time(millisecond), + SplitKL0 = lists:sort(SplitR0()), + print_query_results(STSplit0, multi_index, SplitKL0), + + true = FullKL0 == SplitKL0, + + ok = leveled_bookie:book_close(Book1), + + testutil:reset_filestructure(). + +print_query_results(ST, QT, Results) -> + io:format( + % user, + "Query type ~w took ~w ms with ~w results~n", + [QT, os:system_time(millisecond) - ST, length(Results)] + ). + +set_index_term(SortKey, DoB, DoD) -> + list_to_binary( + lists:flatten( + io_lib:format( + "~s|~s|~s", + [SortKey, DoB, DoD]) + )). + +set_full_index_term(FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3) -> + list_to_binary( + lists:flatten( + io_lib:format( + "~s|~s|~s|#~s#~s#~s|#~s#~s#~s", + [FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3]) + )). + count_termsonindex(Bucket, IdxField, Book, QType) -> lists:foldl( fun(X, Acc) -> @@ -906,31 +1355,30 @@ count_termsonindex(Bucket, IdxField, Book, QType) -> multibucket_fold(_Config) -> RootPath = testutil:reset_filestructure(), - {ok, Bookie1} = leveled_bookie:book_start(RootPath, - 2000, - 50000000, - testutil:sync_strategy()), - ObjectGen = testutil:get_compressiblevalue_andinteger(), + {ok, Bookie1} = + leveled_bookie:book_start( + RootPath, 2000, 50000000, testutil:sync_strategy()), + ObjectGen = <<"V1">>, IndexGen = fun() -> [] end, + B1 = {<<"Type1">>, <<"Bucket1">>}, + B2 = <<"Bucket2">>, + B3 = <<"Bucket3">>, + B4 = {<<"Type2">>, <<"Bucket4">>}, ObjL1 = testutil:generate_objects( - 13000, uuid, [], ObjectGen, IndexGen, {<<"Type1">>, <<"Bucket1">>} - ), + 13000, binary_uuid, [], ObjectGen, IndexGen, B1), testutil:riakload(Bookie1, ObjL1), ObjL2 = testutil:generate_objects( - 17000, uuid, [], ObjectGen, IndexGen, <<"Bucket2">> - ), + 17000, binary_uuid, [], ObjectGen, IndexGen, B2), testutil:riakload(Bookie1, ObjL2), ObjL3 = testutil:generate_objects( - 7000, uuid, [], ObjectGen, IndexGen, <<"Bucket3">> - ), + 7000, binary_uuid, [], ObjectGen, IndexGen, B3), testutil:riakload(Bookie1, ObjL3), - ObjL4 = + ObjL4 = testutil:generate_objects( - 23000, uuid, [], ObjectGen, IndexGen, {<<"Type2">>, <<"Bucket4">>} - ), + 23000, binary_uuid, [], ObjectGen, IndexGen, B4), testutil:riakload(Bookie1, ObjL4), FF = fun(B, K, _PO, Acc) -> @@ -1005,7 +1453,7 @@ foldobjects_bybucket_range(_Config) -> 1300, {fixed_binary, 1}, [], ObjectGen, IndexGen, <<"Bucket1">>), testutil:riakload(Bookie1, ObjL1), - FoldKeysFun = fun(_B, K,_V, Acc) -> [ K |Acc] end, + FoldKeysFun = fun(_B, K, _V, Acc) -> [ K |Acc] end, StartKey = testutil:fixed_bin_key(123), EndKey = testutil:fixed_bin_key(779), diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl index 0a4bc32f..462c020f 100644 --- a/test/end_to_end/perf_SUITE.erl +++ b/test/end_to_end/perf_SUITE.erl @@ -7,11 +7,21 @@ -export([ 
riak_ctperf/1, riak_fullperf/1, riak_profileperf/1, riak_miniperf/1 ]). +-export( + [ + random_people_index/0, + get_random_dob/0, + get_random_dod/0, + get_random_givenname/0, + get_random_surname/0, + get_random_postcode/0 + ]). --define(PEOPLE_INDEX, <<"people_bin">>). --define(MINI_QUERY_DIVISOR, 8). --define(RGEX_QUERY_DIVISOR, 32). --define(PUT_PAUSE, 40). +-ifdef(test_filter_expression). + -define(TEST_FE, true). +-else. + -define(TEST_FE, false). +-endif. -ifndef(performance). -define(performance, riak_ctperf). @@ -25,6 +35,11 @@ all() -> [?performance]. -define(ACCOUNTING, false). -endif. +-define(PEOPLE_INDEX, <<"people_bin">>). +-define(MINI_QUERY_DIVISOR, 8). +-define(RGEX_QUERY_DIVISOR, 32). +-define(PUT_PAUSE, 40). + suite() -> [{timetrap, {hours, 16}}]. init_per_suite(Config) -> @@ -660,8 +675,56 @@ random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) -> ), TC div 1000. - random_people_queries(Bookie, Bucket, IndexesReturned) -> + random_people_queries(?TEST_FE, Bookie, Bucket, IndexesReturned). + +random_people_queries(true, Bookie, Bucket, IndexesReturned) -> + FilterExpression = + "($dob BETWEEN \"19700101\" AND \"19791231\") " + "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))", + {ok, ParsedFilter} = + leveled_filter:generate_filter_expression( + FilterExpression, maps:new()), + FilterFun = + fun(AttrMap) -> leveled_filter:apply_filter(ParsedFilter, AttrMap) end, + EvalExpression = "delim($term, \"|\", ($surname, $dob, $dod, $gns, $pcs))", + {ok, ParsedEval} = + leveled_eval:generate_eval_expression(EvalExpression, maps:new()), + EvalFun = + fun(Term, Key) -> + leveled_eval:apply_eval(ParsedEval, Term, Key, maps:new()) + end, + + QueryFun = + fun() -> + Surname = get_random_surname(), + Range = + {?PEOPLE_INDEX, + Surname, + <> + }, + FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, + {async, R} = + leveled_bookie:book_indexfold( + Bookie, + {Bucket, <<>>}, + {FoldKeysFun, 0}, + Range, + {true, {eval, EvalFun, FilterFun} + }), + R() + end, + + {TC, {QC, EF}} = + timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end), + ct:log( + ?INFO, + "Fetch of ~w index entries by regex in ~w queries took ~w ms" + " with filter_expression=~w", + [EF, QC, TC div 1000, true] + ), + TC div 1000; +random_people_queries(false, Bookie, Bucket, IndexesReturned) -> SeventiesWillowRegex = "[^\\|]*\\|197[0-9]{5}\\|[^\\|]*\\|" "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*", @@ -674,8 +737,7 @@ random_people_queries(Bookie, Bucket, IndexesReturned) -> Surname, <> }, - {ok, TermRegex} = - re:compile(SeventiesWillowRegex), + {ok, TermRegex} = leveled_util:regex_compile(SeventiesWillowRegex), FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, {async, R} = leveled_bookie:book_indexfold( @@ -691,8 +753,9 @@ random_people_queries(Bookie, Bucket, IndexesReturned) -> timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end), ct:log( ?INFO, - "Fetch of ~w index entries by regex in ~w queries took ~w ms", - [EF, QC, TC div 1000] + "Fetch of ~w index entries by regex in ~w queries took ~w ms" + " with filter_expression=~w", + [EF, QC, TC div 1000, false] ), TC div 1000. 
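The capture_and_filter_terms/1 and complex_queries/1 tests earlier in this
patch drive the new index-fold term handling through {query, EvalFun, FilterFun}
tuples. A minimal sketch of building such a tuple, using only the
leveled_eval:generate_eval_function/2 and leveled_filter:generate_filter_function/2
calls those tests make (the expressions and date bounds below are illustrative):

%% EvalFun splits the "|"-delimited index term into named attributes;
%% FilterFun then tests the extracted $dob against the substituted bounds.
EvalFun =
    leveled_eval:generate_eval_function(
        "delim($term, \"|\", ($surname, $dob, $dod, $gns, $pcs))", maps:new()),
FilterFun =
    leveled_filter:generate_filter_function(
        "$dob BETWEEN :low AND :high",
        #{<<"low">> => <<"19700101">>, <<"high">> => <<"19791231">>}),
TermHandling = {false, {query, EvalFun, FilterFun}}.
%% TermHandling is then passed as the term-handling argument of an index fold,
%% as in capture_and_filter_terms/1 above.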
diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 8bff06b3..93e8cbd4 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -65,10 +65,10 @@ replace_everything(_Config) -> {reload_strategy, [{?RIAK_TAG, recalc}]}] end, {ok, Book1} = leveled_bookie:book_start(BookOpts(StdJournalCount)), - BKT = "ReplaceAll", - BKT1 = "ReplaceAll1", - BKT2 = "ReplaceAll2", - BKT3 = "ReplaceAll3", + BKT = <<"ReplaceAll">>, + BKT1 = <<"ReplaceAll1">>, + BKT2 = <<"ReplaceAll2">>, + BKT3 = <<"ReplaceAll3">>, {KSpcL1, V1} = testutil:put_indexed_objects(Book1, BKT, 50000), ok = testutil:check_indexed_objects(Book1, BKT, KSpcL1, V1), @@ -204,12 +204,12 @@ close_duringcompaction(_Config) -> {cache_size, 2000}, {max_journalsize, 2000000}, {sync_strategy, testutil:sync_strategy()}], - {ok, Spcl1, LastV1} = rotating_object_check(BookOpts, "Bucket1", 6400), + {ok, Spcl1, LastV1} = rotating_object_check(BookOpts, <<"Bucket1">>, 6400), {ok, Book1} = leveled_bookie:book_start(BookOpts), ok = leveled_bookie:book_compactjournal(Book1, 30000), ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start(BookOpts), - ok = testutil:check_indexed_objects(Book2, "Bucket1", Spcl1, LastV1), + ok = testutil:check_indexed_objects(Book2, <<"Bucket1">>, Spcl1, LastV1), ok = leveled_bookie:book_close(Book2). recovery_with_samekeyupdates(_Config) -> @@ -401,7 +401,7 @@ hot_backup_changes(_Config) -> {cache_size, 1000}, {max_journalsize, 10000000}, {sync_strategy, testutil:sync_strategy()}], - B = "Bucket0", + B = <<"Bucket0">>, {ok, Book1} = leveled_bookie:book_start(BookOpts), {KSpcL1, _V1} = testutil:put_indexed_objects(Book1, B, 20000), @@ -475,41 +475,62 @@ rotate_wipe_compact(Strategy1, Strategy2) -> {sync_strategy, testutil:sync_strategy()}, {reload_strategy, [{?RIAK_TAG, Strategy2}]}, {max_run_length, 8}], - {ok, Spcl3, LastV3} = rotating_object_check(BookOpts, "Bucket3", 400), - ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}]), - {ok, Spcl4, LastV4} = rotating_object_check(BookOpts, "Bucket4", 800), - ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, - {"Bucket4", Spcl4, LastV4}]), - {ok, Spcl5, LastV5} = rotating_object_check(BookOpts, "Bucket5", 1600), - ok = restart_from_blankledger(BookOpts, [{"Bucket3", Spcl3, LastV3}, - {"Bucket5", Spcl5, LastV5}]), - {ok, Spcl6, LastV6} = rotating_object_check(BookOpts, "Bucket6", 3200), + {ok, Spcl3, LastV3} = + rotating_object_check(BookOpts, <<"Bucket3">>, 400), + ok = restart_from_blankledger(BookOpts, [{<<"Bucket3">>, Spcl3, LastV3}]), + {ok, Spcl4, LastV4} = + rotating_object_check(BookOpts, <<"Bucket4">>, 800), + ok = + restart_from_blankledger( + BookOpts, + [{<<"Bucket3">>, Spcl3, LastV3}, {<<"Bucket4">>, Spcl4, LastV4}] + ), + {ok, Spcl5, LastV5} = rotating_object_check(BookOpts, <<"Bucket5">>, 1600), + ok = + restart_from_blankledger( + BookOpts, + [{<<"Bucket3">>, Spcl3, LastV3}, {<<"Bucket5">>, Spcl5, LastV5}] + ), + {ok, Spcl6, LastV6} = rotating_object_check(BookOpts, <<"Bucket6">>, 3200), {ok, Book1} = leveled_bookie:book_start(BookOpts), compact_and_wait(Book1), ok = leveled_bookie:book_close(Book1), - ok = restart_from_blankledger(BookOptsAlt, [{"Bucket3", Spcl3, LastV3}, - {"Bucket4", Spcl4, LastV4}, - {"Bucket5", Spcl5, LastV5}, - {"Bucket6", Spcl6, LastV6}]), + ok = + restart_from_blankledger( + BookOptsAlt, + [ + {<<"Bucket3">>, Spcl3, LastV3}, + {<<"Bucket4">>, Spcl4, LastV4}, + {<<"Bucket5">>, Spcl5, LastV5}, + {<<"Bucket6">>, 
Spcl6, LastV6} + ] + ), {ok, Book2} = leveled_bookie:book_start(BookOptsAlt), compact_and_wait(Book2), ok = leveled_bookie:book_close(Book2), - ok = restart_from_blankledger(BookOptsAlt, [{"Bucket3", Spcl3, LastV3}, - {"Bucket4", Spcl4, LastV4}, - {"Bucket5", Spcl5, LastV5}, - {"Bucket6", Spcl6, LastV6}]), + ok = + restart_from_blankledger( + BookOptsAlt, + [ + {<<"Bucket3">>, Spcl3, LastV3}, + {<<"Bucket4">>, Spcl4, LastV4}, + {<<"Bucket5">>, Spcl5, LastV5}, + {<<"Bucket6">>, Spcl6, LastV6} + ] + ), {ok, Book3} = leveled_bookie:book_start(BookOptsAlt), - {KSpcL2, _V2} = testutil:put_indexed_objects(Book3, "AltBucket6", 3000), + {KSpcL2, _V2} = + testutil:put_indexed_objects(Book3, <<"AltBucket6">>, 3000), Q2 = fun(RT) -> {index_query, - "AltBucket6", + <<"AltBucket6">>, {fun testutil:foldkeysfun/3, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} @@ -520,10 +541,13 @@ rotate_wipe_compact(Strategy1, Strategy2) -> DeleteFun = fun({DK, [{add, DIdx, DTerm}]}) -> - ok = testutil:book_riakdelete(Book3, - "AltBucket6", - DK, - [{remove, DIdx, DTerm}]) + ok = + testutil:book_riakdelete( + Book3, + <<"AltBucket6">>, + DK, + [{remove, DIdx, DTerm}] + ) end, lists:foreach(DeleteFun, KSpcL2), @@ -631,33 +655,37 @@ recovr_strategy(_Config) -> {sync_strategy, testutil:sync_strategy()}, {reload_strategy, [{?RIAK_TAG, recovr}]}], - R6 = rotating_object_check(BookOpts, "Bucket6", 6400), + R6 = rotating_object_check(BookOpts, <<"Bucket6">>, 6400), {ok, AllSpcL, V4} = R6, - leveled_penciller:clean_testdir(proplists:get_value(root_path, BookOpts) ++ - "/ledger"), + leveled_penciller:clean_testdir( + proplists:get_value(root_path, BookOpts) ++ "/ledger"), {ok, Book1} = leveled_bookie:book_start(BookOpts), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Book1, TestObject, TestSpec), - ok = testutil:book_riakdelete(Book1, - testutil:get_bucket(TestObject), - testutil:get_key(TestObject), - []), + ok = + testutil:book_riakdelete( + Book1, + testutil:get_bucket(TestObject), + testutil:get_key(TestObject), + [] + ), - lists:foreach(fun({K, _SpcL}) -> - {ok, OH} = testutil:book_riakhead(Book1, "Bucket6", K), - VCH = testutil:get_vclock(OH), - {ok, OG} = testutil:book_riakget(Book1, "Bucket6", K), - V = testutil:get_value(OG), - VCG = testutil:get_vclock(OG), - true = V == V4, - true = VCH == VCG - end, - lists:nthtail(6400, AllSpcL)), + lists:foreach( + fun({K, _SpcL}) -> + {ok, OH} = testutil:book_riakhead(Book1, <<"Bucket6">>, K), + VCH = testutil:get_vclock(OH), + {ok, OG} = testutil:book_riakget(Book1, <<"Bucket6">>, K), + V = testutil:get_value(OG), + VCG = testutil:get_vclock(OG), + true = V == V4, + true = VCH == VCG + end, + lists:nthtail(6400, AllSpcL)), Q = fun(RT) -> {index_query, - "Bucket6", + <<"Bucket6">>, {fun testutil:foldkeysfun/3, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} @@ -682,7 +710,7 @@ recovr_strategy(_Config) -> {ok, Book2} = leveled_bookie:book_start(RevisedOpts), - {KSpcL2, _V2} = testutil:put_indexed_objects(Book2, "AltBucket6", 3000), + {KSpcL2, _V2} = testutil:put_indexed_objects(Book2, <<"AltBucket6">>, 3000), {async, KFolder2} = leveled_bookie:book_returnfolder(Book2, Q(false)), KeyList2 = lists:usort(KFolder2()), true = length(KeyList2) == 6400, @@ -690,7 +718,7 @@ recovr_strategy(_Config) -> Q2 = fun(RT) -> {index_query, - "AltBucket6", + <<"AltBucket6">>, {fun testutil:foldkeysfun/3, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} @@ -701,10 +729,9 @@ recovr_strategy(_Config) -> DeleteFun = fun({DK, [{add, 
DIdx, DTerm}]}) -> - ok = testutil:book_riakdelete(Book2, - "AltBucket6", - DK, - [{remove, DIdx, DTerm}]) + ok = + testutil:book_riakdelete( + Book2, <<"AltBucket6">>, DK, [{remove, DIdx, DTerm}]) end, lists:foreach(DeleteFun, KSpcL2), diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index c753be16..6b19536e 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -1820,8 +1820,8 @@ dollar_key_index(_Config) -> io:format("Length of Result of folder ~w~n", [ResLen]), true = 657 == ResLen, - {ok, REMatch} = re:compile("K.y"), - {ok, REMiss} = re:compile("key"), + {ok, REMatch} = leveled_util:regex_compile("K.y"), + {ok, REMiss} = leveled_util:regex_compile("key"), {async, FolderREMatch} = leveled_bookie:book_keylist( @@ -2102,15 +2102,15 @@ dollar_bucket_index(_Config) -> IndexGen = fun() -> [] end, ObjL1 = testutil:generate_objects( - 1300, uuid, [], ObjectGen, IndexGen, <<"Bucket1">>), + 1300, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket1">>), testutil:riakload(Bookie1, ObjL1), ObjL2 = testutil:generate_objects( - 1700, uuid, [], ObjectGen, IndexGen, <<"Bucket2">>), + 1700, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket2">>), testutil:riakload(Bookie1, ObjL2), ObjL3 = testutil:generate_objects( - 7000, uuid, [], ObjectGen, IndexGen, <<"Bucket3">>), + 7000, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket3">>), testutil:riakload(Bookie1, ObjL3), @@ -2125,9 +2125,9 @@ dollar_bucket_index(_Config) -> {<<"Bucket2">>, SampleKey} = lists:nth(100, Results), UUID = "[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}", - {ok, RESingleMatch} = re:compile(SampleKey), - {ok, REAllMatch} = re:compile(UUID), - {ok, REMiss} = re:compile("no_key"), + {ok, RESingleMatch} = leveled_util:regex_compile(SampleKey), + {ok, REAllMatch} = leveled_util:regex_compile(UUID), + {ok, REMiss} = leveled_util:regex_compile("no_key"), {async, FolderREMiss} = leveled_bookie:book_keylist( @@ -2167,10 +2167,13 @@ dollar_bucket_index(_Config) -> bigobject_memorycheck(_Config) -> RootPath = testutil:reset_filestructure(), - {ok, Bookie} = leveled_bookie:book_start(RootPath, - 200, - 1000000000, - testutil:sync_strategy()), + {ok, Bookie} = + leveled_bookie:book_start( + RootPath, + 200, + 1000000000, + testutil:sync_strategy() + ), Bucket = <<"B">>, IndexGen = fun() -> [] end, ObjPutFun = @@ -2192,10 +2195,13 @@ bigobject_memorycheck(_Config) -> % All processes {_TotalCDBBinMem, _TotalCDBProcesses} = cdb_memory_check(), ok = leveled_bookie:book_close(Bookie), - {ok, BookieR} = leveled_bookie:book_start(RootPath, - 2000, - 1000000000, - testutil:sync_strategy()), + {ok, BookieR} = + leveled_bookie:book_start( + RootPath, + 2000, + 1000000000, + testutil:sync_strategy() + ), {RS_TotalCDBBinMem, _RS_TotalCDBProcesses} = cdb_memory_check(), true = RS_TotalCDBBinMem < 1024 * 1024, % No binary object references exist after startup @@ -2205,25 +2211,29 @@ bigobject_memorycheck(_Config) -> cdb_memory_check() -> TotalCDBProcesses = - lists:filter(fun(P) -> - {dictionary, PD} = - process_info(P, dictionary), - case lists:keyfind('$initial_call', 1, PD) of - {'$initial_call',{leveled_cdb,init,1}} -> - true; - _ -> - false - end - end, - processes()), + lists:filter( + fun(P) -> + {dictionary, PD} = + process_info(P, dictionary), + case lists:keyfind('$initial_call', 1, PD) of + {'$initial_call',{leveled_cdb,init,1}} -> + true; + _ -> + false + end + end, + processes() + ), TotalCDBBinMem = - lists:foldl(fun(P, Acc) -> - BinMem = calc_total_binary_memory(P), - 
io:format("Memory for pid ~w is ~w~n", [P, BinMem]), - BinMem + Acc - end, - 0, - TotalCDBProcesses), + lists:foldl( + fun(P, Acc) -> + BinMem = calc_total_binary_memory(P), + io:format("Memory for pid ~w is ~w~n", [P, BinMem]), + BinMem + Acc + end, + 0, + TotalCDBProcesses + ), io:format("Total binary memory ~w in ~w CDB processes~n", [TotalCDBBinMem, length(TotalCDBProcesses)]), {TotalCDBBinMem, TotalCDBProcesses}. diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 32a569de..24923778 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -550,7 +550,7 @@ generate_objects( ); generate_objects( Count, binary_uuid, ObjL, Value, IndexGen, Bucket) - when is_binary(Bucket) -> + when is_binary(Bucket); is_tuple(Bucket) -> {Obj1, Spec1} = set_object( Bucket, @@ -558,23 +558,14 @@ generate_objects( Value, IndexGen ), - generate_objects(Count - 1, - binary_uuid, - [{rand:uniform(), Obj1, Spec1}|ObjL], - Value, - IndexGen, - Bucket); -generate_objects(Count, uuid, ObjL, Value, IndexGen, Bucket) -> - {Obj1, Spec1} = set_object(Bucket, - leveled_util:generate_uuid(), - Value, - IndexGen), - generate_objects(Count - 1, - uuid, - [{rand:uniform(), Obj1, Spec1}|ObjL], - Value, - IndexGen, - Bucket); + generate_objects( + Count - 1, + binary_uuid, + [{rand:uniform(), Obj1, Spec1}|ObjL], + Value, + IndexGen, + Bucket + ); generate_objects( Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket) when is_list(Bucket) -> @@ -880,7 +871,7 @@ put_indexed_objects(Book, Bucket, Count, V) -> IndexGen = get_randomindexes_generator(1), SW = os:timestamp(), ObjL1 = - generate_objects(Count, uuid, [], V, IndexGen, Bucket), + generate_objects(Count, binary_uuid, [], V, IndexGen, Bucket), KSpecL = lists:map( fun({_RN, Obj, Spc}) -> diff --git a/test/property/evallang_eqc.erl b/test/property/evallang_eqc.erl new file mode 100644 index 00000000..7653690b --- /dev/null +++ b/test/property/evallang_eqc.erl @@ -0,0 +1,161 @@ +-module(evallang_eqc). + +-ifdef(EQC). + +-compile([export_all, nowarn_export_all]). + +-include_lib("eqc/include/eqc.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("../include/leveled.hrl"). + +-define(QC_OUT(P), + eqc:on_output(fun(Str, Args) -> + io:format(user, Str, Args) end, P)). + +eqc_prop1_test_() -> + {timeout, + ?EQC_TIME_BUDGET + 10, + ?_assertEqual( + true, + eqc:quickcheck( + eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_lang()))))}. + +eqc_prop2_test_() -> + {timeout, + ?EQC_TIME_BUDGET + 10, + ?_assertEqual( + true, + eqc:quickcheck( + eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_negative()))))}. + +identifier() -> + FirstChars = lists:seq($a,$z)++lists:seq($A,$Z)++["_"], + OtherChars = FirstChars++lists:seq($0,$9), + ?LET({X, Xs}, {oneof(FirstChars), list(elements(OtherChars))}, unicode:characters_to_binary([X|Xs])). + +identifier(Context, Type) -> + ?LET(TypedVars, vars(Context, Type), + ?LET(V, oneof([identifier() || TypedVars == []] ++ TypedVars), [ws(), "$", V, ws()])). + +vars(Context, Type) -> + fault([ V || {V, {T, _}} <- Context, T /= Type ], + [ V || {V, {T, _}} <- Context, T == Type ]). + +%% No quotes in strings +%% Filter the quote with `re` instead of string:find to +%% be compatible with lexer +string() -> + ?SUCHTHAT(String, non_empty(utf8()), re:run(String, "\"") == nomatch). + +typed_context() -> + ?SUCHTHAT(KVs, list({identifier(), oneof([{int, int()}, {string, string()}])}), + unique([K || {K, _} <- KVs])). + +unique(Elems) -> + lists:usort(Elems) == lists:sort(Elems). 
+ +ppvalue(string) -> + ppstring(); +ppvalue(Int) -> + ppint(Int). + +ppregex() -> + [ws(), "\"", regex(), "\"", ws()]. + +regex() -> + elements(["a", ".*", "[^0]*"]). + +ws() -> + ?SHRINK(list(elements(" \t\f\v\r\n\s")), " "). + +comparator() -> + oneof([">", "<", "=", "<=", ">="]). + +ppint(Kind) -> + Gen = case Kind of + pos -> ?LET(N, nat(), N+1); + nat -> nat(); + neg -> ?LET(N, nat(), -N); + _ -> int() + end, + [ws(), ?LET(X, fault(int(), Gen), integer_to_list(X)), ws()]. + +ppstring() -> + [ws(), "\"", string(), "\"", ws()]. + +operand(_Context) -> + oneof([ ppint(any), ppstring() ]). + +math_operand(Context) -> + oneof([ identifier(Context, int) || Context /= []] ++ + [ ppint(any) ]). + +pplist(Gen) -> + ?LET(List, non_empty(list(Gen)), + [ws(), "("] ++ lists:join(",", List) ++ [")", ws()]). + + +identifier_list(Context, Type) -> + pplist(identifier(Context, Type)). + +mapping(int, string) -> + [ ws(), "(", ppint(any), ", ", ppstring(), ws(), ")" ]; +mapping(string, string) -> + [ ws(), "(", ppstring(), ", ", ppstring(), ws(), ")" ]; +mapping(string, int) -> + [ ws(), "(", ppstring(), ", ", ppint(any), ws(), ")" ]; +mapping(int, int) -> + [ ws(), "(", ppint(any), ", ", ppint(any), ws(), ")" ]. + +mappings(InType, OutType) -> + pplist(mapping(InType, OutType)). + +expr(0, Context) -> + oneof([ [ "delim(", identifier(Context, string), ",", ppstring(), ",", identifier_list(Context, string), ")" ] + , [ "join(", identifier_list(Context, string), ",", ppstring(), ",", identifier(Context, string), ")" ] + , [ "split(", identifier(Context, string), ",", ppstring(), ",", identifier(Context, string), ")" ] + , [ "slice(", identifier(Context, string), ",", ppint(pos), ",", identifier(Context, string), ")" ] + , [ "index(", identifier(Context, string), ",", ppint(nat), ",", ppint(pos), ",", identifier(Context, string), ")" ] + , [ "kvsplit(", identifier(Context, string), ",", ppstring(), ",", ppstring(), ")" ] + , [ "regex(", identifier(Context, string), ",", ppregex() , ", pcre, ", identifier_list(Context, string), ")"] + , [ "regex(", identifier(Context, string), ",", ppregex() , ",", identifier_list(Context, string), ")"] + , [ "to_integer(", identifier(Context, string), ",", identifier(Context, int), ")" ] + , [ "to_string(", identifier(Context, int), ",", identifier(Context, string), ")" ] + , [ "subtract(", math_operand(Context), ",", math_operand(Context), ",", identifier(Context, int), ")" ] + , [ "add(", math_operand(Context), ",", math_operand(Context), ",", identifier(Context, int), ")" ] + ] ++ + [ [ "map(", lists:join(",", [identifier(Context, LHS), comparator(), mappings(LHS, RHS), + ppvalue(LHS), identifier(Context, RHS)]), ")" ] + || LHS <- [int, string], + RHS <- [int, string], + Context /= [] ] + ); +expr(N, Context) -> + oneof([ expr(0, Context) + , ?LETSHRINK([E1, E2], [expr(N div 2, Context), expr(N div 2, Context)], [E1, "|", E2]) + ]). + +%% A generator for syntactic and semantic correct expressions +evallang(Context) -> + ?SIZED(Size, expr(Size, Context)). + +%% The property. +%% The Context variables are used to replace ":x" substitution vars in the provided +%% tokens to parse. 
+prop_lang() -> + eqc:dont_print_counterexample( + ?FORALL(Context, typed_context(), + ?FORALL(String, evallang(Context), + ?WHENFAIL(eqc:format("Failing for\n~ts\nwith context ~p\n", [String, Context]), + try Map = maps:from_list([{Var, Val} || {Var, {_Type, Val}} <- Context]), + F = leveled_eval:generate_eval_function(unicode:characters_to_list(String), Map), + is_map(F(<<"hello">>, <<"world">>)) + catch Error:Reason:St -> + eqc:format("~n~p Failed with ~p ~p~n~p~n", [String, Error, Reason, St]), + equals(Error, true) + end)))). + +prop_negative() -> + fails(fault_rate(1, 10, prop_lang())). + +-endif. \ No newline at end of file diff --git a/test/property/filterlang_eqc.erl b/test/property/filterlang_eqc.erl new file mode 100644 index 00000000..7b8a5ea0 --- /dev/null +++ b/test/property/filterlang_eqc.erl @@ -0,0 +1,111 @@ +-module(filterlang_eqc). + +-ifdef(EQC). + +-compile([export_all, nowarn_export_all]). + +-include_lib("eqc/include/eqc.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("../include/leveled.hrl"). + +-define(lazy_oneof(Gens), ?LAZY(oneof(Gens))). + +-define(QC_OUT(P), + eqc:on_output(fun(Str, Args) -> + io:format(user, Str, Args) end, P)). + +eqc_test_() -> + {timeout, + ?EQC_TIME_BUDGET + 10, + ?_assertEqual( + true, + eqc:quickcheck( + eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_lang()))))}. + +identifier() -> + FirstChars = lists:seq($a,$z)++lists:seq($A,$Z)++["_"], + OtherChars = FirstChars++lists:seq($0,$9), + ?LET({X, Xs}, {oneof(FirstChars), list(elements(OtherChars))}, unicode:characters_to_binary([X|Xs])). + +ppidentifier(Vars) -> + ?LET(V, oneof([identifier() | Vars]), [ws(), "$", V, " ",ws()]). + +%% No quotes in strings +%% Filter the quote with `re` instead of string:find to +%% be compatible with lexer +string() -> + ?SUCHTHAT(String, non_empty(utf8()), re:run(String, "\"") == nomatch). + +context() -> + list({identifier(), oneof([int(), string()])}). + +ws() -> + ?SHRINK(list(elements(" \t\f\v\r\n\s")), [" "]). + +comparator() -> + oneof([">", "<", "=", "<>", "<=", ">="]). + +ppint() -> + [ws(), ?LET(X, int(), integer_to_list(X)), ws()]. + +ppstring() -> + [ws(), "\"", string(), "\"", ws()]. + +pplist(Gen) -> + ?LET(List, non_empty(list(Gen)), + [ws(), "("] ++ lists:join(",", List) ++ [")", ws()]). + +operand(Vars) -> + oneof([ ppidentifier(Vars) ] ++ + [ [ws(), ":", oneof(Vars), " ", ws()] || Vars /= []] ++ + %% Always in context, because + %% should fail with error if substitution vars not in context + [ ppint(), ppstring() ]). + +operand_list(Vars) -> + ?LET(OpList, non_empty(list(operand(Vars))), + [ws(), "("] ++ lists:join(",", OpList) ++ [")", ws()]). + +condition(0, Vars) -> + oneof([ [ operand(Vars), comparator(), operand(Vars) ] + , [ operand(Vars), "BETWEEN", operand(Vars), "AND", operand(Vars) ] + , [ ppidentifier(Vars), " IN", pplist(ppstring()) ] + , [ ppstring(), " IN", ppidentifier(Vars) ] + , [ "contains(", ppidentifier(Vars), ", ", ppstring(), ")" ] + , [ "begins_with(", ppidentifier(Vars), ", ", ppstring(), ")" ] + , [ "attribute_exists(", ppidentifier(Vars), ")" ] + , [ "attribute_not_exists(", ppidentifier(Vars), ")" ] + , [ "attribute_empty(", ppidentifier(Vars), ")" ] + ]); +condition(N, Vars) -> + ?lazy_oneof([ condition(0, Vars) + , ?LETSHRINK([C], [condition(N - 1, Vars)], + ?lazy_oneof([ ["NOT", C] , ["(", ws(), C, ws(), ")"] ])) + , ?LETSHRINK([C1, C2], [condition(N div 2, Vars), condition(N div 2, Vars)], + ?lazy_oneof([ [C1, "AND", C2] , [C1, "OR", C2] ])) + ]). 
+ +%% A generator for syntactic and semantic correct expressions +filterlang(Vars) -> + ?SIZED(Size, filterlang(Size, Vars)). + +filterlang(N, Vars) -> + condition(N, Vars). + +%% The property. +%% The Context variables are used to replace ":x" substitution vars in the provided +%% tokens to parse. +prop_lang() -> + eqc:dont_print_counterexample( + ?FORALL(Context, context(), + ?FORALL(String, filterlang([V || {V, _} <- Context]), + ?WHENFAIL(eqc:format("Failing for\n~ts\nwith context ~p\n", [String, Context]), + try Map = maps:from_list(Context), + {ok, Expr} = leveled_filter:generate_filter_expression(unicode:characters_to_list(String), Map), + is_boolean(leveled_filter:apply_filter(Expr, Map)) + catch Error:Reason:St -> + eqc:format("~n~p Failed with ~p ~p~n~p~n", [String, Error, Reason, St]), + equals(Error, true) + end)))). + +-endif. diff --git a/test/property/leveled_simpleeqc.erl b/test/property/leveled_simpleeqc.erl index 9bb6eade..c02c2cff 100644 --- a/test/property/leveled_simpleeqc.erl +++ b/test/property/leveled_simpleeqc.erl @@ -42,7 +42,6 @@ }). -define(NUMTESTS, 10000). --define(TIME_BUDGET, 300). -define(QC_OUT(P), eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)). @@ -51,11 +50,11 @@ eqc_test_() -> {timeout, - ?TIME_BUDGET + 10, + ?EQC_TIME_BUDGET + 10, ?_assertEqual( true, eqc:quickcheck( - eqc:testing_time(?TIME_BUDGET, ?QC_OUT(prop_db()))))}. + eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_db()))))}. run() -> run(?NUMTESTS). diff --git a/test/property/leveled_statemeqc.erl b/test/property/leveled_statemeqc.erl index e0cb13e5..1f37e59f 100644 --- a/test/property/leveled_statemeqc.erl +++ b/test/property/leveled_statemeqc.erl @@ -342,7 +342,7 @@ mput_pre(S) -> %% %% Really weird to have to specify a value in case of a remove action mput_args(#{leveled := Pid, previous_keys := PK}) -> - ?LET(Objs, list({gen_key_in_bucket(PK), nat()}), + ?LET(Objs, list({gen_key_in_bucket(PK), null}), [Pid, [ {weighted_default({5, add}, {1, remove}), Bucket, Key, SubKey, gen_val()} || {{Key, Bucket}, SubKey} <- Objs ]]). @@ -680,7 +680,7 @@ indexfold(Pid, Constraint, FoldAccT, Range, {_, undefined} = TermHandling, _Coun {async, Folder} = leveled_bookie:book_indexfold(Pid, Constraint, FoldAccT, Range, TermHandling), Folder; indexfold(Pid, Constraint, FoldAccT, Range, {ReturnTerms, RegExp}, _Counter) -> - {ok, RE} = re:compile(RegExp), + {ok, RE} = leveled_util:regex_compile(RegExp), {async, Folder} = leveled_bookie:book_indexfold(Pid, Constraint, FoldAccT, Range, {ReturnTerms, RE}), Folder. diff --git a/test/property/setoplang_eqc.erl b/test/property/setoplang_eqc.erl new file mode 100644 index 00000000..18e5b547 --- /dev/null +++ b/test/property/setoplang_eqc.erl @@ -0,0 +1,105 @@ +%%% File : setoplang_eqc.erl +%%% Created : 14 May 2024 by Thomas Arts +%%% +%%% Lexer does not accept binary strings it seems (in OTP26) +%%% 3> leveled_setoplexer:string("$7"). +%%% {ok,[{set_id,1,7}],1} +%%% 4> leveled_setoplexer:string(<<"$7">>). +%% ** exception error: no function clause matching lists:sublist(<<"$7">>,1) (lists.erl, line 394) +%% +-module(setoplang_eqc). + +-ifdef(EQC). + +-compile([export_all, nowarn_export_all]). + +-include_lib("eqc/include/eqc.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("../include/leveled.hrl"). + +-define( + QC_OUT(P), + eqc:on_output(fun(Str, Args) -> io:format(user, Str, Args) end, P)). 
+
+eqc_prop1_test_() ->
+    {timeout,
+        ?EQC_TIME_BUDGET + 10,
+        ?_assertEqual(
+            true,
+            eqc:quickcheck(
+                eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_gen_fun()))))}.
+
+eqc_prop2_test_() ->
+    {timeout,
+        ?EQC_TIME_BUDGET + 10,
+        ?_assertEqual(
+            true,
+            eqc:quickcheck(
+                eqc:testing_time(?EQC_TIME_BUDGET, ?QC_OUT(prop_check_eval()))))}.
+
+set_id() ->
+    ?LET(N, choose(1,20), integer_to_list(N)).
+
+value() ->
+    ?LET(Set, list(int()), sets:from_list(Set)).
+
+%% This context is always an enumeration.
+%% Consider implementing a context in which keys are not consecutive
+context() ->
+    ?LET(Sets, list(value()), lists:enumerate(Sets)).
+%context() ->
+%    ?LET(Map, map(set_id(), value()),
+%        lists:sort(maps:to_list(Map))).
+
+ws() ->
+    ?SHRINK(list(elements(" \t\f\v\r\n\s")), " ").
+
+setoplang(Context) ->
+    ?SIZED(Size, setoplang(Size, Context)).
+
+setoplang(0, Vars) ->
+    ["$", oneof(Vars), ws()];
+setoplang(Size, Vars) ->
+    ?LAZY(
+        oneof([setoplang(0, Vars),
+            ?LETSHRINK([Cond], [setoplang(Size - 1, Vars)],
+                ["(", ws(), Cond, ws(), " )"]),
+            ?LETSHRINK([Cond1, Cond2],
+                [setoplang(Size div 2, Vars),
+                    setoplang(Size div 2, Vars)],
+                [Cond1, ws(), oneof(["SUBTRACT", "UNION", "INTERSECT"]), ws(), Cond2])])).
+
+
+
+%% -- Property ---------------------------------------------------------------
+
+%% The property.
+prop_gen_fun() ->
+    ?FORALL(Context, non_empty(context()),
+        ?FORALL(String, setoplang([integer_to_list(V) || {V, _} <- Context]),
+            try F = leveled_setop:generate_setop_function(String),
+                sets:is_set(F(maps:from_list(Context)))
+            catch Error:Reason ->
+                eqc:format("~n~ts Failed with ~p ~p~n", [String, Error, Reason]),
+                equals(Error, true)
+            end)).
+
+prop_check_eval() ->
+    ?FORALL(Context, non_empty(context()),
+        begin
+            Vars = [ "$"++integer_to_list(Id) || {Id,_} <- Context],
+            String = "(" ++ lists:flatten(lists:join(" UNION ", Vars) ++ ") SUBTRACT " ++ hd(Vars)),
+            ?WHENFAIL(eqc:format("setop ~ts~n", [String]),
+                begin
+                    F = leveled_setop:generate_setop_function(String),
+                    equal_sets(F(maps:from_list(Context)),
+                        sets:subtract(sets:union([Set || {_, Set} <- Context]),
+                            element(2, hd(Context))))
+                end)
+        end).
+
+equal_sets(S1, S2) ->
+    ?WHENFAIL(eqc:format("~p /= ~p", [sets:to_list(S1), sets:to_list(S2)]),
+        sets:is_subset(S1, S2) andalso sets:is_subset(S2, S1)).
+
+-endif.

From 0fc7c67b083db45663d4dff5e83ab5af1bf1f191 Mon Sep 17 00:00:00 2001
From: Martin Sumner
Date: Thu, 14 Aug 2025 16:59:55 +0100
Subject: [PATCH 3/4] Standardise formatting using erlfmt (#479)

* Standardise formatting using erlfmt

Adding check of format to github actions

* Exclude generated files from formatting

* Use correct filelib function

Previously checked a file and also not a directory - where is_regular/1
is specifically checking the same thing.

Avoids a coverage miss, now the line has been reformatted (as in our
tests it is always a file), the only variance is when it is a directory.

* Revise formatting

Exclude where formatting has created less readable code
(e.g. leveled_sstblock).

Also where erlfmt splits across lines and isolates the fun keyword in a
fun((_) -> _) definition - this can cause problems with some
editors/linters.
* Resolve highlighting issue by removing parenthesis --- .github/workflows/erlang.yml | 2 + include/leveled.hrl | 207 +- rebar.config | 90 +- src/leveled.app.src | 32 +- src/leveled_bookie.erl | 2745 ++++++++++--------- src/leveled_cdb.erl | 1670 +++++++----- src/leveled_codec.erl | 786 +++--- src/leveled_ebloom.erl | 114 +- src/leveled_eval.erl | 364 +-- src/leveled_filter.erl | 293 ++- src/leveled_head.erl | 447 ++-- src/leveled_iclerk.erl | 940 ++++--- src/leveled_imanifest.erl | 130 +- src/leveled_inker.erl | 1193 +++++---- src/leveled_log.erl | 704 ++--- src/leveled_monitor.erl | 233 +- src/leveled_pclerk.erl | 425 +-- src/leveled_penciller.erl | 1722 ++++++------ src/leveled_pmanifest.erl | 974 +++---- src/leveled_pmem.erl | 253 +- src/leveled_runner.erl | 607 +++-- src/leveled_setop.erl | 70 +- src/leveled_sst.erl | 3616 +++++++++++++++----------- src/leveled_sstblock.erl | 95 +- src/leveled_tictac.erl | 446 ++-- src/leveled_tree.erl | 435 ++-- src/leveled_util.erl | 58 +- test/end_to_end/appdefined_SUITE.erl | 211 +- test/end_to_end/basic_SUITE.erl | 1058 ++++---- test/end_to_end/iterator_SUITE.erl | 793 +++--- test/end_to_end/perf_SUITE.erl | 517 ++-- test/end_to_end/recovery_SUITE.erl | 1064 ++++---- test/end_to_end/riak_SUITE.erl | 1230 +++++---- test/end_to_end/testutil.erl | 883 ++++--- test/end_to_end/tictac_SUITE.erl | 652 +++-- 35 files changed, 14316 insertions(+), 10743 deletions(-) diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml index 6336ef87..e42381d3 100644 --- a/.github/workflows/erlang.yml +++ b/.github/workflows/erlang.yml @@ -29,5 +29,7 @@ jobs: otp-version: ${{ matrix.otp }} - name: Compile run: ./rebar3 compile + - name: Check format + run: ./rebar3 fmt --check - name: Run tests run: ./rebar3 do xref, dialyzer, eunit diff --git a/include/leveled.hrl b/include/leveled.hrl index 43512e08..ac84e402 100644 --- a/include/leveled.hrl +++ b/include/leveled.hrl @@ -12,8 +12,8 @@ -define(MAX_CACHE_MULTTIPLE, 2). -define(MIN_CACHE_SIZE, 100). -define(MIN_PCL_CACHE_SIZE, 400). --define(MAX_PCL_CACHE_SIZE, 28000). - % This is less than actual max - but COIN_SIDECOUNT +-define(MAX_PCL_CACHE_SIZE, 28000). +% This is less than actual max - but COIN_SIDECOUNT -define(ABSOLUTEMAX_JOURNALSIZE, 4000000000). -define(COMPRESSION_METHOD, lz4). -define(COMPRESSION_POINT, on_receipt). @@ -21,8 +21,10 @@ -define(LOG_LEVEL, info). -define(DEFAULT_DBID, 65536). -define(OPEN_LASTMOD_RANGE, {0, infinity}). --define(SNAPTIMEOUT_SHORT, 900). % 15 minutes --define(SNAPTIMEOUT_LONG, 43200). % 12 hours +% 15 minutes +-define(SNAPTIMEOUT_SHORT, 900). +% 12 hours +-define(SNAPTIMEOUT_LONG, 43200). -define(SST_PAGECACHELEVEL_NOLOOKUP, 1). -define(SST_PAGECACHELEVEL_LOOKUP, 4). -define(DEFAULT_STATS_PERC, 10). @@ -40,11 +42,11 @@ -define(CACHE_SIZE_JITTER, 25). -define(JOURNAL_SIZE_JITTER, 20). -define(LONG_RUNNING, 1000000). - % An individual task taking > 1s gets a specific log +% An individual task taking > 1s gets a specific log -define(MAX_KEYCHECK_FREQUENCY, 100). -define(MIN_KEYCHECK_FREQUENCY, 1). -define(MAX_LEVELS, 8). - %% Should equal the length of the LEVEL_SCALEFACTOR +%% Should equal the length of the LEVEL_SCALEFACTOR -define(CACHE_TYPE, skpl). %%%============================================================================ @@ -52,24 +54,24 @@ %%% Tags %%%============================================================================ -define(RIAK_TAG, o_rkv). 
- %% Tag to be used on standard Riak KV objects +%% Tag to be used on standard Riak KV objects -define(STD_TAG, o). - %% Tag to be used on K/V objects for non-Riak purposes +%% Tag to be used on K/V objects for non-Riak purposes -define(IDX_TAG, i). - %% Tag used for secondary index keys +%% Tag used for secondary index keys -define(HEAD_TAG, h). - %% Tag used for head-only objects +%% Tag used for head-only objects -define(INKT_STND, stnd). - %% Inker key type used for 'normal' objects +%% Inker key type used for 'normal' objects -define(INKT_MPUT, mput). - %% Inker key type used for 'batch' objects --define(INKT_KEYD, keyd). - %% Inker key type used for objects which contain no value, only key changes - %% This is used currently for objects formed under a 'retain' strategy - %% on Inker compaction +%% Inker key type used for 'batch' objects +-define(INKT_KEYD, keyd). +%% Inker key type used for objects which contain no value, only key changes +%% This is used currently for objects formed under a 'retain' strategy +%% on Inker compaction -define(INKT_TOMB, tomb). - %% Inker key type used for tombstones +%% Inker key type used for tombstones %%%============================================================================ %%%============================================================================ @@ -91,92 +93,95 @@ %%%============================================================================ %%% Shared records %%%============================================================================ --record(level, - {level :: integer(), - is_basement = false :: boolean(), - timestamp :: integer()}). +-record(level, { + level :: integer(), + is_basement = false :: boolean(), + timestamp :: integer() +}). --record(cdb_options, - {max_size :: pos_integer() | undefined, - max_count :: pos_integer() | undefined, - file_path :: string() | undefined, - waste_path :: string() | undefined, - binary_mode = false :: boolean(), - % Default set by bookie to be `true` - % `false` set here due to legacy of unit tests - % using non-binary keys - sync_strategy = ?DEFAULT_SYNC_STRATEGY, - log_options = leveled_log:get_opts() - :: leveled_log:log_options(), - monitor = {no_monitor, 0} - :: leveled_monitor:monitor()}). +-record(cdb_options, { + max_size :: pos_integer() | undefined, + max_count :: pos_integer() | undefined, + file_path :: string() | undefined, + waste_path :: string() | undefined, + binary_mode = false :: boolean(), + % Default set by bookie to be `true` + % `false` set here due to legacy of unit tests + % using non-binary keys + sync_strategy = ?DEFAULT_SYNC_STRATEGY, + log_options = leveled_log:get_opts() :: + leveled_log:log_options(), + monitor = {no_monitor, 0} :: + leveled_monitor:monitor() +}). --record(sst_options, - { - press_method = ?COMPRESSION_METHOD - :: leveled_sst:press_method(), - block_version = ?DEFAULT_BLOCK_VERSION - :: leveled_sst:block_version(), - press_level = ?COMPRESSION_LEVEL :: non_neg_integer(), - log_options = leveled_log:get_opts() - :: leveled_log:log_options(), - max_sstslots = ?MAX_SSTSLOTS :: pos_integer()|infinity, - max_mergebelow = ?MAX_MERGEBELOW :: pos_integer()|infinity, - pagecache_level = ?SST_PAGECACHELEVEL_NOLOOKUP - :: pos_integer(), - monitor = {no_monitor, 0} - :: leveled_monitor:monitor() - } - ). 
+-record(sst_options, { + press_method = ?COMPRESSION_METHOD :: + leveled_sst:press_method(), + block_version = ?DEFAULT_BLOCK_VERSION :: + leveled_sst:block_version(), + press_level = ?COMPRESSION_LEVEL :: non_neg_integer(), + log_options = leveled_log:get_opts() :: + leveled_log:log_options(), + max_sstslots = ?MAX_SSTSLOTS :: pos_integer() | infinity, + max_mergebelow = ?MAX_MERGEBELOW :: pos_integer() | infinity, + pagecache_level = ?SST_PAGECACHELEVEL_NOLOOKUP :: + pos_integer(), + monitor = {no_monitor, 0} :: + leveled_monitor:monitor() +}). --record(inker_options, - {cdb_max_size :: integer() | undefined, - root_path :: string() | undefined, - cdb_options = #cdb_options{} :: #cdb_options{}, - start_snapshot = false :: boolean(), - bookies_pid :: pid() | undefined, - source_inker :: pid() | undefined, - reload_strategy = [] :: list(), - waste_retention_period :: integer() | undefined, - compression_method = ?COMPRESSION_METHOD - :: lz4|native|none, - compress_on_receipt = false :: boolean(), - max_run_length, - singlefile_compactionperc :: float()|undefined, - maxrunlength_compactionperc :: float()|undefined, - score_onein = 1 :: pos_integer(), - snaptimeout_long = 60 :: pos_integer(), - monitor = {no_monitor, 0} - :: leveled_monitor:monitor()}). +-record(inker_options, { + cdb_max_size :: integer() | undefined, + root_path :: string() | undefined, + cdb_options = #cdb_options{} :: #cdb_options{}, + start_snapshot = false :: boolean(), + bookies_pid :: pid() | undefined, + source_inker :: pid() | undefined, + reload_strategy = [] :: list(), + waste_retention_period :: integer() | undefined, + compression_method = ?COMPRESSION_METHOD :: + lz4 | native | none, + compress_on_receipt = false :: boolean(), + max_run_length, + singlefile_compactionperc :: float() | undefined, + maxrunlength_compactionperc :: float() | undefined, + score_onein = 1 :: pos_integer(), + snaptimeout_long = 60 :: pos_integer(), + monitor = {no_monitor, 0} :: + leveled_monitor:monitor() +}). --record(penciller_options, - {root_path :: string() | undefined, - sst_options = #sst_options{} :: #sst_options{}, - max_inmemory_tablesize = ?MIN_PCL_CACHE_SIZE - :: pos_integer(), - start_snapshot = false :: boolean(), - snapshot_query, - bookies_pid :: pid() | undefined, - bookies_mem :: tuple() | undefined, - source_penciller :: pid() | undefined, - snapshot_longrunning = true :: boolean(), - compression_method = ?COMPRESSION_METHOD - :: lz4|native|none, - levelzero_cointoss = false :: boolean(), - snaptimeout_short :: pos_integer() | undefined, - snaptimeout_long :: pos_integer() | undefined, - monitor = {no_monitor, 0} - :: leveled_monitor:monitor()}). +-record(penciller_options, { + root_path :: string() | undefined, + sst_options = #sst_options{} :: #sst_options{}, + max_inmemory_tablesize = ?MIN_PCL_CACHE_SIZE :: + pos_integer(), + start_snapshot = false :: boolean(), + snapshot_query, + bookies_pid :: pid() | undefined, + bookies_mem :: tuple() | undefined, + source_penciller :: pid() | undefined, + snapshot_longrunning = true :: boolean(), + compression_method = ?COMPRESSION_METHOD :: + lz4 | native | none, + levelzero_cointoss = false :: boolean(), + snaptimeout_short :: pos_integer() | undefined, + snaptimeout_long :: pos_integer() | undefined, + monitor = {no_monitor, 0} :: + leveled_monitor:monitor() +}). 
--record(iclerk_options, - {inker :: pid() | undefined, - max_run_length :: integer() | undefined, - cdb_options = #cdb_options{} :: #cdb_options{}, - waste_retention_period :: integer() | undefined, - compression_method = ?COMPRESSION_METHOD - :: lz4|native|none, - singlefile_compactionperc :: float()|undefined, - maxrunlength_compactionperc :: float()|undefined, - score_onein = 1 :: pos_integer(), - reload_strategy = [] :: list()}). -%%%============================================================================ \ No newline at end of file +-record(iclerk_options, { + inker :: pid() | undefined, + max_run_length :: integer() | undefined, + cdb_options = #cdb_options{} :: #cdb_options{}, + waste_retention_period :: integer() | undefined, + compression_method = ?COMPRESSION_METHOD :: + lz4 | native | none, + singlefile_compactionperc :: float() | undefined, + maxrunlength_compactionperc :: float() | undefined, + score_onein = 1 :: pos_integer(), + reload_strategy = [] :: list() +}). +%%%============================================================================ diff --git a/rebar.config b/rebar.config index a3794cc3..2f262a44 100644 --- a/rebar.config +++ b/rebar.config @@ -1,41 +1,75 @@ {minimum_otp_vsn, "22.3"}. -{xref_checks, - [undefined_function_calls,undefined_functions, - deprecated_function_calls, deprecated_functions]}. - -{cover_excl_mods, - [leveled_filterlexer, leveled_filterparser, - leveled_evallexer, leveled_evalparser, - leveled_setoplexer, leveled_setopparser, +{xref_checks, [ + undefined_function_calls, + undefined_functions, + deprecated_function_calls, + deprecated_functions +]}. + +{cover_excl_mods, [ + leveled_filterlexer, + leveled_filterparser, + leveled_evallexer, + leveled_evalparser, + leveled_setoplexer, + leveled_setopparser, testutil, - appdefined_SUITE, basic_SUITE, iterator_SUITE, - perf_SUITE, recovery_SUITE, riak_SUITE, tictac_SUITE]}. + appdefined_SUITE, + basic_SUITE, + iterator_SUITE, + perf_SUITE, + recovery_SUITE, + riak_SUITE, + tictac_SUITE +]}. {eunit_opts, [verbose]}. {project_plugins, [ - {eqwalizer_rebar3, {git_subdir, "https://github.com/OpenRiak/eqwalizer.git", {branch, "openriak-3.4"}, "eqwalizer_rebar3"}} + {eqwalizer_rebar3, + {git_subdir, "https://github.com/OpenRiak/eqwalizer.git", + {branch, "openriak-3.4"}, "eqwalizer_rebar3"}}, + {erlfmt, {git, "https://github.com/OpenRiak/erlfmt.git", {branch, "main"}}} ]}. -{profiles, - [{eqc, [{deps, [meck, fqc]}, - {erl_opts, [debug_info, {d, 'EQC'}]}, - {extra_src_dirs, ["test/property", "test/end_to_end"]}, - {shell, [{apps, [lz4]}]}, - {plugins, [rebar_eqc]} - ]}, - {test, [{extra_src_dirs, ["test/end_to_end", "test/property"]} - ]}, - {perf_full, [{erl_opts, [{d, performance, riak_fullperf}]}]}, - {perf_mini, [{erl_opts, [{d, performance, riak_miniperf}]}]}, - {perf_prof, [{erl_opts, [{d, performance, riak_profileperf}]}]} - ]}. +{erlfmt, [ + write, + {print_width, 80}, + {files, [ + "{src,include}/*.{hrl,erl,app.src}", + "test/end_to_end/*.erl", + "rebar.config" + ]}, + {exclude_files, [ + "src/erlfmt_parse.erl", "src/*lexer.erl", "src/*parser.erl" + ]} +]}. 
+ +{profiles, [ + {eqc, [ + {deps, [meck, fqc]}, + {erl_opts, [debug_info, {d, 'EQC'}]}, + {extra_src_dirs, ["test/property", "test/end_to_end"]}, + {shell, [{apps, [lz4]}]}, + {plugins, [rebar_eqc]} + ]}, + {test, [{extra_src_dirs, ["test/end_to_end", "test/property"]}]}, + {perf_full, [{erl_opts, [{d, performance, riak_fullperf}]}]}, + {perf_mini, [{erl_opts, [{d, performance, riak_miniperf}]}]}, + {perf_prof, [{erl_opts, [{d, performance, riak_profileperf}]}]} +]}. {deps, [ - {lz4, ".*", {git, "https://github.com/OpenRiak/erlang-lz4", {branch, "openriak-3.4"}}}, - {zstd, ".*", {git, "https://github.com/OpenRiak/zstd-erlang", {branch, "openriak-3.2"}}}, - {eqwalizer_support, {git_subdir, "https://github.com/OpenRiak/eqwalizer.git", {branch, "openriak-3.4"}, "eqwalizer_support"}} - ]}. + {lz4, ".*", + {git, "https://github.com/OpenRiak/erlang-lz4", + {branch, "openriak-3.4"}}}, + {zstd, ".*", + {git, "https://github.com/OpenRiak/zstd-erlang", + {branch, "openriak-3.2"}}}, + {eqwalizer_support, + {git_subdir, "https://github.com/OpenRiak/eqwalizer.git", + {branch, "openriak-3.4"}, "eqwalizer_support"}} +]}. {ct_opts, [{dir, ["test/end_to_end"]}]}. diff --git a/src/leveled.app.src b/src/leveled.app.src index 86bf4233..519a64f3 100644 --- a/src/leveled.app.src +++ b/src/leveled.app.src @@ -1,16 +1,16 @@ -{application, leveled, - [ - {description, "Key Value store based on LSM-Tree and designed for larger values"}, - {vsn, git}, - {registered, []}, - {applications, [ - kernel, - stdlib, - lz4, - zstd - ]}, - {maintainers, ["Martin Sumner"]}, - {licenses, ["Apache"]}, - {links, [{"Github", "https://github.com/martinsumner/leveled"}]}, - {env, [{root_path, "test"}]} - ]}. +{application, leveled, [ + {description, + "Key Value store based on LSM-Tree and designed for larger values"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + lz4, + zstd + ]}, + {maintainers, ["Martin Sumner"]}, + {licenses, ["Apache"]}, + {links, [{"Github", "https://github.com/martinsumner/leveled"}]}, + {env, [{root_path, "test"}]} +]}. diff --git a/src/leveled_bookie.erl b/src/leveled_bookie.erl index 545110c5..65611021 100644 --- a/src/leveled_bookie.erl +++ b/src/leveled_bookie.erl @@ -33,368 +33,374 @@ %% the Ledger. %% - -module(leveled_bookie). -behaviour(gen_server). -include("leveled.hrl"). --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3, - book_start/1, - book_start/4, - book_plainstart/1, - book_put/5, - book_put/6, - book_put/8, - book_tempput/7, - book_mput/2, - book_mput/3, - book_delete/4, - book_get/3, - book_get/4, - book_head/3, - book_head/4, - book_sqn/3, - book_sqn/4, - book_headonly/4, - book_snapshot/4, - book_compactjournal/2, - book_islastcompactionpending/1, - book_trimjournal/1, - book_hotbackup/1, - book_close/1, - book_destroy/1, - book_isempty/2, - book_logsettings/1, - book_loglevel/2, - book_addlogs/2, - book_removelogs/2, - book_headstatus/1 - ]). 
+-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3, + book_start/1, + book_start/4, + book_plainstart/1, + book_put/5, + book_put/6, + book_put/8, + book_tempput/7, + book_mput/2, + book_mput/3, + book_delete/4, + book_get/3, + book_get/4, + book_head/3, + book_head/4, + book_sqn/3, + book_sqn/4, + book_headonly/4, + book_snapshot/4, + book_compactjournal/2, + book_islastcompactionpending/1, + book_trimjournal/1, + book_hotbackup/1, + book_close/1, + book_destroy/1, + book_isempty/2, + book_logsettings/1, + book_loglevel/2, + book_addlogs/2, + book_removelogs/2, + book_headstatus/1 +]). %% folding API -export([ - book_returnfolder/2, - book_indexfold/5, - book_multiindexfold/5, - book_bucketlist/4, - book_keylist/3, - book_keylist/4, - book_keylist/5, - book_keylist/6, - book_objectfold/4, - book_objectfold/5, - book_objectfold/6, - book_headfold/6, - book_headfold/7, - book_headfold/9 - ]). - --export([empty_ledgercache/0, - snapshot_store/7, - fetch_value/2, - journal_notfound/4]). + book_returnfolder/2, + book_indexfold/5, + book_multiindexfold/5, + book_bucketlist/4, + book_keylist/3, + book_keylist/4, + book_keylist/5, + book_keylist/6, + book_objectfold/4, + book_objectfold/5, + book_objectfold/6, + book_headfold/6, + book_headfold/7, + book_headfold/9 +]). + +-export([ + empty_ledgercache/0, + snapshot_store/7, + fetch_value/2, + journal_notfound/4 +]). -ifdef(TEST). -export([book_returnactors/1]). -endif. --define(DUMMY, dummy). % Dummy key used for mput operations - --define(OPTION_DEFAULTS, - [{root_path, undefined}, - {snapshot_bookie, undefined}, - {cache_size, ?CACHE_SIZE}, - {cache_multiple, ?MAX_CACHE_MULTTIPLE}, - {max_journalsize, 1000000000}, - {max_journalobjectcount, 200000}, - {max_sstslots, 256}, - {max_mergebelow, 24}, - {sync_strategy, ?DEFAULT_SYNC_STRATEGY}, - {head_only, false}, - {waste_retention_period, undefined}, - {max_run_length, undefined}, - {singlefile_compactionpercentage, 30.0}, - {maxrunlength_compactionpercentage, 70.0}, - {journalcompaction_scoreonein, 1}, - {reload_strategy, []}, - {max_pencillercachesize, ?MAX_PCL_CACHE_SIZE}, - {ledger_preloadpagecache_level, ?SST_PAGECACHELEVEL_LOOKUP}, - {compression_method, ?COMPRESSION_METHOD}, - {ledger_compression, as_store}, - {block_version, 1}, - {compression_point, ?COMPRESSION_POINT}, - {compression_level, ?COMPRESSION_LEVEL}, - {log_level, ?LOG_LEVEL}, - {forced_logs, []}, - {database_id, ?DEFAULT_DBID}, - {override_functions, []}, - {snapshot_timeout_short, ?SNAPTIMEOUT_SHORT}, - {snapshot_timeout_long, ?SNAPTIMEOUT_LONG}, - {stats_percentage, ?DEFAULT_STATS_PERC}, - {stats_logfrequency, - element(1, leveled_monitor:get_defaults())}, - {monitor_loglist, - element(2, leveled_monitor:get_defaults())}]). - --record(ledger_cache, {mem :: ets:tab(), - loader = leveled_tree:empty(?CACHE_TYPE) - :: tuple()|empty_cache, - load_queue = [] :: list(), - index = leveled_pmem:new_index(), - min_sqn = infinity :: integer()|infinity, - max_sqn = 0 :: integer()}). 
- --record(state, {inker :: pid() | null, - penciller :: pid() | undefined, - cache_size :: pos_integer() | undefined, - cache_multiple :: pos_integer() | undefined, - ledger_cache = #ledger_cache{} :: ledger_cache(), - is_snapshot :: boolean() | undefined, - slow_offer = false :: boolean(), - head_only = false :: boolean(), - head_lookup = true :: boolean(), - ink_checking = ?MAX_KEYCHECK_FREQUENCY :: integer(), - bookie_monref :: reference() | undefined, - monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). - +% Dummy key used for mput operations +-define(DUMMY, dummy). + +-define(OPTION_DEFAULTS, [ + {root_path, undefined}, + {snapshot_bookie, undefined}, + {cache_size, ?CACHE_SIZE}, + {cache_multiple, ?MAX_CACHE_MULTTIPLE}, + {max_journalsize, 1000000000}, + {max_journalobjectcount, 200000}, + {max_sstslots, 256}, + {max_mergebelow, 24}, + {sync_strategy, ?DEFAULT_SYNC_STRATEGY}, + {head_only, false}, + {waste_retention_period, undefined}, + {max_run_length, undefined}, + {singlefile_compactionpercentage, 30.0}, + {maxrunlength_compactionpercentage, 70.0}, + {journalcompaction_scoreonein, 1}, + {reload_strategy, []}, + {max_pencillercachesize, ?MAX_PCL_CACHE_SIZE}, + {ledger_preloadpagecache_level, ?SST_PAGECACHELEVEL_LOOKUP}, + {compression_method, ?COMPRESSION_METHOD}, + {ledger_compression, as_store}, + {block_version, 1}, + {compression_point, ?COMPRESSION_POINT}, + {compression_level, ?COMPRESSION_LEVEL}, + {log_level, ?LOG_LEVEL}, + {forced_logs, []}, + {database_id, ?DEFAULT_DBID}, + {override_functions, []}, + {snapshot_timeout_short, ?SNAPTIMEOUT_SHORT}, + {snapshot_timeout_long, ?SNAPTIMEOUT_LONG}, + {stats_percentage, ?DEFAULT_STATS_PERC}, + {stats_logfrequency, element(1, leveled_monitor:get_defaults())}, + {monitor_loglist, element(2, leveled_monitor:get_defaults())} +]). + +-record(ledger_cache, { + mem :: ets:tab(), + loader = leveled_tree:empty(?CACHE_TYPE) :: + tuple() | empty_cache, + load_queue = [] :: list(), + index = leveled_pmem:new_index(), + min_sqn = infinity :: integer() | infinity, + max_sqn = 0 :: integer() +}). + +-record(state, { + inker :: pid() | null, + penciller :: pid() | undefined, + cache_size :: pos_integer() | undefined, + cache_multiple :: pos_integer() | undefined, + ledger_cache = #ledger_cache{} :: ledger_cache(), + is_snapshot :: boolean() | undefined, + slow_offer = false :: boolean(), + head_only = false :: boolean(), + head_lookup = true :: boolean(), + ink_checking = ?MAX_KEYCHECK_FREQUENCY :: integer(), + bookie_monref :: reference() | undefined, + monitor = {no_monitor, 0} :: leveled_monitor:monitor() +}). -type book_state() :: #state{}. --type sync_mode() :: sync|none|riak_sync. +-type sync_mode() :: sync | none | riak_sync. -type ledger_cache() :: #ledger_cache{}. - --type open_options() :: +-type open_options() :: %% For full description of options see ../docs/STARTUP_OPTIONS.md - [{root_path, string()|undefined} | - % Folder to be used as the root path for storing all the database - % information. Should be undefined is snapshot_bookie is a pid() - % TODO: Some sort of split root path to allow for mixed classes of - % storage (e.g. like eleveldb tiered storage - only with - % separation between ledger and non-current journal) - {snapshot_bookie, undefined|pid()} | - % Is the bookie being started required to a be a snapshot of an - % existing bookie, rather than a new bookie. 
The bookie to be - % snapped should have its pid passed as the startup option in this - % case - {cache_size, pos_integer()} | - % The size of the Bookie's memory, the cache of the recent - % additions to the ledger. Defaults to ?CACHE_SIZE, plus some - % randomised jitter (randomised jitter will still be added to - % configured values) - % The minimum value is 100 - any lower value will be ignored - {cache_multiple, pos_integer()} | - % A multiple of the cache size beyond which the cache should not - % grow even if the penciller is busy. A pasue will be returned for - % every PUT when this multiple of the cache_size is reached - {max_journalsize, pos_integer()} | - % The maximum size of a journal file in bytes. The absolute - % maximum must be 4GB due to 4 byte file pointers being used - {max_journalobjectcount, pos_integer()} | - % The maximum size of the journal by count of the objects. The - % journal must remain within the limit set by both this figures and - % the max_journalsize - {max_sstslots, pos_integer()} | - % The maximum number of slots in a SST file. All testing is done - % at a size of 256 (except for Quickcheck tests}, altering this - % value is not recommended - {max_mergeblow, pos_integer()|infinity} | - % The maximum number of files for a single file to be merged into - % within the ledger. If less than this, the merge will continue - % without a maximum. If this or more overlapping below, only up - % to max_mergebelow div 2 additions should be created (the merge - % should be partial) - {sync_strategy, sync_mode()} | - % Should be sync if it is necessary to flush to disk after every - % write, or none if not (allow the OS to schecdule). This has a - % significant impact on performance which can be mitigated - % partially in hardware (e.g through use of FBWC). - % riak_sync is used for backwards compatability with OTP16 - and - % will manually call sync() after each write (rather than use the - % O_SYNC option on startup) - {head_only, false|with_lookup|no_lookup} | - % When set to true, there are three fundamental changes as to how - % leveled will work: - % - Compaction of the journalwill be managed by simply removing any - % journal file thathas a highest sequence number persisted to the - % ledger; - % - GETs are not supported, only head requests; - % - PUTs should arrive batched object specs using the book_mput/2 - % function. - % head_only mode is disabled with false (default). There are two - % different modes in which head_only can run with_lookup or - % no_lookup and heaD_only mode is enabled by passing one of these - % atoms: - % - with_lookup assumes that individual objects may need to be - % fetched; - % - no_lookup prevents individual objects from being fetched, so - % that the store can only be used for folds (without segment list - % acceleration) - {waste_retention_period, undefined|pos_integer()} | - % If a value is not required in the journal (i.e. it has been - % replaced and is now to be removed for compaction) for how long - % should it be retained. For example should it be kept for a - % period until the operator cna be sure a backup has been - % completed? - % If undefined, will not retian waste, otherwise the period is the - % number of seconds to wait - {max_run_length, undefined|pos_integer()} | - % The maximum number of consecutive files that can be compacted in - % one compaction operation. 
- % Defaults to leveled_iclerk:?MAX_COMPACTION_RUN (if undefined) - {singlefile_compactionpercentage, float()} | - % What is the percentage of space to be recovered from compacting - % a single file, before that file can be a compaction candidate in - % a compaction run of length 1 - {maxrunlength_compactionpercentage, float()} | - % What is the percentage of space to be recovered from compacting - % a run of max_run_length, before that run can be a compaction - % candidate. For runs between 1 and max_run_length, a - % proportionate score is calculated - {journalcompaction_scoreonein, pos_integer()} | - % When scoring for compaction run a probability (1 in x) of whether - % any file will be scored this run. If not scored a cached score - % will be used, and the cached score is the average of the latest - % score and the rolling average of previous scores - {reload_strategy, list()} | - % The reload_strategy is exposed as an option as currently no firm - % decision has been made about how recovery from failure should - % work. For instance if we were to trust everything as permanent - % in the Ledger once it is persisted, then there would be no need - % to retain a skinny history of key changes in the Journal after - % compaction. If, as an alternative we assume the Ledger is never - % permanent, and retain the skinny hisory - then backups need only - % be made against the Journal. The skinny history of key changes - % is primarily related to the issue of supporting secondary indexes - % in Riak. - % - % These two strategies are referred to as recovr (assume we can - % recover any deltas from a lost ledger and a lost history through - % resilience outside of the store), or retain (retain a history of - % key changes, even when the object value has been compacted). - % - % There is a third strategy, which is recalc, where on reloading - % the Ledger from the Journal, the key changes are recalculated by - % comparing the extracted metadata from the Journal object, with the - % extracted metadata from the current Ledger object it is set to - % replace (should one be present). Implementing the recalc - % strategy requires a override function for - % `leveled_head:diff_indexspecs/3`. - % A function for the ?RIAK_TAG is provided and tested. - % - % reload_strategy options are a list - to map from a tag to the - % strategy (recovr|retain|recalc). Defualt strategies are: - % [{?RIAK_TAG, retain}, {?STD_TAG, retain}] - {max_pencillercachesize, pos_integer()|undefined} | - % How many ledger keys should the penciller retain in memory - % between flushing new level zero files. - % Defaults to ?MAX_PCL_CACHE_SIZE when undefined - % The minimum size 400 - attempt to set this vlaue lower will be - % ignored. As a rule the value should be at least 4 x the Bookie's - % cache size - {ledger_preloadpagecache_level, pos_integer()} | - % To which level of the ledger should the ledger contents be - % pre-loaded into the pagecache (using fadvise on creation and - % startup) - {compression_method, native|lz4|zstd|none} | - % Compression method and point allow Leveled to be switched from - % using bif based compression (zlib) to using nif based compression - % (lz4 or zstd). - % Defaults to ?COMPRESSION_METHOD - {ledger_compression, as_store|native|lz4|zstd|none} | - % Define an alternative to the compression method to be used by the - % ledger only. Default is as_store - use the method defined as - % compression_method for the whole store - {block_version, 0|1} | - % Version of the leveled_sst blocks. 
Block version 0 does not use - % sub-blocks, whereas block version 1 has multiple types of blocks - % which can be split into sub-blocks - {compression_point, on_compact|on_receipt} | - % The =compression point can be changed between on_receipt (all - % values are compressed as they are received), to on_compact where - % values are originally stored uncompressed (speeding PUT times), - % and are only compressed when they are first subject to compaction - % Defaults to ?COMPRESSION_POINT - {compression_level, 0..7} | - % At what level of the LSM tree in the ledger should compression be - % enabled. - % Defaults to ?COMPRESSION_LEVEL - {log_level, debug|info|warn|error|critical} | - % Set the log level. The default log_level of info is noisy - the - % current implementation was targetted at environments that have - % facilities to index large proportions of logs and allow for - % dynamic querying of those indexes to output relevant stats. - % - % As an alternative a higher log_level can be used to reduce this - % 'noise', however, there is currently no separate stats facility - % to gather relevant information outside of info level logs. So - % moving to higher log levels will at present make the operator - % blind to sample performance statistics of leveled sub-components - % etc - {forced_logs, list(atom())} | - % Forced logs allow for specific info level logs, such as those - % logging stats to be logged even when the default log level has - % been set to a higher log level. Using: - % {forced_logs, - % [b0015, b0016, b0017, b0018, p0032, sst12]} - % Will log all timing points even when log_level is not set to - % support info - {database_id, non_neg_integer()} | - % Integer database ID to be used in logs - {override_functions, list(leveled_head:appdefinable_function_tuple())} | - % Provide a list of override functions that will be used for - % user-defined tags - {snapshot_timeout_short, pos_integer()} | - % Time in seconds before a snapshot that has not been shutdown is - % assumed to have failed, and so requires to be torndown. The - % short timeout is applied to queries where long_running is set to - % false - {snapshot_timeout_long, pos_integer()} | - % Time in seconds before a snapshot that has not been shutdown is - % assumed to have failed, and so requires to be torndown. The - % short timeout is applied to queries where long_running is set to - % true - {stats_percentage, 0..100} | - % Probability that stats will be collected for an individual - % request. - {stats_logfrequency, pos_integer()} | - % Time in seconds before logging the next timing log. This covers - % the logs associated with the timing of GET/PUTs in various parts - % of the system. There are 7 such logs - so setting to 30s will - % mean that each inidividual log will occur every 210s - {monitor_loglist, list(leveled_monitor:log_type())} - ]. + [ + {root_path, string() | undefined} + % Folder to be used as the root path for storing all the database + % information. Should be undefined is snapshot_bookie is a pid() + % TODO: Some sort of split root path to allow for mixed classes of + % storage (e.g. like eleveldb tiered storage - only with + % separation between ledger and non-current journal) + | {snapshot_bookie, undefined | pid()} + % Is the bookie being started required to a be a snapshot of an + % existing bookie, rather than a new bookie. 
The bookie to be + % snapped should have its pid passed as the startup option in this + % case + | {cache_size, pos_integer()} + % The size of the Bookie's memory, the cache of the recent + % additions to the ledger. Defaults to ?CACHE_SIZE, plus some + % randomised jitter (randomised jitter will still be added to + % configured values) + % The minimum value is 100 - any lower value will be ignored + | {cache_multiple, pos_integer()} + % A multiple of the cache size beyond which the cache should not + % grow even if the penciller is busy. A pasue will be returned for + % every PUT when this multiple of the cache_size is reached + | {max_journalsize, pos_integer()} + % The maximum size of a journal file in bytes. The absolute + % maximum must be 4GB due to 4 byte file pointers being used + | {max_journalobjectcount, pos_integer()} + % The maximum size of the journal by count of the objects. The + % journal must remain within the limit set by both this figures and + % the max_journalsize + | {max_sstslots, pos_integer()} + % The maximum number of slots in a SST file. All testing is done + % at a size of 256 (except for Quickcheck tests}, altering this + % value is not recommended + | {max_mergeblow, pos_integer() | infinity} + % The maximum number of files for a single file to be merged into + % within the ledger. If less than this, the merge will continue + % without a maximum. If this or more overlapping below, only up + % to max_mergebelow div 2 additions should be created (the merge + % should be partial) + | {sync_strategy, sync_mode()} + % Should be sync if it is necessary to flush to disk after every + % write, or none if not (allow the OS to schecdule). This has a + % significant impact on performance which can be mitigated + % partially in hardware (e.g through use of FBWC). + % riak_sync is used for backwards compatability with OTP16 - and + % will manually call sync() after each write (rather than use the + % O_SYNC option on startup) + | {head_only, false | with_lookup | no_lookup} + % When set to true, there are three fundamental changes as to how + % leveled will work: + % - Compaction of the journalwill be managed by simply removing any + % journal file thathas a highest sequence number persisted to the + % ledger; + % - GETs are not supported, only head requests; + % - PUTs should arrive batched object specs using the book_mput/2 + % function. + % head_only mode is disabled with false (default). There are two + % different modes in which head_only can run with_lookup or + % no_lookup and heaD_only mode is enabled by passing one of these + % atoms: + % - with_lookup assumes that individual objects may need to be + % fetched; + % - no_lookup prevents individual objects from being fetched, so + % that the store can only be used for folds (without segment list + % acceleration) + | {waste_retention_period, undefined | pos_integer()} + % If a value is not required in the journal (i.e. it has been + % replaced and is now to be removed for compaction) for how long + % should it be retained. For example should it be kept for a + % period until the operator cna be sure a backup has been + % completed? + % If undefined, will not retian waste, otherwise the period is the + % number of seconds to wait + | {max_run_length, undefined | pos_integer()} + % The maximum number of consecutive files that can be compacted in + % one compaction operation. 
+ % Defaults to leveled_iclerk:?MAX_COMPACTION_RUN (if undefined) + | {singlefile_compactionpercentage, float()} + % What is the percentage of space to be recovered from compacting + % a single file, before that file can be a compaction candidate in + % a compaction run of length 1 + | {maxrunlength_compactionpercentage, float()} + % What is the percentage of space to be recovered from compacting + % a run of max_run_length, before that run can be a compaction + % candidate. For runs between 1 and max_run_length, a + % proportionate score is calculated + | {journalcompaction_scoreonein, pos_integer()} + % When scoring for compaction run a probability (1 in x) of whether + % any file will be scored this run. If not scored a cached score + % will be used, and the cached score is the average of the latest + % score and the rolling average of previous scores + | {reload_strategy, list()} + % The reload_strategy is exposed as an option as currently no firm + % decision has been made about how recovery from failure should + % work. For instance if we were to trust everything as permanent + % in the Ledger once it is persisted, then there would be no need + % to retain a skinny history of key changes in the Journal after + % compaction. If, as an alternative we assume the Ledger is never + % permanent, and retain the skinny hisory - then backups need only + % be made against the Journal. The skinny history of key changes + % is primarily related to the issue of supporting secondary indexes + % in Riak. + % + % These two strategies are referred to as recovr (assume we can + % recover any deltas from a lost ledger and a lost history through + % resilience outside of the store), or retain (retain a history of + % key changes, even when the object value has been compacted). + % + % There is a third strategy, which is recalc, where on reloading + % the Ledger from the Journal, the key changes are recalculated by + % comparing the extracted metadata from the Journal object, with the + % extracted metadata from the current Ledger object it is set to + % replace (should one be present). Implementing the recalc + % strategy requires a override function for + % `leveled_head:diff_indexspecs/3`. + % A function for the ?RIAK_TAG is provided and tested. + % + % reload_strategy options are a list - to map from a tag to the + % strategy (recovr|retain|recalc). Defualt strategies are: + % [{?RIAK_TAG, retain}, {?STD_TAG, retain}] + | {max_pencillercachesize, pos_integer() | undefined} + % How many ledger keys should the penciller retain in memory + % between flushing new level zero files. + % Defaults to ?MAX_PCL_CACHE_SIZE when undefined + % The minimum size 400 - attempt to set this vlaue lower will be + % ignored. As a rule the value should be at least 4 x the Bookie's + % cache size + | {ledger_preloadpagecache_level, pos_integer()} + % To which level of the ledger should the ledger contents be + % pre-loaded into the pagecache (using fadvise on creation and + % startup) + | {compression_method, native | lz4 | zstd | none} + % Compression method and point allow Leveled to be switched from + % using bif based compression (zlib) to using nif based compression + % (lz4 or zstd). + % Defaults to ?COMPRESSION_METHOD + | {ledger_compression, as_store | native | lz4 | zstd | none} + % Define an alternative to the compression method to be used by the + % ledger only. 
Default is as_store - use the method defined as + % compression_method for the whole store + | {block_version, 0 | 1} + % Version of the leveled_sst blocks. Block version 0 does not use + % sub-blocks, whereas block version 1 has multiple types of blocks + % which can be split into sub-blocks + | {compression_point, on_compact | on_receipt} + % The =compression point can be changed between on_receipt (all + % values are compressed as they are received), to on_compact where + % values are originally stored uncompressed (speeding PUT times), + % and are only compressed when they are first subject to compaction + % Defaults to ?COMPRESSION_POINT + | {compression_level, 0..7} + % At what level of the LSM tree in the ledger should compression be + % enabled. + % Defaults to ?COMPRESSION_LEVEL + | {log_level, debug | info | warn | error | critical} + % Set the log level. The default log_level of info is noisy - the + % current implementation was targetted at environments that have + % facilities to index large proportions of logs and allow for + % dynamic querying of those indexes to output relevant stats. + % + % As an alternative a higher log_level can be used to reduce this + % 'noise', however, there is currently no separate stats facility + % to gather relevant information outside of info level logs. So + % moving to higher log levels will at present make the operator + % blind to sample performance statistics of leveled sub-components + % etc + | {forced_logs, list(atom())} + % Forced logs allow for specific info level logs, such as those + % logging stats to be logged even when the default log level has + % been set to a higher log level. Using: + % {forced_logs, + % [b0015, b0016, b0017, b0018, p0032, sst12]} + % Will log all timing points even when log_level is not set to + % support info + | {database_id, non_neg_integer()} + % Integer database ID to be used in logs + | {override_functions, list(leveled_head:appdefinable_function_tuple())} + % Provide a list of override functions that will be used for + % user-defined tags + | {snapshot_timeout_short, pos_integer()} + % Time in seconds before a snapshot that has not been shutdown is + % assumed to have failed, and so requires to be torndown. The + % short timeout is applied to queries where long_running is set to + % false + | {snapshot_timeout_long, pos_integer()} + % Time in seconds before a snapshot that has not been shutdown is + % assumed to have failed, and so requires to be torndown. The + % short timeout is applied to queries where long_running is set to + % true + | {stats_percentage, 0..100} + % Probability that stats will be collected for an individual + % request. + | {stats_logfrequency, pos_integer()} + % Time in seconds before logging the next timing log. This covers + % the logs associated with the timing of GET/PUTs in various parts + % of the system. There are 7 such logs - so setting to 30s will + % mean that each inidividual log will occur every 210s + | {monitor_loglist, list(leveled_monitor:log_type())} + ]. -type load_item() :: { - leveled_codec:journal_key_tag()|null, - leveled_codec:primary_key()|?DUMMY, + leveled_codec:journal_key_tag() | null, + leveled_codec:primary_key() | ?DUMMY, leveled_codec:sqn(), - dynamic(), + dynamic(), leveled_codec:journal_keychanges(), integer() }. 
-type initial_loadfun() :: - fun((leveled_codec:journal_key(), + fun( + ( + leveled_codec:journal_key(), dynamic(), non_neg_integer(), {non_neg_integer(), non_neg_integer(), list(load_item())}, - fun((any()) -> {binary(), non_neg_integer()})) -> - {loop|stop, - { - non_neg_integer(), - non_neg_integer(), - list(load_item()) - } - } - ). + fun((any()) -> {binary(), non_neg_integer()}) + ) -> + {loop | stop, { + non_neg_integer(), + non_neg_integer(), + list(load_item()) + }} + ). -export_type([initial_loadfun/0, ledger_cache/0]). @@ -432,10 +438,14 @@ %% directly back into the Ledger. book_start(RootPath, LedgerCacheSize, JournalSize, SyncStrategy) -> - book_start(set_defaults([{root_path, RootPath}, - {cache_size, LedgerCacheSize}, - {max_journalsize, JournalSize}, - {sync_strategy, SyncStrategy}])). + book_start( + set_defaults([ + {root_path, RootPath}, + {cache_size, LedgerCacheSize}, + {max_journalsize, JournalSize}, + {sync_strategy, SyncStrategy} + ]) + ). -spec book_start(list(tuple())) -> {ok, pid()}. @@ -449,7 +459,6 @@ book_start(Opts) -> gen_server:start_link(?MODULE, [set_defaults(Opts)], []), {ok, Bookie}. - -spec book_plainstart(list(tuple())) -> {ok, pid()}. %% @doc @@ -459,10 +468,15 @@ book_plainstart(Opts) -> gen_server:start(?MODULE, [set_defaults(Opts)], []), {ok, Bookie}. - --spec book_tempput(pid(), leveled_codec:key(), leveled_codec:key(), any(), - leveled_codec:index_specs(), - leveled_codec:tag(), integer()) -> ok|pause. +-spec book_tempput( + pid(), + leveled_codec:key(), + leveled_codec:key(), + any(), + leveled_codec:index_specs(), + leveled_codec:tag(), + integer() +) -> ok | pause. %% @doc Put an object with an expiry time %% @@ -477,7 +491,8 @@ book_plainstart(Opts) -> %% history of KeyChanges will be lost on reload. book_tempput( - Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL) when is_integer(TTL) -> + Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL +) when is_integer(TTL) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL). %% @doc - Standard PUT @@ -531,55 +546,71 @@ book_put(Pid, Bucket, Key, Object, IndexSpecs) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, infinity). --spec book_put(pid(), leveled_codec:key(), leveled_codec:key(), any(), - leveled_codec:index_specs(), - leveled_codec:tag(), infinity|integer()) -> ok|pause. +-spec book_put( + pid(), + leveled_codec:key(), + leveled_codec:key(), + any(), + leveled_codec:index_specs(), + leveled_codec:tag(), + infinity | integer() +) -> ok | pause. book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL) when is_atom(Tag) -> book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL, false). --spec book_put(pid(), leveled_codec:key(), leveled_codec:key(), any(), - leveled_codec:index_specs(), - leveled_codec:tag(), infinity|integer(), - boolean()) -> ok|pause. +-spec book_put( + pid(), + leveled_codec:key(), + leveled_codec:key(), + any(), + leveled_codec:index_specs(), + leveled_codec:tag(), + infinity | integer(), + boolean() +) -> ok | pause. book_put(Pid, Bucket, Key, Object, IndexSpecs, Tag, TTL, DataSync) -> - gen_server:call(Pid, - {put, Bucket, Key, Object, IndexSpecs, Tag, TTL, DataSync}, - infinity). - + gen_server:call( + Pid, + {put, Bucket, Key, Object, IndexSpecs, Tag, TTL, DataSync}, + infinity + ). --spec book_mput(pid(), list(leveled_codec:object_spec())) -> ok|pause. +-spec book_mput(pid(), list(leveled_codec:object_spec())) -> ok | pause. 
%% @doc %% %% When the store is being run in head_only mode, batches of object specs may -%% be inserted in to the store using book_mput/2. ObjectSpecs should be -%% of the form {ObjectOp, Bucket, Key, SubKey, Value}. The Value will be -%% stored within the HEAD of the object (in the Ledger), so the full object +%% be inserted in to the store using book_mput/2. ObjectSpecs should be +%% of the form {ObjectOp, Bucket, Key, SubKey, Value}. The Value will be +%% stored within the HEAD of the object (in the Ledger), so the full object %% is retrievable using a HEAD request. The ObjectOp is either add or remove. %% %% The list should be de-duplicated before it is passed to the bookie. book_mput(Pid, ObjectSpecs) -> book_mput(Pid, ObjectSpecs, infinity). --spec book_mput(pid(), list(leveled_codec:object_spec()), infinity|integer()) - -> ok|pause. +-spec book_mput(pid(), list(leveled_codec:object_spec()), infinity | integer()) -> + ok | pause. %% @doc %% %% When the store is being run in head_only mode, batches of object specs may -%% be inserted in to the store using book_mput/2. ObjectSpecs should be -%% of the form {action, Bucket, Key, SubKey, Value}. The Value will be -%% stored within the HEAD of the object (in the Ledger), so the full object +%% be inserted in to the store using book_mput/2. ObjectSpecs should be +%% of the form {action, Bucket, Key, SubKey, Value}. The Value will be +%% stored within the HEAD of the object (in the Ledger), so the full object %% is retrievable using a HEAD request. %% %% The list should be de-duplicated before it is passed to the bookie. book_mput(Pid, ObjectSpecs, TTL) -> gen_server:call(Pid, {mput, ObjectSpecs, TTL}, infinity). --spec book_delete(pid(), - leveled_codec:key(), leveled_codec:key(), - leveled_codec:index_specs()) -> ok|pause. +-spec book_delete( + pid(), + leveled_codec:key(), + leveled_codec:key(), + leveled_codec:index_specs() +) -> ok | pause. -%% @doc +%% @doc %% %% A thin wrap around the put of a special tombstone object. There is no %% immediate reclaim of space, simply the addition of a more recent tombstone. @@ -587,21 +618,36 @@ book_mput(Pid, ObjectSpecs, TTL) -> book_delete(Pid, Bucket, Key, IndexSpecs) -> book_put(Pid, Bucket, Key, delete, IndexSpecs, ?STD_TAG). +-spec book_get( + pid(), + leveled_codec:key(), + leveled_codec:key(), + leveled_codec:tag() +) -> + {ok, any()} | not_found. +-spec book_head( + pid(), + leveled_codec:key(), + leveled_codec:key(), + leveled_codec:tag() +) -> + {ok, any()} | not_found. --spec book_get(pid(), - leveled_codec:key(), leveled_codec:key(), leveled_codec:tag()) - -> {ok, any()}|not_found. --spec book_head(pid(), - leveled_codec:key(), leveled_codec:key(), leveled_codec:tag()) - -> {ok, any()}|not_found. - --spec book_sqn(pid(), - leveled_codec:key(), leveled_codec:key(), leveled_codec:tag()) - -> {ok, non_neg_integer()}|not_found. +-spec book_sqn( + pid(), + leveled_codec:key(), + leveled_codec:key(), + leveled_codec:tag() +) -> + {ok, non_neg_integer()} | not_found. --spec book_headonly(pid(), - leveled_codec:key(), leveled_codec:key(), leveled_codec:key()) - -> {ok, any()}|not_found. +-spec book_headonly( + pid(), + leveled_codec:key(), + leveled_codec:key(), + leveled_codec:key() +) -> + {ok, any()} | not_found. %% @doc - GET and HEAD requests %% @@ -611,12 +657,11 @@ book_delete(Pid, Bucket, Key, IndexSpecs) -> %% %% GET requests first follow the path of a HEAD request, and if an object is %% found, then fetch the value from the Journal via the Inker. 
-%% -%% to perform a head request in head_only mode with_lookup, book_headonly/4 +%% +%% to perform a head request in head_only mode with_lookup, book_headonly/4 %% should be used. Not if head_only mode is false or no_lookup, then this %% request would not be supported - book_get(Pid, Bucket, Key, Tag) -> gen_server:call(Pid, {get, Bucket, Key, Tag}, infinity). @@ -630,10 +675,11 @@ book_head(Pid, Bucket, Key) -> book_head(Pid, Bucket, Key, ?STD_TAG). book_headonly(Pid, Bucket, Key, SubKey) -> - gen_server:call(Pid, - {head, Bucket, {Key, SubKey}, ?HEAD_TAG, false}, - infinity). - + gen_server:call( + Pid, + {head, Bucket, {Key, SubKey}, ?HEAD_TAG, false}, + infinity + ). book_sqn(Pid, Bucket, Key) -> book_sqn(Pid, Bucket, Key, ?STD_TAG). @@ -704,32 +750,35 @@ book_returnfolder(Pid, RunnerType) -> %% be run against the `_bin' type. %% %% Any book_indexfold query will fold over the snapshot under the control -%% of the worker process controlling the function - and that process can +%% of the worker process controlling the function - and that process can %% be interrupted by a throw, which will be forwarded to the worker (whilst %% still closing down the snapshot). This may be used, for example, to %% curtail a fold in the application at max_results -spec book_indexfold( pid(), - Constraint:: {Bucket, StartKey}, + Constraint :: {Bucket, StartKey}, FoldAccT :: {FoldFun, Acc}, Range :: {IndexField, Start, End}, - TermHandling :: {ReturnTerms, TermExpression}) -> - {async, Runner::fun(() -> dynamic())} - when Bucket::term(), - Key :: term(), - StartKey::term(), - FoldFun::fun((Bucket, Key | {IndexVal, Key}, Acc) -> Acc), - Acc::dynamic(), - IndexField::term(), - IndexVal::term(), - Start::IndexVal, - End::IndexVal, - ReturnTerms::boolean()|binary(), - TermExpression :: leveled_codec:term_expression(). - -book_indexfold(Pid, Constraint, FoldAccT, Range, TermHandling) - when is_tuple(Constraint) -> - RunnerType = + TermHandling :: {ReturnTerms, TermExpression} +) -> + {async, Runner :: fun(() -> dynamic())} +when + Bucket :: term(), + Key :: term(), + StartKey :: term(), + FoldFun :: fun((Bucket, Key | {IndexVal, Key}, Acc) -> Acc), + Acc :: dynamic(), + IndexField :: term(), + IndexVal :: term(), + Start :: IndexVal, + End :: IndexVal, + ReturnTerms :: boolean() | binary(), + TermExpression :: leveled_codec:term_expression(). + +book_indexfold(Pid, Constraint, FoldAccT, Range, TermHandling) when + is_tuple(Constraint) +-> + RunnerType = {index_query, Constraint, FoldAccT, Range, TermHandling}, book_returnfolder(Pid, RunnerType); book_indexfold(Pid, Bucket, FoldAccT, Range, TermHandling) -> @@ -741,22 +790,22 @@ book_indexfold(Pid, Bucket, FoldAccT, Range, TermHandling) -> leveled_log:log(b0019, [Bucket]), book_indexfold(Pid, {Bucket, null}, FoldAccT, Range, TermHandling). --type query() - :: {binary(), binary(), binary(), leveled_codec:term_expression()}. --type combo_fun() - :: fun((list(sets:set(leveled_codec:key()))) - -> sets:set(leveled_codec:key())). +-type query() :: + {binary(), binary(), binary(), leveled_codec:term_expression()}. +-type combo_fun() :: + fun((list(sets:set(leveled_codec:key()))) -> sets:set(leveled_codec:key())). -spec book_multiindexfold( - pid(), - leveled_codec:key(), - { - fun((leveled_codec:key(), leveled_codec:key(), term()) -> term()), - term() - }, - list({non_neg_integer(), query()}), - combo_fun()) - -> {async, fun(() -> term())}. 
+ pid(), + leveled_codec:key(), + { + fun((leveled_codec:key(), leveled_codec:key(), term()) -> term()), + term() + }, + list({non_neg_integer(), query()}), + combo_fun() +) -> + {async, fun(() -> term())}. book_multiindexfold(Pid, Bucket, FoldAccT, Queries, ComboFun) -> RunnerType = {multi_index_query, Bucket, FoldAccT, Queries, ComboFun}, @@ -772,23 +821,23 @@ book_multiindexfold(Pid, Bucket, FoldAccT, Queries, ComboFun) -> %% Runner}' where `Runner' is a fun that returns the final value of %% `FoldFun', the final `Acc' accumulator. -spec book_bucketlist(pid(), Tag, FoldAccT, Constraint) -> - {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Acc) -> Acc), - Acc :: dynamic(), - Constraint :: first | all, - Bucket :: term(), - Runner :: fun(() -> Acc). + {async, Runner} +when + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Acc) -> Acc), + Acc :: dynamic(), + Constraint :: first | all, + Bucket :: term(), + Runner :: fun(() -> Acc). book_bucketlist(Pid, Tag, FoldAccT, Constraint) -> - RunnerType= + RunnerType = case Constraint of - first-> {first_bucket, Tag, FoldAccT}; + first -> {first_bucket, Tag, FoldAccT}; all -> {bucket_list, Tag, FoldAccT} end, book_returnfolder(Pid, RunnerType). - %% @doc fold over the keys (ledger only) for a given `Tag'. Each key %% will result in a call to `FoldFun' from `FoldAccT'. `FoldFun' is a %% 3-arity function, called with `Bucket', `Key' and `Acc'. The @@ -797,18 +846,18 @@ book_bucketlist(Pid, Tag, FoldAccT, Constraint) -> %% fold and return the final value of `Acc' %% %% Any book_keylist query will fold over the snapshot under the control -%% of the worker process controlling the function - and that process can +%% of the worker process controlling the function - and that process can %% be interrupted by a throw, which will be forwarded to the worker (whilst %% still closing down the snapshot). This may be used, for example, to %% curtail a fold in the application at max_results -spec book_keylist(pid(), Tag, FoldAccT) -> {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Runner :: fun(() -> Acc). + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Runner :: fun(() -> Acc). book_keylist(Pid, Tag, FoldAccT) -> RunnerType = {keylist, Tag, FoldAccT}, book_returnfolder(Pid, RunnerType). @@ -816,13 +865,13 @@ book_keylist(Pid, Tag, FoldAccT) -> %% @doc as for book_keylist/3 but constrained to only those keys in %% `Bucket' -spec book_keylist(pid(), Tag, Bucket, FoldAccT) -> {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Runner :: fun(() -> Acc). + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Runner :: fun(() -> Acc). book_keylist(Pid, Tag, Bucket, FoldAccT) -> RunnerType = {keylist, Tag, Bucket, FoldAccT}, book_returnfolder(Pid, RunnerType). @@ -833,17 +882,18 @@ book_keylist(Pid, Tag, Bucket, FoldAccT) -> %% (inclusive.) Or the atom `all', which will return all keys in the %% `Bucket'. 
-spec book_keylist(pid(), Tag, Bucket, KeyRange, FoldAccT) -> - {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - KeyRange :: {StartKey, EndKey} | all, - StartKey :: Key, - EndKey :: Key, - Key :: term(), - Runner :: fun(() -> Acc). + {async, Runner} +when + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + KeyRange :: {StartKey, EndKey} | all, + StartKey :: Key, + EndKey :: Key, + Key :: term(), + Runner :: fun(() -> Acc). book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT) -> RunnerType = {keylist, Tag, Bucket, KeyRange, FoldAccT, undefined}, book_returnfolder(Pid, RunnerType). @@ -852,23 +902,23 @@ book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT) -> %% expression is passed to be applied against any key that is in the range. %% This is always applied to the Key and only the Key, not to any SubKey. -spec book_keylist(pid(), Tag, Bucket, KeyRange, FoldAccT, TermRegex) -> - {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - KeyRange :: {StartKey, EndKey} | all, - StartKey :: Key, - EndKey :: Key, - Key :: term(), - TermRegex :: leveled_codec:term_expression(), - Runner :: fun(() -> Acc). + {async, Runner} +when + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + KeyRange :: {StartKey, EndKey} | all, + StartKey :: Key, + EndKey :: Key, + Key :: term(), + TermRegex :: leveled_codec:term_expression(), + Runner :: fun(() -> Acc). book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT, TermRegex) -> RunnerType = {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}, book_returnfolder(Pid, RunnerType). - %% @doc fold over all the objects/values in the store in key %% order. `Tag' is the tagged type of object. `FoldAccT' is a 2-tuple, %% the first element being a 4-arity fun, that is called once for each @@ -882,15 +932,15 @@ book_keylist(Pid, Tag, Bucket, KeyRange, FoldAccT, TermRegex) -> %% `Runner' is a 0-arity function that returns the final accumulator %% from `FoldFun' -spec book_objectfold(pid(), Tag, FoldAccT, SnapPreFold) -> {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Value :: term(), - SnapPreFold :: boolean(), - Runner :: fun(() -> Acc). + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Value :: term(), + SnapPreFold :: boolean(), + Runner :: fun(() -> Acc). book_objectfold(Pid, Tag, FoldAccT, SnapPreFold) -> RunnerType = {foldobjects_allkeys, Tag, FoldAccT, SnapPreFold}, book_returnfolder(Pid, RunnerType). @@ -903,17 +953,19 @@ book_objectfold(Pid, Tag, FoldAccT, SnapPreFold) -> %% objects, this is quicker than `key_order' due to accessing the %% journal objects in thei ron disk order, not via a fold over the %% ledger. 
--spec book_objectfold(pid(), Tag, FoldAccT, SnapPreFold, Order) -> {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Value :: term(), - SnapPreFold :: boolean(), - Runner :: fun(() -> Acc), - Order :: key_order | sqn_order. +-spec book_objectfold(pid(), Tag, FoldAccT, SnapPreFold, Order) -> + {async, Runner} +when + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Value :: term(), + SnapPreFold :: boolean(), + Runner :: fun(() -> Acc), + Order :: key_order | sqn_order. book_objectfold(Pid, Tag, FoldAccT, SnapPreFold, Order) -> RunnerType = {foldobjects_allkeys, Tag, FoldAccT, SnapPreFold, Order}, book_returnfolder(Pid, RunnerType). @@ -927,39 +979,43 @@ book_objectfold(Pid, Tag, FoldAccT, SnapPreFold, Order) -> %% Query is a 3-tuple of `{IndexField, StartTerm, EndTerm}`, just as %% in book_indexfold/5 -spec book_objectfold(pid(), Tag, Bucket, Limiter, FoldAccT, SnapPreFold) -> - {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Value :: term(), - Limiter :: Range | Index, - Range :: {StartKey, EndKey} | all, - Index :: {IndexField, Start, End}, - IndexField::term(), - IndexVal::term(), - Start::IndexVal, - End::IndexVal, - StartKey :: Key, - EndKey :: Key, - SnapPreFold :: boolean(), - Runner :: fun(() -> Acc). + {async, Runner} +when + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Value :: term(), + Limiter :: Range | Index, + Range :: {StartKey, EndKey} | all, + Index :: {IndexField, Start, End}, + IndexField :: term(), + IndexVal :: term(), + Start :: IndexVal, + End :: IndexVal, + StartKey :: Key, + EndKey :: Key, + SnapPreFold :: boolean(), + Runner :: fun(() -> Acc). book_objectfold(Pid, Tag, Bucket, Limiter, FoldAccT, SnapPreFold) -> RunnerType = case Limiter of all -> {foldobjects_bybucket, Tag, Bucket, all, FoldAccT, SnapPreFold}; Range when is_tuple(Range) andalso size(Range) == 2 -> - {foldobjects_bybucket, Tag, Bucket, Range, FoldAccT, SnapPreFold}; - IndexQuery when is_tuple(IndexQuery) andalso size(IndexQuery) == 3 -> + {foldobjects_bybucket, Tag, Bucket, Range, FoldAccT, + SnapPreFold}; + IndexQuery when + is_tuple(IndexQuery) andalso size(IndexQuery) == 3 + -> IndexQuery = Limiter, - {foldobjects_byindex, Tag, Bucket, IndexQuery, FoldAccT, SnapPreFold} + {foldobjects_byindex, Tag, Bucket, IndexQuery, FoldAccT, + SnapPreFold} end, book_returnfolder(Pid, RunnerType). - %% @doc LevelEd stores not just Keys in the ledger, but also may store %% object metadata, referred to as heads (after Riak head request for %% object metadata) Often when folding over objects all that is really @@ -974,7 +1030,7 @@ book_objectfold(Pid, Tag, Bucket, Limiter, FoldAccT, SnapPreFold) -> %% head/metadata, and no object data from the journal. The `Acc' in %% the first call is that provided as the second element of `FoldAccT' %% and thereafter the return of the previous all to the fold fun. 
-%% +%% %% If `JournalCheck' is `true' then the journal is checked to see if the %% object in the ledger is present, which means a snapshot of the whole store %% is required, if `false', then no such check is performed, and only ledger @@ -984,28 +1040,39 @@ book_objectfold(Pid, Tag, Bucket, Limiter, FoldAccT, SnapPreFold) -> %% option can be used. This will snapshot the Journal, but not check for %% presence. Note that the fetch must still be made within the timefroma of %% the fold (as the snapshot will expire with the fold). -%% +%% %% `SnapPreFold' is a boolean that determines if the snapshot is taken when %% the folder is requested `true', or when when run `false'. `SegmentList' can %% be `false' meaning, all heads, or a list of integers that designate segments %% in a TicTac Tree. --spec book_headfold(pid(), Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - {async, Runner} when - Tag :: leveled_codec:tag(), - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Value :: term(), - JournalCheck :: boolean()|defer, - SnapPreFold :: boolean(), - SegmentList :: false | list(integer()), - Runner :: fun(() -> Acc). +-spec book_headfold( + pid(), Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList +) -> + {async, Runner} +when + Tag :: leveled_codec:tag(), + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Value :: term(), + JournalCheck :: boolean() | defer, + SnapPreFold :: boolean(), + SegmentList :: false | list(integer()), + Runner :: fun(() -> Acc). book_headfold(Pid, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - book_headfold(Pid, Tag, all, - FoldAccT, JournalCheck, SnapPreFold, - SegmentList, false, false). + book_headfold( + Pid, + Tag, + all, + FoldAccT, + JournalCheck, + SnapPreFold, + SegmentList, + false, + false + ). %% @doc as book_headfold/6, but with the addition of a `Limiter' that %% restricts the set of objects folded over. `Limiter' can either be a @@ -1018,29 +1085,42 @@ book_headfold(Pid, Tag, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> %% bucket, and `Range' is a 2-tuple of start key and end key, %% inclusive, or the atom `all'. The rest of the arguments are as %% `book_headfold/6' --spec book_headfold(pid(), Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - {async, Runner} when - Tag :: leveled_codec:tag(), - Limiter :: BucketList | BucketKeyRange, - BucketList :: {bucket_list, list(Bucket)}, - BucketKeyRange :: {range, Bucket, KeyRange}, - KeyRange :: {StartKey, EndKey} | all, - StartKey :: Key, - EndKey :: Key, - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Value :: term(), - JournalCheck :: boolean()|defer, - SnapPreFold :: boolean(), - SegmentList :: false | list(integer()), - Runner :: fun(() -> Acc). -book_headfold(Pid, Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentList) -> - book_headfold(Pid, Tag, Limiter, - FoldAccT, JournalCheck, SnapPreFold, - SegmentList, false, false). 
+-spec book_headfold( + pid(), Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentList +) -> + {async, Runner} +when + Tag :: leveled_codec:tag(), + Limiter :: BucketList | BucketKeyRange, + BucketList :: {bucket_list, list(Bucket)}, + BucketKeyRange :: {range, Bucket, KeyRange}, + KeyRange :: {StartKey, EndKey} | all, + StartKey :: Key, + EndKey :: Key, + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Value :: term(), + JournalCheck :: boolean() | defer, + SnapPreFold :: boolean(), + SegmentList :: false | list(integer()), + Runner :: fun(() -> Acc). +book_headfold( + Pid, Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentList +) -> + book_headfold( + Pid, + Tag, + Limiter, + FoldAccT, + JournalCheck, + SnapPreFold, + SegmentList, + false, + false + ). %% @doc as book_headfold/7, but with the addition of a Last Modified Date %% Range and Max Object Count. For version 2 objects this will filter out @@ -1050,65 +1130,102 @@ book_headfold(Pid, Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, SegmentLis %% The Max Object Count will stop the fold once the count has been reached on %% this store only. The Max Object Count if provided will mean that the runner %% will return {RemainingCount, Acc} not just Acc --spec book_headfold(pid(), Tag, Limiter, FoldAccT, JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount) -> - {async, Runner} when - Tag :: leveled_codec:tag(), - Limiter :: BucketList | BucketKeyRange | all, - BucketList :: {bucket_list, list(Bucket)}, - BucketKeyRange :: {range, Bucket, KeyRange}, - KeyRange :: {StartKey, EndKey} | all, - StartKey :: Key, - EndKey :: Key, - FoldAccT :: {FoldFun, Acc}, - FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), - Acc :: dynamic(), - Bucket :: term(), - Key :: term(), - Value :: term(), - JournalCheck :: boolean()|defer, - SnapPreFold :: boolean(), - SegmentList :: false | list(integer()), - LastModRange :: false | leveled_codec:lastmod_range(), - MaxObjectCount :: false | pos_integer(), - Runner :: fun(() -> ResultingAcc), - ResultingAcc :: Acc | {non_neg_integer(), Acc}. -book_headfold(Pid, Tag, {bucket_list, BucketList}, FoldAccT, JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount) -> - RunnerType = - {foldheads_bybucket, Tag, BucketList, bucket_list, FoldAccT, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}, +-spec book_headfold( + pid(), + Tag, + Limiter, + FoldAccT, + JournalCheck, + SnapPreFold, + SegmentList, + LastModRange, + MaxObjectCount +) -> + {async, Runner} +when + Tag :: leveled_codec:tag(), + Limiter :: BucketList | BucketKeyRange | all, + BucketList :: {bucket_list, list(Bucket)}, + BucketKeyRange :: {range, Bucket, KeyRange}, + KeyRange :: {StartKey, EndKey} | all, + StartKey :: Key, + EndKey :: Key, + FoldAccT :: {FoldFun, Acc}, + FoldFun :: fun((Bucket, Key, Value, Acc) -> Acc), + Acc :: dynamic(), + Bucket :: term(), + Key :: term(), + Value :: term(), + JournalCheck :: boolean() | defer, + SnapPreFold :: boolean(), + SegmentList :: false | list(integer()), + LastModRange :: false | leveled_codec:lastmod_range(), + MaxObjectCount :: false | pos_integer(), + Runner :: fun(() -> ResultingAcc), + ResultingAcc :: Acc | {non_neg_integer(), Acc}. 
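
As a review aid for the /9 arity just specified (the clauses follow below), a minimal, illustrative sketch of a capped head fold over a single bucket. The bucket name, the object-count cap and the helper name are examples only; Tag would typically be one of the tags defined in leveled.hrl.

%% Illustrative only: count heads in one bucket, capped at 1000 objects.
%% Bookie is a pid from book_start/1; the bucket and the cap are example
%% values, not part of this change.
count_bucket_heads(Bookie, Tag) ->
    FoldFun = fun(_Bucket, _Key, _Head, Acc) -> Acc + 1 end,
    {async, Runner} =
        leveled_bookie:book_headfold(
            Bookie,
            Tag,
            {range, <<"bucket1">>, all},
            {FoldFun, 0},
            false,  % JournalCheck - ledger-only snapshot
            true,   % SnapPreFold - snapshot at request time, not run time
            false,  % SegmentList - no TicTac segment filter
            false,  % LastModRange - no last-modified filter
            1000    % MaxObjectCount
        ),
    %% With a MaxObjectCount the runner returns {RemainingCount, Acc}
    {_Remaining, Count} = Runner(),
    Count.
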
+book_headfold( + Pid, + Tag, + {bucket_list, BucketList}, + FoldAccT, + JournalCheck, + SnapPreFold, + SegmentList, + LastModRange, + MaxObjectCount +) -> + RunnerType = + {foldheads_bybucket, Tag, BucketList, bucket_list, FoldAccT, + JournalCheck, SnapPreFold, SegmentList, LastModRange, + MaxObjectCount}, book_returnfolder(Pid, RunnerType); -book_headfold(Pid, Tag, {range, Bucket, KeyRange}, FoldAccT, JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount) -> - RunnerType = - {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}, +book_headfold( + Pid, + Tag, + {range, Bucket, KeyRange}, + FoldAccT, + JournalCheck, + SnapPreFold, + SegmentList, + LastModRange, + MaxObjectCount +) -> + RunnerType = + {foldheads_bybucket, Tag, Bucket, KeyRange, FoldAccT, JournalCheck, + SnapPreFold, SegmentList, LastModRange, MaxObjectCount}, book_returnfolder(Pid, RunnerType); -book_headfold(Pid, Tag, all, FoldAccT, JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount) -> - RunnerType = {foldheads_allkeys, Tag, FoldAccT, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}, +book_headfold( + Pid, + Tag, + all, + FoldAccT, + JournalCheck, + SnapPreFold, + SegmentList, + LastModRange, + MaxObjectCount +) -> + RunnerType = + {foldheads_allkeys, Tag, FoldAccT, JournalCheck, SnapPreFold, + SegmentList, LastModRange, MaxObjectCount}, book_returnfolder(Pid, RunnerType). -spec book_snapshot( - pid(), store|ledger, tuple()|no_lookup|undefined, boolean()) - -> {ok, pid(), pid()|null}. + pid(), store | ledger, tuple() | no_lookup | undefined, boolean() +) -> + {ok, pid(), pid() | null}. %% @doc create a snapshot of the store %% -%% Snapshot can be based on a pre-defined query (which will be used to filter -%% caches prior to copying for the snapshot), and can be defined as long +%% Snapshot can be based on a pre-defined query (which will be used to filter +%% caches prior to copying for the snapshot), and can be defined as long %% running to avoid timeouts (snapshots are generally expected to be required %% for < 60s) book_snapshot(Pid, SnapType, Query, LongRunning) -> gen_server:call(Pid, {snapshot, SnapType, Query, LongRunning}, infinity). --spec book_compactjournal(pid(), integer()) -> ok|busy. +-spec book_compactjournal(pid(), integer()) -> ok | busy. -spec book_islastcompactionpending(pid()) -> boolean(). -spec book_trimjournal(pid()) -> ok. @@ -1128,7 +1245,7 @@ book_islastcompactionpending(Pid) -> %% @doc Trim the journal when in head_only mode %% -%% In head_only mode the journlacna be trimmed of entries which are before the +%% In head_only mode the journlacna be trimmed of entries which are before the %% persisted SQN. This is much quicker than compacting the journal book_trimjournal(Pid) -> @@ -1148,21 +1265,20 @@ book_close(Pid) -> book_destroy(Pid) -> gen_server:call(Pid, destroy, infinity). - -spec book_hotbackup(pid()) -> {async, fun((string()) -> ok)}. %% @doc Backup the Bookie %% Return a function that will take a backup of a snapshot of the Journal. %% The function will be 1-arity, and can be passed the absolute folder name %% to store the backup. -%% +%% %% Backup files are hard-linked. Does not work in head_only mode, or if %% index changes are used with a `recovr` compaction/reload strategy book_hotbackup(Pid) -> - gen_server:call(Pid, hot_backup, infinity). + gen_server:call(Pid, hot_backup, infinity). -spec book_isempty(pid(), leveled_codec:tag()) -> boolean(). 
-%% @doc -%% Confirm if the store is empty, or if it contains a Key and Value for a +%% @doc +%% Confirm if the store is empty, or if it contains a Key and Value for a %% given tag book_isempty(Pid, Tag) -> FoldAccT = {fun(_B, _Acc) -> false end, true}, @@ -1204,10 +1320,14 @@ book_headstatus(Pid) -> %%% gen_server callbacks %%%============================================================================ --spec init([open_options()]) -> {ok, book_state()}|{stop, atom()}. +-spec init([open_options()]) -> {ok, book_state()} | {stop, atom()}. init([Opts]) -> - case {proplists:get_value(snapshot_bookie, Opts), - proplists:get_value(root_path, Opts)} of + case + { + proplists:get_value(snapshot_bookie, Opts), + proplists:get_value(root_path, Opts) + } + of {undefined, undefined} -> {stop, no_root_path}; {undefined, _RP} -> @@ -1239,22 +1359,22 @@ init([Opts]) -> end, lists:foreach(SetFun, OverrideFunctions), - ConfiguredCacheSize = + ConfiguredCacheSize = max(proplists:get_value(cache_size, Opts), ?MIN_CACHE_SIZE), - CacheJitter = + CacheJitter = max(1, ConfiguredCacheSize div (100 div ?CACHE_SIZE_JITTER)), - CacheSize = + CacheSize = ConfiguredCacheSize + erlang:phash2(self()) rem CacheJitter, MaxCacheMultiple = proplists:get_value(cache_multiple, Opts), PCLMaxSize = PencillerOpts#penciller_options.max_inmemory_tablesize, CacheRatio = PCLMaxSize div ConfiguredCacheSize, - % It is expected that the maximum size of the penciller - % in-memory store should not be more than about 10 x the size - % of the ledger cache. In this case there will be a larger - % than tested list of ledger_caches in the penciller memory, - % and performance may be unpredictable + % It is expected that the maximum size of the penciller + % in-memory store should not be more than about 10 x the size + % of the ledger cache. 
In this case there will be a larger + % than tested list of ledger_caches in the penciller memory, + % and performance may be unpredictable case CacheRatio > 32 of true -> leveled_log:log(b0020, [PCLMaxSize, ConfiguredCacheSize]); @@ -1262,10 +1382,12 @@ init([Opts]) -> ok end, - PageCacheLevel = proplists:get_value(ledger_preloadpagecache_level, Opts), + PageCacheLevel = proplists:get_value( + ledger_preloadpagecache_level, Opts + ), - {HeadOnly, HeadLookup, SSTPageCacheLevel} = - case proplists:get_value(head_only, Opts) of + {HeadOnly, HeadLookup, SSTPageCacheLevel} = + case proplists:get_value(head_only, Opts) of false -> {false, true, PageCacheLevel}; with_lookup -> @@ -1279,60 +1401,64 @@ init([Opts]) -> SSTOpts = PencillerOpts#penciller_options.sst_options, SSTOpts0 = SSTOpts#sst_options{pagecache_level = SSTPageCacheLevel}, PencillerOpts0 = - PencillerOpts#penciller_options{sst_options = SSTOpts0}, + PencillerOpts#penciller_options{sst_options = SSTOpts0}, - {Inker, Penciller} = startup(InkerOpts, PencillerOpts0), + {Inker, Penciller} = startup(InkerOpts, PencillerOpts0), NewETS = ets:new(mem, [ordered_set]), leveled_log:log(b0001, [Inker, Penciller]), - {ok, - #state{ - cache_size = CacheSize, - cache_multiple = MaxCacheMultiple, - is_snapshot = false, - head_only = HeadOnly, - head_lookup = HeadLookup, - inker = Inker, - penciller = Penciller, - ledger_cache = #ledger_cache{mem = NewETS}, - monitor = {Monitor, StatLogFrequency}}}; + {ok, #state{ + cache_size = CacheSize, + cache_multiple = MaxCacheMultiple, + is_snapshot = false, + head_only = HeadOnly, + head_lookup = HeadLookup, + inker = Inker, + penciller = Penciller, + ledger_cache = #ledger_cache{mem = NewETS}, + monitor = {Monitor, StatLogFrequency} + }}; {Bookie, undefined} -> - {ok, Penciller, Inker} = + {ok, Penciller, Inker} = book_snapshot(Bookie, store, undefined, true), BookieMonitor = erlang:monitor(process, Bookie), NewETS = ets:new(mem, [ordered_set]), {HeadOnly, Lookup} = leveled_bookie:book_headstatus(Bookie), leveled_log:log(b0002, [Inker, Penciller]), - {ok, - #state{penciller = Penciller, - inker = Inker, - ledger_cache = #ledger_cache{mem = NewETS}, - head_only = HeadOnly, - head_lookup = Lookup, - bookie_monref = BookieMonitor, - is_snapshot = true}} + {ok, #state{ + penciller = Penciller, + inker = Inker, + ledger_cache = #ledger_cache{mem = NewETS}, + head_only = HeadOnly, + head_lookup = Lookup, + bookie_monref = BookieMonitor, + is_snapshot = true + }} end. 
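
As a reviewer's note on the cache sizing in init/1 above, a minimal sketch of the jitter arithmetic. MinCacheSize and JitterPerc stand in for the ?MIN_CACHE_SIZE and ?CACHE_SIZE_JITTER macros (their actual values are defined in the module and are not repeated here), so the figures produced are illustrative only.

%% Illustrative only - mirrors the cache sizing in init/1 above.
%% MinCacheSize and JitterPerc are stand-ins for ?MIN_CACHE_SIZE and
%% ?CACHE_SIZE_JITTER; erlang:phash2(self()) adds a stable per-process
%% offset to the configured size.
jittered_cache_size(RequestedSize, MinCacheSize, JitterPerc) ->
    ConfiguredCacheSize = max(RequestedSize, MinCacheSize),
    CacheJitter = max(1, ConfiguredCacheSize div (100 div JitterPerc)),
    ConfiguredCacheSize + erlang:phash2(self()) rem CacheJitter.
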
- handle_call( {put, Bucket, Key, Object, IndexSpecs, Tag, TTL, DataSync}, From, - State) - when State#state.head_only == false , Tag =/= ?HEAD_TAG -> + State +) when + State#state.head_only == false, Tag =/= ?HEAD_TAG +-> LedgerKey = leveled_codec:to_objectkey(Bucket, Key, Tag), SWLR = os:timestamp(), SW0 = leveled_monitor:maybe_time(State#state.monitor), {ok, SQN, ObjSize} = leveled_inker:ink_put( State#state.inker, - LedgerKey, - Object, - {IndexSpecs, TTL}, - DataSync), + LedgerKey, + Object, + {IndexSpecs, TTL}, + DataSync + ), {T0, SW1} = leveled_monitor:step_time(SW0), Changes = preparefor_ledgercache( - null, LedgerKey, SQN, Object, ObjSize, {IndexSpecs, TTL}), + null, LedgerKey, SQN, Object, ObjSize, {IndexSpecs, TTL} + ), {T1, SW2} = leveled_monitor:step_time(SW1), Cache0 = addto_ledgercache(Changes, State#state.ledger_cache), {T2, _SW3} = leveled_monitor:step_time(SW2), @@ -1344,26 +1470,31 @@ handle_call( end, maybe_longrunning(SWLR, overall_put), maybelog_put_timing(State#state.monitor, T0, T1, T2, ObjSize), - case maybepush_ledgercache( + case + maybepush_ledgercache( State#state.cache_size, State#state.cache_multiple, Cache0, - State#state.penciller) of + State#state.penciller + ) + of {ok, Cache} -> {noreply, State#state{slow_offer = false, ledger_cache = Cache}}; {returned, Cache} -> {noreply, State#state{slow_offer = true, ledger_cache = Cache}} end; -handle_call({mput, ObjectSpecs, TTL}, From, State) - when State#state.head_only == true -> - {ok, SQN} = +handle_call({mput, ObjectSpecs, TTL}, From, State) when + State#state.head_only == true +-> + {ok, SQN} = leveled_inker:ink_mput(State#state.inker, dummy, {ObjectSpecs, TTL}), - Changes = + Changes = preparefor_ledgercache( ?INKT_MPUT, - ?DUMMY, - SQN, null, - length(ObjectSpecs), + ?DUMMY, + SQN, + null, + length(ObjectSpecs), {ObjectSpecs, TTL} ), Cache0 = addto_ledgercache(Changes, State#state.ledger_cache), @@ -1373,30 +1504,36 @@ handle_call({mput, ObjectSpecs, TTL}, From, State) false -> gen_server:reply(From, ok) end, - case maybepush_ledgercache( + case + maybepush_ledgercache( State#state.cache_size, State#state.cache_multiple, Cache0, - State#state.penciller) of + State#state.penciller + ) + of {ok, Cache} -> {noreply, State#state{ledger_cache = Cache, slow_offer = false}}; {returned, Cache} -> {noreply, State#state{ledger_cache = Cache, slow_offer = true}} end; -handle_call({get, Bucket, Key, Tag}, _From, State) - when State#state.head_only == false -> +handle_call({get, Bucket, Key, Tag}, _From, State) when + State#state.head_only == false +-> LedgerKey = leveled_codec:to_objectkey(Bucket, Key, Tag), SW0 = leveled_monitor:maybe_time(State#state.monitor), {H0, _CacheHit} = - fetch_head(LedgerKey, - State#state.penciller, - State#state.ledger_cache), - HeadResult = + fetch_head( + LedgerKey, + State#state.penciller, + State#state.ledger_cache + ), + HeadResult = case H0 of not_present -> not_found; Head -> - {Seqn, Status, _MH, _MD} = + {Seqn, Status, _MH, _MD} = leveled_codec:striphead_to_v1details(Head), case Status of tomb -> @@ -1411,32 +1548,36 @@ handle_call({get, Bucket, Key, Tag}, _From, State) end end, {TS0, SW1} = leveled_monitor:step_time(SW0), - GetResult = - case HeadResult of - not_found -> + GetResult = + case HeadResult of + not_found -> not_found; {LK, SQN} -> Object = fetch_value(State#state.inker, {LK, SQN}), - case Object of + case Object of not_present -> not_found; _ -> {ok, Object} - end + end end, {TS1, _SW2} = leveled_monitor:step_time(SW1), maybelog_get_timing( - State#state.monitor, 
TS0, TS1, GetResult == not_found), + State#state.monitor, TS0, TS1, GetResult == not_found + ), {reply, GetResult, State}; -handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) - when State#state.head_lookup == true -> +handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) when + State#state.head_lookup == true +-> SW0 = leveled_monitor:maybe_time(State#state.monitor), LK = leveled_codec:to_objectkey(Bucket, Key, Tag), {Head, CacheHit} = - fetch_head(LK, - State#state.penciller, - State#state.ledger_cache, - State#state.head_only), + fetch_head( + LK, + State#state.penciller, + State#state.ledger_cache, + State#state.head_only + ), {TS0, SW1} = leveled_monitor:step_time(SW0), JrnalCheckFreq = case State#state.head_only of @@ -1457,8 +1598,11 @@ handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) case TS >= leveled_util:integer_now() of true -> I = State#state.inker, - case journal_notfound( - JrnalCheckFreq, I, LK, SeqN) of + case + journal_notfound( + JrnalCheckFreq, I, LK, SeqN + ) + of {true, UppedFrequency} -> {not_found, null, UppedFrequency}; {false, ReducedFrequency} -> @@ -1469,7 +1613,7 @@ handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) end end end, - Reply = + Reply = case {LedgerMD, SQNOnly} of {not_found, _} -> not_found; @@ -1480,7 +1624,8 @@ handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) end, {TS1, _SW2} = leveled_monitor:step_time(SW1), maybelog_head_timing( - State#state.monitor, TS0, TS1, LedgerMD == not_found, CacheHit), + State#state.monitor, TS0, TS1, LedgerMD == not_found, CacheHit + ), case UpdJrnalCheckFreq of JrnalCheckFreq -> {reply, Reply, State}; @@ -1490,10 +1635,12 @@ handle_call({head, Bucket, Key, Tag, SQNOnly}, _From, State) handle_call( {snapshot, SnapType, Query, LongRunning}, _From, - State = #state{penciller = Pcl}) - when is_pid(Pcl) -> - % Snapshot the store, specifying if the snapshot should be long running - % (i.e. will the snapshot be queued or be required for an extended period + State = #state{penciller = Pcl} +) when + is_pid(Pcl) +-> + % Snapshot the store, specifying if the snapshot should be long running + % (i.e. will the snapshot be queued or be required for an extended period % e.g. 
many minutes) {ok, PclSnap, InkSnap} = snapshot_store( @@ -1503,70 +1650,85 @@ handle_call( State#state.monitor, SnapType, Query, - LongRunning), - {reply, {ok, PclSnap, InkSnap},State}; + LongRunning + ), + {reply, {ok, PclSnap, InkSnap}, State}; handle_call(log_settings, _From, State) -> {reply, leveled_log:return_settings(), State}; handle_call({return_runner, QueryType}, _From, State) -> Runner = get_runner(State, QueryType), {reply, Runner, State}; -handle_call({compact_journal, Timeout}, From, State) - when State#state.head_only == false -> +handle_call({compact_journal, Timeout}, From, State) when + State#state.head_only == false +-> case leveled_inker:ink_compactionpending(State#state.inker) of true -> {reply, {busy, undefined}, State}; false -> {ok, PclSnap, null} = - snapshot_store( - State#state.ledger_cache, - State#state.penciller, + snapshot_store( + State#state.ledger_cache, + State#state.penciller, + State#state.inker, + State#state.monitor, + ledger, + undefined, + true + ), + R = leveled_inker:ink_compactjournal( State#state.inker, - State#state.monitor, - ledger, - undefined, - true), - R = leveled_inker:ink_compactjournal(State#state.inker, - PclSnap, - Timeout), + PclSnap, + Timeout + ), gen_server:reply(From, R), - case maybepush_ledgercache( + case + maybepush_ledgercache( State#state.cache_size, State#state.cache_multiple, State#state.ledger_cache, - State#state.penciller) of + State#state.penciller + ) + of {_, NewCache} -> {noreply, State#state{ledger_cache = NewCache}} end end; -handle_call(confirm_compact, _From, State) - when State#state.head_only == false -> +handle_call(confirm_compact, _From, State) when + State#state.head_only == false +-> {reply, leveled_inker:ink_compactionpending(State#state.inker), State}; handle_call(trim, _From, State) when State#state.head_only == true -> PSQN = leveled_penciller:pcl_persistedsqn(State#state.penciller), {reply, leveled_inker:ink_trim(State#state.inker, PSQN), State}; handle_call(hot_backup, _From, State) when State#state.head_only == false -> ok = leveled_inker:ink_roll(State#state.inker), - BackupFun = + BackupFun = fun(InkerSnapshot) -> fun(BackupPath) -> ok = leveled_inker:ink_backup(InkerSnapshot, BackupPath), ok = leveled_inker:ink_close(InkerSnapshot) end end, - InkerOpts = - #inker_options{start_snapshot = true, - source_inker = State#state.inker, - bookies_pid = self()}, + InkerOpts = + #inker_options{ + start_snapshot = true, + source_inker = State#state.inker, + bookies_pid = self() + }, {ok, Snapshot} = leveled_inker:ink_snapstart(InkerOpts), {reply, {async, BackupFun(Snapshot)}, State}; handle_call( - close, _From, State = #state{inker = Inker, penciller = Pcl}) - when is_pid(Inker), is_pid(Pcl) -> + close, _From, State = #state{inker = Inker, penciller = Pcl} +) when + is_pid(Inker), is_pid(Pcl) +-> leveled_inker:ink_close(Inker), leveled_penciller:pcl_close(Pcl), leveled_monitor:monitor_close(element(1, State#state.monitor)), {stop, normal, ok, State}; -handle_call(destroy, _From, State=#state{is_snapshot=Snp}) when Snp == false -> +handle_call(destroy, _From, State = #state{is_snapshot = Snp}) when + Snp == false +-> leveled_log:log(b0011, []), {ok, InkPathList} = leveled_inker:ink_doom(State#state.inker), {ok, PCLPathList} = leveled_penciller:pcl_doom(State#state.penciller), @@ -1581,10 +1743,11 @@ handle_call(head_status, _From, State) -> handle_call(Msg, _From, State) -> {reply, {unsupported_message, element(1, Msg)}, State}. 
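
Given the hot_backup clause above, a minimal usage sketch of the asynchronous backup API; the bookie pid and target path are illustrative.

%% Illustrative only. Per the book_hotbackup/1 documentation above, the
%% returned 1-arity fun takes an absolute folder name, hard-links the
%% journal files into it, and is not available in head_only mode.
backup_journal(Bookie) ->
    {async, BackupFun} = leveled_bookie:book_hotbackup(Bookie),
    ok = BackupFun("/var/backups/leveled_journal").
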
- handle_cast( - {log_level, LogLevel}, State = #state{inker = Inker, penciller = Pcl}) - when is_pid(Inker), is_pid(Pcl) -> + {log_level, LogLevel}, State = #state{inker = Inker, penciller = Pcl} +) when + is_pid(Inker), is_pid(Pcl) +-> ok = leveled_penciller:pcl_loglevel(Pcl, LogLevel), ok = leveled_inker:ink_loglevel(Inker, LogLevel), case element(1, State#state.monitor) of @@ -1596,8 +1759,10 @@ handle_cast( ok = leveled_log:set_loglevel(LogLevel), {noreply, State}; handle_cast( - {add_logs, ForcedLogs}, State = #state{inker = Inker, penciller = Pcl}) - when is_pid(Inker), is_pid(Pcl) -> + {add_logs, ForcedLogs}, State = #state{inker = Inker, penciller = Pcl} +) when + is_pid(Inker), is_pid(Pcl) +-> ok = leveled_penciller:pcl_addlogs(Pcl, ForcedLogs), ok = leveled_inker:ink_addlogs(Inker, ForcedLogs), case element(1, State#state.monitor) of @@ -1609,8 +1774,10 @@ handle_cast( ok = leveled_log:add_forcedlogs(ForcedLogs), {noreply, State}; handle_cast( - {remove_logs, ForcedLogs}, State = #state{inker = Inker, penciller = Pcl}) - when is_pid(Inker), is_pid(Pcl) -> + {remove_logs, ForcedLogs}, State = #state{inker = Inker, penciller = Pcl} +) when + is_pid(Inker), is_pid(Pcl) +-> ok = leveled_penciller:pcl_removelogs(Pcl, ForcedLogs), ok = leveled_inker:ink_removelogs(Inker, ForcedLogs), case element(1, State#state.monitor) of @@ -1622,16 +1789,16 @@ handle_cast( ok = leveled_log:remove_forcedlogs(ForcedLogs), {noreply, State}. - %% handle the bookie stopping and stop this snapshot -handle_info({'DOWN', BookieMonRef, process, BookiePid, Info}, - State=#state{bookie_monref = BookieMonRef, is_snapshot = true}) -> +handle_info( + {'DOWN', BookieMonRef, process, BookiePid, Info}, + State = #state{bookie_monref = BookieMonRef, is_snapshot = true} +) -> leveled_log:log(b0004, [BookiePid, Info]), {stop, normal, State}; handle_info(_Info, State) -> {noreply, State}. - terminate(Reason, _State) -> leveled_log:log(b0003, [Reason]). @@ -1643,21 +1810,21 @@ code_change(_OldVsn, State, _Extra) -> %%%============================================================================ -spec empty_ledgercache() -> ledger_cache(). -%% @doc +%% @doc %% Empty the ledger cache table following a push empty_ledgercache() -> #ledger_cache{mem = ets:new(empty, [ordered_set])}. - -spec push_to_penciller( pid(), - list(load_item()), + list(load_item()), ledger_cache(), - leveled_codec:compaction_strategy()) - -> ledger_cache(). + leveled_codec:compaction_strategy() +) -> + ledger_cache(). 
%% @doc %% The push to penciller must start as a tree to correctly de-duplicate -%% the list by order before becoming a de-duplicated list for loading +%% the list by order before becoming a de-duplicated list for loading push_to_penciller(Penciller, LoadItemList, LedgerCache, ReloadStrategy) -> UpdLedgerCache = lists:foldl( @@ -1666,11 +1833,19 @@ push_to_penciller(Penciller, LoadItemList, LedgerCache, ReloadStrategy) -> case leveled_codec:get_tagstrategy(PK, ReloadStrategy) of recalc -> recalcfor_ledgercache( - InkTag, PK, SQN, Obj, ValSize, IndexSpecs, - AccLC, Penciller); + InkTag, + PK, + SQN, + Obj, + ValSize, + IndexSpecs, + AccLC, + Penciller + ); _ -> preparefor_ledgercache( - InkTag, PK, SQN, Obj, ValSize, IndexSpecs) + InkTag, PK, SQN, Obj, ValSize, IndexSpecs + ) end, addto_ledgercache(Chngs, AccLC, loader) end, @@ -1682,7 +1857,8 @@ push_to_penciller(Penciller, LoadItemList, LedgerCache, ReloadStrategy) -> leveled_log:log(b0006, [UpdLedgerCache#ledger_cache.max_sqn]), ok = push_to_penciller_loop( - Penciller, loadqueue_ledgercache(UpdLedgerCache)), + Penciller, loadqueue_ledgercache(UpdLedgerCache) + ), empty_ledgercache(); _ -> UpdLedgerCache @@ -1698,22 +1874,24 @@ push_to_penciller_loop(Penciller, LedgerCache) -> ok end. --spec push_ledgercache(pid(), ledger_cache()) -> ok|returned. -%% @doc +-spec push_ledgercache(pid(), ledger_cache()) -> ok | returned. +%% @doc %% Push the ledgercache to the Penciller - which should respond ok or %% returned. If the response is ok the cache can be flushed, but if the %% response is returned the cache should continue to build and it should try %% to flush at a later date push_ledgercache(Penciller, Cache) -> - CacheToLoad = {Cache#ledger_cache.loader, - Cache#ledger_cache.index, - Cache#ledger_cache.min_sqn, - Cache#ledger_cache.max_sqn}, + CacheToLoad = { + Cache#ledger_cache.loader, + Cache#ledger_cache.index, + Cache#ledger_cache.min_sqn, + Cache#ledger_cache.max_sqn + }, leveled_penciller:pcl_pushmem(Penciller, CacheToLoad). -spec loadqueue_ledgercache(ledger_cache()) -> ledger_cache(). -%% @doc -%% The ledger cache can be built from a queue, for example when loading the +%% @doc +%% The ledger cache can be built from a queue, for example when loading the %% ledger from the head of the journal on startup %% %% The queue should be build using [NewKey|Acc] so that the most recent @@ -1723,15 +1901,17 @@ loadqueue_ledgercache(Cache) -> T = leveled_tree:from_orderedlist(SL, ?CACHE_TYPE), Cache#ledger_cache{load_queue = [], loader = T}. --spec snapshot_store(ledger_cache(), - pid(), - null|pid(), - leveled_monitor:monitor(), - store|ledger, - undefined|no_lookup|tuple(), - boolean()) -> - {ok, pid(), pid()|null}. -%% @doc +-spec snapshot_store( + ledger_cache(), + pid(), + null | pid(), + leveled_monitor:monitor(), + store | ledger, + undefined | no_lookup | tuple(), + boolean() +) -> + {ok, pid(), pid() | null}. +%% @doc %% Allow all a snapshot to be created from part of the store, preferably %% passing in a query filter so that all of the LoopState does not need to %% be copied from the real actor to the clone @@ -1742,18 +1922,21 @@ loadqueue_ledgercache(Cache) -> %% Query can be no_lookup, indicating the snapshot will be used for non-specific %% range queries and not direct fetch requests. {StartKey, EndKey} if the the %% snapshot is to be used for one specific query only (this is much quicker to -%% setup, assuming the range is a small subset of the overall key space). 
If -%% lookup is required but the range isn't defined then 'undefined' should be +%% setup, assuming the range is a small subset of the overall key space). If +%% lookup is required but the range isn't defined then 'undefined' should be %% passed as the query snapshot_store( - LedgerCache, Penciller, Ink, Monitor, SnapType, Query, LongRunning) -> + LedgerCache, Penciller, Ink, Monitor, SnapType, Query, LongRunning +) -> SW0 = leveled_monitor:maybe_time(Monitor), LedgerCacheReady = readycache_forsnapshot(LedgerCache, Query), - BookiesMem = {LedgerCacheReady#ledger_cache.loader, - LedgerCacheReady#ledger_cache.index, - LedgerCacheReady#ledger_cache.min_sqn, - LedgerCacheReady#ledger_cache.max_sqn}, - PCLopts = + BookiesMem = { + LedgerCacheReady#ledger_cache.loader, + LedgerCacheReady#ledger_cache.index, + LedgerCacheReady#ledger_cache.min_sqn, + LedgerCacheReady#ledger_cache.max_sqn + }, + PCLopts = #penciller_options{ start_snapshot = true, source_penciller = Penciller, @@ -1772,15 +1955,15 @@ snapshot_store( #inker_options{ start_snapshot = true, bookies_pid = self(), - source_inker = Ink}, + source_inker = Ink + }, {ok, JournalSnapshot} = leveled_inker:ink_snapstart(InkerOpts), {ok, LedgerSnapshot, JournalSnapshot}; ledger -> {ok, LedgerSnapshot, null} end. - --spec fetch_value(pid(), leveled_codec:journal_ref()) -> not_present|any(). +-spec fetch_value(pid(), leveled_codec:journal_ref()) -> not_present | any(). %% @doc %% Fetch a value from the Journal fetch_value(Inker, {Key, SQN}) -> @@ -1793,7 +1976,6 @@ fetch_value(Inker, {Key, SQN}) -> not_present end. - %%%============================================================================ %%% Internal functions %%%============================================================================ @@ -1801,7 +1983,7 @@ fetch_value(Inker, {Key, SQN}) -> -spec startup(#inker_options{}, #penciller_options{}) -> {pid(), pid()}. %% @doc %% Startup the Inker and the Penciller, and prompt the loading of the Penciller -%% from the Inker. The Penciller may be shutdown without the latest data +%% from the Inker. The Penciller may be shutdown without the latest data %% having been persisted: and so the Iker must be able to update the Penciller %% on startup with anything that happened but wasn't flushed to disk. startup(InkerOpts, PencillerOpts) -> @@ -1811,10 +1993,11 @@ startup(InkerOpts, PencillerOpts) -> leveled_log:log(b0005, [LedgerSQN]), ReloadStrategy = InkerOpts#inker_options.reload_strategy, LoadFun = get_loadfun(), - BatchFun = + BatchFun = fun(BatchAcc, Acc) -> push_to_penciller( - Penciller, BatchAcc, Acc, ReloadStrategy) + Penciller, BatchAcc, Acc, ReloadStrategy + ) end, InitAccFun = fun(FN, CurrentMinSQN) -> @@ -1823,39 +2006,46 @@ startup(InkerOpts, PencillerOpts) -> end, FinalAcc = leveled_inker:ink_loadpcl( - Inker, LedgerSQN + 1, LoadFun, InitAccFun, BatchFun), + Inker, LedgerSQN + 1, LoadFun, InitAccFun, BatchFun + ), ok = push_to_penciller_loop(Penciller, loadqueue_ledgercache(FinalAcc)), ok = leveled_inker:ink_checksqn(Inker, LedgerSQN), {Inker, Penciller}. - -spec set_defaults(list()) -> open_options(). %% @doc %% Set any pre-defined defaults for options if the option is not present in %% the passed in options set_defaults(Opts) -> - lists:ukeymerge(1, - lists:ukeysort(1, Opts), - lists:ukeysort(1, ?OPTION_DEFAULTS)). + lists:ukeymerge( + 1, + lists:ukeysort(1, Opts), + lists:ukeysort(1, ?OPTION_DEFAULTS) + ). -spec set_options( - open_options(), leveled_monitor:monitor()) -> - {#inker_options{}, #penciller_options{}}. 
+ open_options(), leveled_monitor:monitor() +) -> + {#inker_options{}, #penciller_options{}}. %% @doc %% Take the passed in property list of operations and extract out any relevant %% options to the Inker or the Penciller set_options(Opts, Monitor) -> - MaxJournalSize0 = - min(?ABSOLUTEMAX_JOURNALSIZE, - proplists:get_value(max_journalsize, Opts)), + MaxJournalSize0 = + min( + ?ABSOLUTEMAX_JOURNALSIZE, + proplists:get_value(max_journalsize, Opts) + ), JournalSizeJitter = MaxJournalSize0 div (100 div ?JOURNAL_SIZE_JITTER), - MaxJournalSize = - min(?ABSOLUTEMAX_JOURNALSIZE, - MaxJournalSize0 - erlang:phash2(self()) rem JournalSizeJitter), + MaxJournalSize = + min( + ?ABSOLUTEMAX_JOURNALSIZE, + MaxJournalSize0 - erlang:phash2(self()) rem JournalSizeJitter + ), MaxJournalCount0 = proplists:get_value(max_journalobjectcount, Opts), JournalCountJitter = MaxJournalCount0 div (100 div ?JOURNAL_SIZE_JITTER), - MaxJournalCount = + MaxJournalCount = MaxJournalCount0 - erlang:phash2(self()) rem JournalCountJitter, SyncStrat = proplists:get_value(sync_strategy, Opts), @@ -1867,7 +2057,7 @@ set_options(Opts, Monitor) -> AltStrategy = proplists:get_value(reload_strategy, Opts), ReloadStrategy = leveled_codec:inker_reload_strategy(AltStrategy), - PCLL0CacheSize = + PCLL0CacheSize = case proplists:get_value(max_pencillercachesize, Opts) of P0CS when is_integer(P0CS), P0CS > ?MIN_PCL_CACHE_SIZE -> P0CS; @@ -1882,9 +2072,9 @@ set_options(Opts, Monitor) -> ok = filelib:ensure_dir(JournalFP), ok = filelib:ensure_dir(LedgerFP), - SFL_CompPerc = + SFL_CompPerc = proplists:get_value(singlefile_compactionpercentage, Opts), - MRL_CompPerc = + MRL_CompPerc = proplists:get_value(maxrunlength_compactionpercentage, Opts), true = MRL_CompPerc >= SFL_CompPerc, true = 100.0 >= MRL_CompPerc, @@ -1899,18 +2089,18 @@ set_options(Opts, Monitor) -> AltMethod -> AltMethod end, - CompressOnReceipt = - case proplists:get_value(compression_point, Opts) of + CompressOnReceipt = + case proplists:get_value(compression_point, Opts) of on_receipt -> % Note this will add measurable delay to PUT time % https://github.com/martinsumner/leveled/issues/95 true; on_compact -> % If using lz4 this is not recommended - false + false end, CompressionLevel = proplists:get_value(compression_level, Opts), - + BlockVersion = proplists:get_value(block_version, Opts), MaxSSTSlots = proplists:get_value(max_sstslots, Opts), MaxMergeBelow = proplists:get_value(max_mergebelow, Opts), @@ -1929,7 +2119,7 @@ set_options(Opts, Monitor) -> compression_method = JournalCompression, compress_on_receipt = CompressOnReceipt, score_onein = ScoreOneIn, - cdb_options = + cdb_options = #cdb_options{ max_size = MaxJournalSize, max_count = MaxJournalCount, @@ -1960,12 +2150,14 @@ set_options(Opts, Monitor) -> } }. - -spec return_snapfun( - book_state(), store|ledger, - tuple()|no_lookup|undefined, - boolean(), boolean()) - -> fun(() -> {ok, pid(), pid()|null, fun(() -> ok)}). + book_state(), + store | ledger, + tuple() | no_lookup | undefined, + boolean(), + boolean() +) -> + fun(() -> {ok, pid(), pid() | null, fun(() -> ok)}). %% @doc %% Generates a function from which a snapshot can be created. The primary %% factor here is the SnapPreFold boolean. If this is true then the snapshot @@ -1975,10 +2167,10 @@ set_options(Opts, Monitor) -> %% SnapPrefold is to be used when the intention is to queue the fold, and so %% calling of the fold may be delayed, but it is still desired that the fold %% represent the point in time that the query was requested. 
-%% +%% %% Also returns a function which will close any snapshots to be used in the %% runners post-query cleanup action -%% +%% %% When the bookie is a snapshot, a fresh snapshot should not be taken, the %% previous snapshot should be used instead. Also the snapshot should not be %% closed as part of the post-query activity as the snapshot may be reused, and @@ -1988,8 +2180,10 @@ return_snapfun( SnapType, Query, LongRunning, - SnapPreFold) - when is_pid(Pcl), is_pid(Ink) -> + SnapPreFold +) when + is_pid(Pcl), is_pid(Ink) +-> CloseFun = fun(LS0, JS0) -> fun() -> @@ -2012,7 +2206,8 @@ return_snapfun( State#state.monitor, SnapType, Query, - LongRunning), + LongRunning + ), fun() -> {ok, LS, JS, CloseFun(LS, JS)} end; {false, false} -> Self = self(), @@ -2021,51 +2216,54 @@ return_snapfun( % This uses the external snapshot - as the snapshot will need % to have consistent state between Bookie and Penciller when % it is made. - fun() -> - {ok, LS, JS} = + fun() -> + {ok, LS, JS} = book_snapshot(Self, SnapType, Query, LongRunning), {ok, LS, JS, CloseFun(LS, JS)} end; - {_ , true} -> + {_, true} -> LS = State#state.penciller, JS = State#state.inker, fun() -> {ok, LS, JS, fun() -> ok end} end end. --spec snaptype_by_presence(boolean()|defer) -> store|ledger. +-spec snaptype_by_presence(boolean() | defer) -> store | ledger. %% @doc -%% Folds that traverse over object heads, may also either require to return +%% Folds that traverse over object heads, may also either require to return %% the object, or at least confirm the object is present in the Ledger. This -%% is achieved by enabling presence - and this will change the type of +%% is achieved by enabling presence - and this will change the type of %% snapshot to one that covers the whole store (i.e. both ledger and journal), %% rather than just the ledger. snaptype_by_presence(true) -> store; snaptype_by_presence(defer) -> store; -snaptype_by_presence(false) -> +snaptype_by_presence(false) -> ledger. -spec get_runner(book_state(), tuple()) -> {async, fun(() -> term())}. %% @doc -%% Get an {async, Runner} for a given fold type. Fold types have different +%% Get an {async, Runner} for a given fold type. 
Fold types have different %% tuple inputs get_runner(State, {index_query, Constraint, FoldAccT, Range, TermHandling}) -> {StartKey, EndKey} = index_range(Constraint, Range), SnapFun = return_snapfun(State, ledger, {StartKey, EndKey}, false, false), leveled_runner:index_query( - SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT); + SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT + ); get_runner( - State, - {multi_index_query, Bucket, FoldAccT, Queries, ComboFun}) -> + State, + {multi_index_query, Bucket, FoldAccT, Queries, ComboFun} +) -> {FoldFun, InitAcc} = FoldAccT, - KeyFolder = fun(_B, K, Acc) -> [K|Acc] end, + KeyFolder = fun(_B, K, Acc) -> [K | Acc] end, QueryRunners = lists:map( fun({SetId, {IdxFld, StartTerm, EndTerm, Expr}}) -> {SK, EK} = index_range( - {Bucket, null}, {IdxFld, StartTerm, EndTerm}), + {Bucket, null}, {IdxFld, StartTerm, EndTerm} + ), SnapFun = return_snapfun(State, ledger, {SK, EK}, false, true), {async, Runner} = @@ -2088,7 +2286,8 @@ get_runner( {SetId, sets:from_list(KLR)} end end, - QueryRunners) + QueryRunners + ) ) ), lists:foldl( @@ -2107,14 +2306,14 @@ get_runner(State, {keylist, Tag, Bucket, FoldAccT}) -> get_runner(State, {keylist, Tag, Bucket, KeyRange, FoldAccT, TermRegex}) -> SnapFun = return_snapfun(State, ledger, no_lookup, true, true), leveled_runner:bucketkey_query( - SnapFun, Tag, Bucket, KeyRange, FoldAccT, TermRegex); + SnapFun, Tag, Bucket, KeyRange, FoldAccT, TermRegex + ); %% Set of runners for object or metadata folds get_runner( - State, - {foldheads_allkeys, - Tag, FoldFun, - JournalCheck, SnapPreFold, SegmentList, - LastModRange, MaxObjectCount}) -> + State, + {foldheads_allkeys, Tag, FoldFun, JournalCheck, SnapPreFold, SegmentList, + LastModRange, MaxObjectCount} +) -> SnapType = snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, no_lookup, true, SnapPreFold), leveled_runner:foldheads_allkeys( @@ -2128,29 +2327,29 @@ get_runner( ); get_runner(State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold}) -> get_runner( - State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order}); + State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, key_order} + ); get_runner(State, {foldobjects_allkeys, Tag, FoldFun, SnapPreFold, Order}) -> case Order of key_order -> SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold), leveled_runner:foldobjects_allkeys( - SnapFun, Tag, FoldFun, key_order); + SnapFun, Tag, FoldFun, key_order + ); sqn_order -> SnapFun = return_snapfun(State, store, undefined, true, SnapPreFold), leveled_runner:foldobjects_allkeys( - SnapFun, Tag, FoldFun, sqn_order) + SnapFun, Tag, FoldFun, sqn_order + ) end; get_runner( - State, - {foldheads_bybucket, - Tag, - BucketList, bucket_list, - FoldFun, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}) -> - KeyRangeFun = + State, + {foldheads_bybucket, Tag, BucketList, bucket_list, FoldFun, JournalCheck, + SnapPreFold, SegmentList, LastModRange, MaxObjectCount} +) -> + KeyRangeFun = fun(Bucket) -> {StartKey, EndKey, _} = return_ledger_keyrange(Tag, Bucket, all), {StartKey, EndKey} @@ -2168,45 +2367,41 @@ get_runner( MaxObjectCount ); get_runner( - State, - {foldheads_bybucket, - Tag, - Bucket, KeyRange, - FoldFun, - JournalCheck, SnapPreFold, - SegmentList, LastModRange, MaxObjectCount}) -> + State, + {foldheads_bybucket, Tag, Bucket, KeyRange, FoldFun, JournalCheck, + SnapPreFold, SegmentList, LastModRange, MaxObjectCount} +) -> {StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange), SnapType 
= snaptype_by_presence(JournalCheck), SnapFun = return_snapfun(State, SnapType, SnapQ, true, SnapPreFold), leveled_runner:foldheads_bybucket( SnapFun, - Tag, - [{StartKey, EndKey}], - FoldFun, + Tag, + [{StartKey, EndKey}], + FoldFun, JournalCheck, SegmentList, LastModRange, MaxObjectCount ); get_runner( - State, - {foldobjects_bybucket, - Tag, Bucket, KeyRange, - FoldFun, - SnapPreFold}) -> + State, + {foldobjects_bybucket, Tag, Bucket, KeyRange, FoldFun, SnapPreFold} +) -> {StartKey, EndKey, SnapQ} = return_ledger_keyrange(Tag, Bucket, KeyRange), SnapFun = return_snapfun(State, store, SnapQ, true, SnapPreFold), leveled_runner:foldobjects_bybucket( - SnapFun, Tag, [{StartKey, EndKey}], FoldFun); + SnapFun, Tag, [{StartKey, EndKey}], FoldFun + ); get_runner( - State, - {foldobjects_byindex, - Tag, Bucket, {Field, FromTerm, ToTerm}, - FoldObjectsFun, - SnapPreFold}) -> + State, + {foldobjects_byindex, Tag, Bucket, {Field, FromTerm, ToTerm}, + FoldObjectsFun, SnapPreFold} +) -> SnapFun = return_snapfun(State, store, no_lookup, true, SnapPreFold), leveled_runner:foldobjects_byindex( - SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldObjectsFun); + SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldObjectsFun + ); get_runner(State, {bucket_list, Tag, FoldAccT}) -> {FoldBucketsFun, Acc} = FoldAccT, SnapFun = return_snapfun(State, ledger, no_lookup, false, false), @@ -2219,7 +2414,6 @@ get_runner(State, {first_bucket, Tag, FoldAccT}) -> get_runner(State, DeprecatedQuery) -> get_deprecatedrunner(State, DeprecatedQuery). - index_range(Constraint, Range) -> {IdxFld, StartT, EndT} = Range, {Bucket, ObjKey0} = @@ -2229,16 +2423,16 @@ index_range(Constraint, Range) -> B -> {B, null} end, - StartKey = + StartKey = leveled_codec:to_querykey(Bucket, ObjKey0, ?IDX_TAG, IdxFld, StartT), - EndKey = + EndKey = leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, IdxFld, EndT), {StartKey, EndKey}. -spec get_deprecatedrunner(book_state(), tuple()) -> - {async, fun(() -> term())}. + {async, fun(() -> term())}. %% @doc -%% Get an {async, Runner} for a given fold type. Fold types have different +%% Get an {async, Runner} for a given fold type. Fold types have different %% tuple inputs. These folds are currently used in tests, but are deprecated. %% Most of these folds should be achievable through other available folds. 
get_deprecatedrunner(State, {bucket_stats, Bucket}) -> @@ -2249,64 +2443,73 @@ get_deprecatedrunner(State, {riakbucket_stats, Bucket}) -> leveled_runner:bucket_sizestats(SnapFun, Bucket, ?RIAK_TAG); get_deprecatedrunner(State, {hashlist_query, Tag, JournalCheck}) -> SnapType = snaptype_by_presence(JournalCheck), - SnapFun = return_snapfun(State, SnapType, no_lookup, true, true), + SnapFun = return_snapfun(State, SnapType, no_lookup, true, true), leveled_runner:hashlist_query(SnapFun, Tag, JournalCheck); -get_deprecatedrunner(State, - {tictactree_obj, - {Tag, Bucket, StartK, EndK, JournalCheck}, - TreeSize, - PartitionFilter}) -> +get_deprecatedrunner( + State, + {tictactree_obj, {Tag, Bucket, StartK, EndK, JournalCheck}, TreeSize, + PartitionFilter} +) -> SnapType = snaptype_by_presence(JournalCheck), - SnapFun = return_snapfun(State, SnapType, no_lookup, true, true), - leveled_runner:tictactree(SnapFun, - {Tag, Bucket, {StartK, EndK}}, - JournalCheck, - TreeSize, - PartitionFilter); -get_deprecatedrunner(State, - {tictactree_idx, - {Bucket, IdxField, StartK, EndK}, - TreeSize, - PartitionFilter}) -> - SnapFun = return_snapfun(State, ledger, no_lookup, true, true), - leveled_runner:tictactree(SnapFun, - {?IDX_TAG, Bucket, {IdxField, StartK, EndK}}, - false, - TreeSize, - PartitionFilter). - + SnapFun = return_snapfun(State, SnapType, no_lookup, true, true), + leveled_runner:tictactree( + SnapFun, + {Tag, Bucket, {StartK, EndK}}, + JournalCheck, + TreeSize, + PartitionFilter + ); +get_deprecatedrunner( + State, + {tictactree_idx, {Bucket, IdxField, StartK, EndK}, TreeSize, + PartitionFilter} +) -> + SnapFun = return_snapfun(State, ledger, no_lookup, true, true), + leveled_runner:tictactree( + SnapFun, + {?IDX_TAG, Bucket, {IdxField, StartK, EndK}}, + false, + TreeSize, + PartitionFilter + ). -spec return_ledger_keyrange( - atom(), leveled_codec:key(), tuple()|all) - -> - { - leveled_codec:query_key(), - leveled_codec:query_key(), - {leveled_codec:query_key(), - leveled_codec:query_key()}|no_lookup - }. + atom(), leveled_codec:key(), tuple() | all +) -> + { + leveled_codec:query_key(), + leveled_codec:query_key(), + {leveled_codec:query_key(), leveled_codec:query_key()} + | no_lookup + }. 
%% @doc -%% Convert a range of binary keys into a ledger key range, returning -%% {StartLK, EndLK, Query} where Query is to indicate whether the query -%% range is worth using to minimise the cost of the snapshot +%% Convert a range of binary keys into a ledger key range, returning +%% {StartLK, EndLK, Query} where Query is to indicate whether the query +%% range is worth using to minimise the cost of the snapshot return_ledger_keyrange(Tag, Bucket, KeyRange) -> {StartKey, EndKey, Snap} = - case KeyRange of - all -> - {leveled_codec:to_querykey(Bucket, null, Tag), + case KeyRange of + all -> + { + leveled_codec:to_querykey(Bucket, null, Tag), leveled_codec:to_querykey(Bucket, null, Tag), - false}; + false + }; {StartTerm, <<"$all">>} -> - {leveled_codec:to_querykey(Bucket, StartTerm, Tag), + { + leveled_codec:to_querykey(Bucket, StartTerm, Tag), leveled_codec:to_querykey(Bucket, null, Tag), - false}; + false + }; {StartTerm, EndTerm} -> - {leveled_codec:to_querykey(Bucket, StartTerm, Tag), + { + leveled_codec:to_querykey(Bucket, StartTerm, Tag), leveled_codec:to_querykey(Bucket, EndTerm, Tag), - true} + true + } end, - SnapQuery = - case Snap of + SnapQuery = + case Snap of true -> {StartKey, EndKey}; false -> @@ -2314,11 +2517,10 @@ return_ledger_keyrange(Tag, Bucket, KeyRange) -> end, {StartKey, EndKey, SnapQuery}. - -spec maybe_longrunning(erlang:timestamp(), atom()) -> ok. %% @doc -%% Check the length of time an operation (named by Aspect) has taken, and -%% see if it has crossed the long running threshold. If so log to indicate +%% Check the length of time an operation (named by Aspect) has taken, and +%% see if it has crossed the long running threshold. If so log to indicate %% a long running event has occurred. maybe_longrunning(SW, Aspect) -> case timer:now_diff(os:timestamp(), SW) of @@ -2329,38 +2531,51 @@ maybe_longrunning(SW, Aspect) -> end. -spec readycache_forsnapshot( - ledger_cache(), tuple()|no_lookup|undefined) -> ledger_cache(). + ledger_cache(), tuple() | no_lookup | undefined +) -> ledger_cache(). 
%% @doc -%% Strip the ledger cach back to only the relevant information needed in +%% Strip the ledger cach back to only the relevant information needed in %% the query, and to make the cache a snapshot (and so not subject to changes %% such as additions to the ets table) readycache_forsnapshot(LedgerCache, {StartKey, EndKey}) -> - {KL, MinSQN, MaxSQN} = scan_table(LedgerCache#ledger_cache.mem, - StartKey, - EndKey), + {KL, MinSQN, MaxSQN} = scan_table( + LedgerCache#ledger_cache.mem, + StartKey, + EndKey + ), case KL of [] -> - #ledger_cache{loader=empty_cache, - index=empty_index, - min_sqn=MinSQN, - max_sqn=MaxSQN}; + #ledger_cache{ + loader = empty_cache, + index = empty_index, + min_sqn = MinSQN, + max_sqn = MaxSQN + }; _ -> - #ledger_cache{loader=leveled_tree:from_orderedlist(KL, - ?CACHE_TYPE), - index=empty_index, - min_sqn=MinSQN, - max_sqn=MaxSQN} + #ledger_cache{ + loader = leveled_tree:from_orderedlist( + KL, + ?CACHE_TYPE + ), + index = empty_index, + min_sqn = MinSQN, + max_sqn = MaxSQN + } end; readycache_forsnapshot(LedgerCache, Query) -> % Need to convert the Ledger Cache away from using the ETS table - Tree = leveled_tree:from_orderedset(LedgerCache#ledger_cache.mem, - ?CACHE_TYPE), + Tree = leveled_tree:from_orderedset( + LedgerCache#ledger_cache.mem, + ?CACHE_TYPE + ), case leveled_tree:tsize(Tree) of 0 -> - #ledger_cache{loader=empty_cache, - index=empty_index, - min_sqn=LedgerCache#ledger_cache.min_sqn, - max_sqn=LedgerCache#ledger_cache.max_sqn}; + #ledger_cache{ + loader = empty_cache, + index = empty_index, + min_sqn = LedgerCache#ledger_cache.min_sqn, + max_sqn = LedgerCache#ledger_cache.max_sqn + }; _ -> Idx = case Query of @@ -2369,21 +2584,28 @@ readycache_forsnapshot(LedgerCache, Query) -> _ -> LedgerCache#ledger_cache.index end, - #ledger_cache{loader=Tree, - index=Idx, - min_sqn=LedgerCache#ledger_cache.min_sqn, - max_sqn=LedgerCache#ledger_cache.max_sqn} + #ledger_cache{ + loader = Tree, + index = Idx, + min_sqn = LedgerCache#ledger_cache.min_sqn, + max_sqn = LedgerCache#ledger_cache.max_sqn + } end. --spec scan_table(ets:tab(), - leveled_codec:ledger_key(), leveled_codec:ledger_key()) - -> {list(leveled_codec:ledger_kv()), - non_neg_integer()|infinity, - non_neg_integer()}. +-spec scan_table( + ets:tab(), + leveled_codec:ledger_key(), + leveled_codec:ledger_key() +) -> + { + list(leveled_codec:ledger_kv()), + non_neg_integer() | infinity, + non_neg_integer() + }. %% @doc %% Query the ETS table to find a range of keys (start inclusive). Should also -%% return the miniumum and maximum sequence number found in the query. This -%% is just then used as a safety check when loading these results into the +%% return the miniumum and maximum sequence number found in the query. This +%% is just then used as a safety check when loading these results into the %% penciller snapshot scan_table(Table, StartKey, EndKey) -> case ets:lookup(Table, StartKey) of @@ -2391,8 +2613,14 @@ scan_table(Table, StartKey, EndKey) -> scan_table(Table, StartKey, EndKey, [], infinity, 0); [{StartKey, StartVal}] -> SQN = leveled_codec:strip_to_seqonly({StartKey, StartVal}), - scan_table(Table, StartKey, EndKey, - [{StartKey, StartVal}], SQN, SQN) + scan_table( + Table, + StartKey, + EndKey, + [{StartKey, StartVal}], + SQN, + SQN + ) end. 
scan_table(Table, StartKey, EndKey, Acc, MinSQN, MaxSQN) -> @@ -2406,27 +2634,28 @@ scan_table(Table, StartKey, EndKey, Acc, MinSQN, MaxSQN) -> false -> [{NextKey, NextVal}] = ets:lookup(Table, NextKey), SQN = leveled_codec:strip_to_seqonly({NextKey, NextVal}), - scan_table(Table, - NextKey, - EndKey, - [{NextKey, NextVal}|Acc], - min(MinSQN, SQN), - max(MaxSQN, SQN)) + scan_table( + Table, + NextKey, + EndKey, + [{NextKey, NextVal} | Acc], + min(MinSQN, SQN), + max(MaxSQN, SQN) + ) end end. - --spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache()) - -> {not_present|leveled_codec:ledger_value(), boolean()}. +-spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache()) -> + {not_present | leveled_codec:ledger_value(), boolean()}. %% @doc %% Fetch only the head of the object from the Ledger (or the bookie's recent -%% ledger cache if it has just been updated). not_present is returned if the +%% ledger cache if it has just been updated). not_present is returned if the %% Key is not found fetch_head(Key, Penciller, LedgerCache) -> fetch_head(Key, Penciller, LedgerCache, false). --spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache(), boolean()) - -> {not_present|leveled_codec:ledger_value(), boolean()}. +-spec fetch_head(leveled_codec:ledger_key(), pid(), ledger_cache(), boolean()) -> + {not_present | leveled_codec:ledger_value(), boolean()}. %% doc %% The L0Index needs to be bypassed when running head_only fetch_head(Key, Penciller, LedgerCache, HeadOnly) -> @@ -2436,9 +2665,9 @@ fetch_head(Key, Penciller, LedgerCache, HeadOnly) -> {Head, true}; [] -> Hash = leveled_codec:segment_hash(Key), - UseL0Idx = not HeadOnly, - % don't use the L0Index in head only mode. Object specs don't - % get an addition on the L0 index + UseL0Idx = not HeadOnly, + % don't use the L0Index in head only mode. Object specs don't + % get an addition on the L0 index case leveled_penciller:pcl_fetch(Penciller, Key, Hash, UseL0Idx) of {Key, Head} -> maybe_longrunning(SW, pcl_head), @@ -2449,25 +2678,25 @@ fetch_head(Key, Penciller, LedgerCache, HeadOnly) -> end end. - --spec journal_notfound(integer(), pid(), leveled_codec:ledger_key(), integer()) - -> {boolean(), integer()}. +-spec journal_notfound(integer(), pid(), leveled_codec:ledger_key(), integer()) -> + {boolean(), integer()}. %% @doc Check to see if the item is not_found in the journal. If it is found %% return false, and drop the counter that represents the frequency this check %% should be made. If it is not_found, this is not expected so up the check %% frequency to the maximum value journal_notfound(CheckFrequency, Inker, LK, SQN) -> - check_notfound(CheckFrequency, - fun() -> - leveled_inker:ink_keycheck(Inker, LK, SQN) - end). - + check_notfound( + CheckFrequency, + fun() -> + leveled_inker:ink_keycheck(Inker, LK, SQN) + end + ). --spec check_notfound(integer(), fun(() -> probably|missing)) -> - {boolean(), integer()}. +-spec check_notfound(integer(), fun(() -> probably | missing)) -> + {boolean(), integer()}. %% @doc Use a function to check if an item is found check_notfound(CheckFrequency, CheckFun) -> - case rand:uniform(?MAX_KEYCHECK_FREQUENCY) of + case rand:uniform(?MAX_KEYCHECK_FREQUENCY) of X when X =< CheckFrequency -> case CheckFun() of probably -> @@ -2479,33 +2708,39 @@ check_notfound(CheckFrequency, CheckFun) -> {false, CheckFrequency} end. 
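The adaptive check above returns an updated frequency alongside the result, so callers are expected to thread that frequency through their own state. A minimal caller-side sketch (not part of the patch; handle_heads/3 is a hypothetical name) of how the value returned by journal_notfound/4 would be carried forward:

handle_heads([], _Inker, FinalFrequency) ->
    FinalFrequency;
handle_heads([{LedgerKey, SQN} | Rest], Inker, Frequency0) ->
    %% A journal hit lowers the frequency of future checks; an unexpected
    %% miss pushes it back up to ?MAX_KEYCHECK_FREQUENCY
    {_NotFound, Frequency1} =
        journal_notfound(Frequency0, Inker, LedgerKey, SQN),
    handle_heads(Rest, Inker, Frequency1).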
- -spec preparefor_ledgercache( - leveled_codec:journal_key_tag()|null, - leveled_codec:primary_key()|?DUMMY, + leveled_codec:journal_key_tag() | null, + leveled_codec:primary_key() | ?DUMMY, non_neg_integer(), any(), - integer(), - leveled_codec:journal_keychanges()) - -> {leveled_codec:segment_hash(), - non_neg_integer(), - list(leveled_codec:ledger_kv())}. + integer(), + leveled_codec:journal_keychanges() +) -> + { + leveled_codec:segment_hash(), + non_neg_integer(), + list(leveled_codec:ledger_kv()) + }. %% @doc -%% Prepare an object and its related key changes for addition to the Ledger +%% Prepare an object and its related key changes for addition to the Ledger %% via the Ledger Cache. preparefor_ledgercache(?INKT_MPUT, ?DUMMY, SQN, _O, _S, {ObjSpecs, TTL}) -> ObjChanges = leveled_codec:obj_objectspecs(ObjSpecs, SQN, TTL), {no_lookup, SQN, ObjChanges}; preparefor_ledgercache( - ?INKT_KEYD, LedgerKey, SQN, _Obj, _Size, {IdxSpecs, TTL}) - when LedgerKey =/= ?DUMMY -> + ?INKT_KEYD, LedgerKey, SQN, _Obj, _Size, {IdxSpecs, TTL} +) when + LedgerKey =/= ?DUMMY +-> {Bucket, Key} = leveled_codec:from_ledgerkey(LedgerKey), KeyChanges = leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL), {no_lookup, SQN, KeyChanges}; preparefor_ledgercache( - _InkTag, LedgerKey, SQN, Obj, Size, {IdxSpecs, TTL}) - when LedgerKey =/= ?DUMMY -> + _InkTag, LedgerKey, SQN, Obj, Size, {IdxSpecs, TTL} +) when + LedgerKey =/= ?DUMMY +-> {Bucket, Key, MetaValue, {KeyH, _ObjH}, _LastMods} = leveled_codec:generate_ledgerkv(LedgerKey, SQN, Obj, Size, TTL), KeyChanges = @@ -2513,35 +2748,41 @@ preparefor_ledgercache( leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL), {KeyH, SQN, KeyChanges}. - -spec recalcfor_ledgercache( - leveled_codec:journal_key_tag()|null, - leveled_codec:primary_key()|?DUMMY, + leveled_codec:journal_key_tag() | null, + leveled_codec:primary_key() | ?DUMMY, non_neg_integer(), - binary()|term(), - integer(), + binary() | term(), + integer(), leveled_codec:journal_keychanges(), ledger_cache(), - pid()) - -> {leveled_codec:segment_hash(), - non_neg_integer(), - list(leveled_codec:ledger_kv())}. + pid() +) -> + { + leveled_codec:segment_hash(), + non_neg_integer(), + list(leveled_codec:ledger_kv()) + }. %% @doc %% When loading from the journal to the ledger, may hit a key which has the %% `recalc` strategy. Such a key needs to recalculate the key changes by %% comparison with the current state of the ledger, assuming it is a full %% journal entry (i.e. KeyDeltas which may be a result of previously running -%% with a retain strategy should be ignored). +%% with a retain strategy should be ignored). 
recalcfor_ledgercache( - InkTag, _LedgerKey, SQN, _Obj, _Size, {_IdxSpecs, _TTL}, _LC, _Pcl) - when InkTag == ?INKT_MPUT; InkTag == ?INKT_KEYD -> + InkTag, _LedgerKey, SQN, _Obj, _Size, {_IdxSpecs, _TTL}, _LC, _Pcl +) when + InkTag == ?INKT_MPUT; InkTag == ?INKT_KEYD +-> {no_lookup, SQN, []}; recalcfor_ledgercache( - _InkTag, LK, SQN, Obj, Size, {_Ignore, TTL}, LedgerCache, Penciller) - when LK =/= ?DUMMY -> + _InkTag, LK, SQN, Obj, Size, {_Ignore, TTL}, LedgerCache, Penciller +) when + LK =/= ?DUMMY +-> {Bucket, Key, MetaValue, {KeyH, _ObjH}, _LastMods} = leveled_codec:generate_ledgerkv(LK, SQN, Obj, Size, TTL), - OldObject = + OldObject = case check_in_ledgercache(LK, KeyH, LedgerCache, loader) of false -> leveled_penciller:pcl_fetch(Penciller, LK, KeyH, true); @@ -2565,38 +2806,44 @@ recalcfor_ledgercache( end, IdxSpecs = leveled_head:diff_indexspecs(element(1, LK), UpdMetadata, OldMetadata), - {KeyH, - SQN, - [{LK, MetaValue}] - ++ leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL)}. - + {KeyH, SQN, + [{LK, MetaValue}] ++ + leveled_codec:idx_indexspecs(IdxSpecs, Bucket, Key, SQN, TTL)}. -spec addto_ledgercache( - {leveled_codec:segment_hash(), - non_neg_integer(), - list(leveled_codec:ledger_kv())}, - ledger_cache()) - -> ledger_cache(). + { + leveled_codec:segment_hash(), + non_neg_integer(), + list(leveled_codec:ledger_kv()) + }, + ledger_cache() +) -> + ledger_cache(). %% @doc -%% Add a set of changes associated with a single sequence number (journal +%% Add a set of changes associated with a single sequence number (journal %% update) and key to the ledger cache. If the changes are not to be looked %% up directly, then they will not be indexed to accelerate lookup addto_ledgercache({H, SQN, KeyChanges}, Cache) -> ets:insert(Cache#ledger_cache.mem, KeyChanges), UpdIndex = leveled_pmem:prepare_for_index(Cache#ledger_cache.index, H), - Cache#ledger_cache{index = UpdIndex, - min_sqn=min(SQN, Cache#ledger_cache.min_sqn), - max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}. + Cache#ledger_cache{ + index = UpdIndex, + min_sqn = min(SQN, Cache#ledger_cache.min_sqn), + max_sqn = max(SQN, Cache#ledger_cache.max_sqn) + }. -spec addto_ledgercache( - {leveled_codec:segment_hash()|no_lookup, + { + leveled_codec:segment_hash() | no_lookup, integer(), - list(leveled_codec:ledger_kv())}, + list(leveled_codec:ledger_kv()) + }, ledger_cache(), - loader) - -> ledger_cache(). + loader +) -> + ledger_cache(). %% @doc -%% Add a set of changes associated with a single sequence number (journal +%% Add a set of changes associated with a single sequence number (journal %% update) to the ledger cache. This is used explicitly when loading the %% ledger from the Journal (i.e. at startup) - and in this case the ETS insert %% can be bypassed, as all changes will be flushed to the Penciller before the @@ -2604,17 +2851,20 @@ addto_ledgercache({H, SQN, KeyChanges}, Cache) -> addto_ledgercache({H, SQN, KeyChanges}, Cache, loader) -> UpdQ = KeyChanges ++ Cache#ledger_cache.load_queue, UpdIndex = leveled_pmem:prepare_for_index(Cache#ledger_cache.index, H), - Cache#ledger_cache{index = UpdIndex, - load_queue = UpdQ, - min_sqn=min(SQN, Cache#ledger_cache.min_sqn), - max_sqn=max(SQN, Cache#ledger_cache.max_sqn)}. - + Cache#ledger_cache{ + index = UpdIndex, + load_queue = UpdQ, + min_sqn = min(SQN, Cache#ledger_cache.min_sqn), + max_sqn = max(SQN, Cache#ledger_cache.max_sqn) + }. 
--spec check_in_ledgercache(leveled_codec:ledger_key(), - leveled_codec:segment_hash(), - ledger_cache(), - loader) -> - false | leveled_codec:ledger_kv(). +-spec check_in_ledgercache( + leveled_codec:ledger_key(), + leveled_codec:segment_hash(), + ledger_cache(), + loader +) -> + false | leveled_codec:ledger_kv(). %% @doc %% Check the ledger cache for a Key, when the ledger cache is in loader mode %% and so is populating a queue not an ETS table @@ -2626,18 +2876,18 @@ check_in_ledgercache(PK, Hash, Cache, loader) -> lists:keyfind(PK, 1, Cache#ledger_cache.load_queue) end. - -spec maybepush_ledgercache( - pos_integer(), pos_integer(), ledger_cache(), pid()) - -> {ok|returned, ledger_cache()}. + pos_integer(), pos_integer(), ledger_cache(), pid() +) -> + {ok | returned, ledger_cache()}. %% @doc -%% Following an update to the ledger cache, check if this now big enough to be -%% pushed down to the Penciller. There is some random jittering here, to +%% Following an update to the ledger cache, check if this now big enough to be +%% pushed down to the Penciller. There is some random jittering here, to %% prevent coordination across leveled instances (e.g. when running in Riak). -%% +%% %% The penciller may be too busy, as the LSM tree is backed up with merge %% activity. In this case the update is not made and 'returned' not ok is set -%% in the reply. Try again later when it isn't busy (and also potentially +%% in the reply. Try again later when it isn't busy (and also potentially %% implement a slow_offer state to slow down the pace at which PUTs are being %% received) maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller) -> @@ -2647,7 +2897,7 @@ maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller) -> if TimeToPush -> CacheToLoad = - { + { Tab, Cache#ledger_cache.index, Cache#ledger_cache.min_sqn, @@ -2658,7 +2908,7 @@ maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller) -> Cache0 = #ledger_cache{}, true = ets:delete(Tab), NewTab = ets:new(mem, [ordered_set]), - {ok, Cache0#ledger_cache{mem=NewTab}}; + {ok, Cache0#ledger_cache{mem = NewTab}}; returned -> {returned, Cache} end; @@ -2667,22 +2917,23 @@ maybepush_ledgercache(MaxCacheSize, MaxCacheMult, Cache, Penciller) -> end. -spec maybe_withjitter( - non_neg_integer(), pos_integer(), pos_integer()) -> boolean(). + non_neg_integer(), pos_integer(), pos_integer() +) -> boolean(). %% @doc %% Push down randomly, but the closer to 4 * the maximum size, the more likely %% a push should be maybe_withjitter( - CacheSize, MaxCacheSize, MaxCacheMult) when CacheSize > MaxCacheSize -> + CacheSize, MaxCacheSize, MaxCacheMult +) when CacheSize > MaxCacheSize -> R = rand:uniform(MaxCacheMult * MaxCacheSize), (CacheSize - MaxCacheSize) > R; maybe_withjitter(_CacheSize, _MaxCacheSize, _MaxCacheMult) -> false. - -spec get_loadfun() -> initial_loadfun(). %% @doc -%% The LoadFun will be used by the Inker when walking across the Journal to -%% load the Penciller at startup. +%% The LoadFun will be used by the Inker when walking across the Journal to +%% load the Penciller at startup. 
get_loadfun() -> fun(KeyInJournal, ValueInJournal, _Pos, Acc0, ExtractFun) -> {MinSQN, MaxSQN, LoadItems} = Acc0, @@ -2700,21 +2951,20 @@ get_loadfun() -> case SQN of MaxSQN -> {stop, - {MinSQN, - MaxSQN, - [{InkTag, PK, SQN, Obj, IdxSpecs, ValSize} - |LoadItems]}}; + {MinSQN, MaxSQN, [ + {InkTag, PK, SQN, Obj, IdxSpecs, ValSize} + | LoadItems + ]}}; _ -> {loop, - {MinSQN, - MaxSQN, - [{InkTag, PK, SQN, Obj, IdxSpecs, ValSize} - |LoadItems]}} + {MinSQN, MaxSQN, [ + {InkTag, PK, SQN, Obj, IdxSpecs, ValSize} + | LoadItems + ]}} end end end. - delete_path(DirPath) -> ok = filelib:ensure_dir(DirPath), {ok, Files} = file:list_dir(DirPath), @@ -2722,35 +2972,50 @@ delete_path(DirPath) -> file:del_dir(DirPath). -spec maybelog_put_timing( - leveled_monitor:monitor(), - leveled_monitor:timing(), - leveled_monitor:timing(), - leveled_monitor:timing(), - pos_integer()) -> ok. + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing(), + leveled_monitor:timing(), + pos_integer() +) -> ok. maybelog_put_timing( - {Pid, _StatsFreq}, InkTime, PrepTime, MemTime, Size) - when is_pid(Pid), - is_integer(InkTime), is_integer(PrepTime), is_integer(MemTime) -> + {Pid, _StatsFreq}, InkTime, PrepTime, MemTime, Size +) when + is_pid(Pid), + is_integer(InkTime), + is_integer(PrepTime), + is_integer(MemTime) +-> leveled_monitor:add_stat( - Pid, {bookie_put_update, InkTime, PrepTime, MemTime, Size}); + Pid, {bookie_put_update, InkTime, PrepTime, MemTime, Size} + ); maybelog_put_timing(_Monitor, _, _, _, _Size) -> ok. -spec maybelog_head_timing( - leveled_monitor:monitor(), - leveled_monitor:timing(), - leveled_monitor:timing(), - boolean(), - boolean()) -> ok. -maybelog_head_timing({Pid, _StatsFreq}, FetchTime, RspTime, false, CH) - when is_pid(Pid), is_integer(FetchTime), is_integer(RspTime) -> - CH0 = case CH of true -> 1; false -> 0 end, + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing(), + boolean(), + boolean() +) -> ok. +maybelog_head_timing({Pid, _StatsFreq}, FetchTime, RspTime, false, CH) when + is_pid(Pid), is_integer(FetchTime), is_integer(RspTime) +-> + CH0 = + case CH of + true -> 1; + false -> 0 + end, leveled_monitor:add_stat( - Pid, {bookie_head_update, FetchTime, RspTime, CH0}); -maybelog_head_timing({Pid, _StatsFreq}, FetchTime, _, true, _CH) - when is_pid(Pid), is_integer(FetchTime) -> + Pid, {bookie_head_update, FetchTime, RspTime, CH0} + ); +maybelog_head_timing({Pid, _StatsFreq}, FetchTime, _, true, _CH) when + is_pid(Pid), is_integer(FetchTime) +-> leveled_monitor:add_stat( - Pid, {bookie_head_update, FetchTime, not_found, 0}); + Pid, {bookie_head_update, FetchTime, not_found, 0} + ); maybelog_head_timing(_Monitor, _, _, _NF, _CH) -> ok. @@ -2758,12 +3023,15 @@ maybelog_head_timing(_Monitor, _, _, _NF, _CH) -> leveled_monitor:monitor(), leveled_monitor:timing(), leveled_monitor:timing(), - boolean()) -> ok. -maybelog_get_timing({Pid, _StatsFreq}, HeadTime, BodyTime, false) - when is_pid(Pid), is_integer(HeadTime), is_integer(BodyTime) -> + boolean() +) -> ok. 
+maybelog_get_timing({Pid, _StatsFreq}, HeadTime, BodyTime, false) when + is_pid(Pid), is_integer(HeadTime), is_integer(BodyTime) +-> leveled_monitor:add_stat(Pid, {bookie_get_update, HeadTime, BodyTime}); -maybelog_get_timing({Pid, _StatsFreq}, HeadTime, _BodyTime, true) - when is_pid(Pid), is_integer(HeadTime) -> +maybelog_get_timing({Pid, _StatsFreq}, HeadTime, _BodyTime, true) when + is_pid(Pid), is_integer(HeadTime) +-> leveled_monitor:add_stat(Pid, {bookie_get_update, HeadTime, not_found}); maybelog_get_timing(_Monitor, _, _, _NF) -> ok. @@ -2771,14 +3039,15 @@ maybelog_get_timing(_Monitor, _, _, _NF) -> -spec maybelog_snap_timing( leveled_monitor:monitor(), leveled_monitor:timing(), - leveled_monitor:timing()) -> ok. -maybelog_snap_timing({Pid, _StatsFreq}, BookieTime, PCLTime) - when is_pid(Pid), is_integer(BookieTime), is_integer(PCLTime) -> + leveled_monitor:timing() +) -> ok. +maybelog_snap_timing({Pid, _StatsFreq}, BookieTime, PCLTime) when + is_pid(Pid), is_integer(BookieTime), is_integer(PCLTime) +-> leveled_monitor:add_stat(Pid, {bookie_snap_update, BookieTime, PCLTime}); maybelog_snap_timing(_Monitor, _, _) -> ok. - %%%============================================================================ %%% Test %%%============================================================================ @@ -2793,7 +3062,7 @@ book_returnactors(Pid) -> gen_server:call(Pid, return_actors). reset_filestructure() -> - RootPath = "test/test_area", + RootPath = "test/test_area", leveled_inker:clean_testdir(RootPath ++ "/" ++ ?JOURNAL_FP), leveled_penciller:clean_testdir(RootPath ++ "/" ++ ?LEDGER_FP), RootPath. @@ -2809,7 +3078,8 @@ generate_multiple_objects(Count, KeyNumber, ObjL) -> IndexSpec = [ { - add, <<"idx1_bin">>, + add, + <<"idx1_bin">>, list_to_binary("f" ++ integer_to_list(KeyNumber rem 10)) } ], @@ -2829,16 +3099,18 @@ shutdown_tester() -> fun({K, V, S}) -> ok = book_put(Bookie1, <<"Bucket">>, K, V, S, ?STD_TAG) end, - generate_multiple_objects(5000, 1)), + generate_multiple_objects(5000, 1) + ), {ok, SnpPCL1, SnpJrnl1} = leveled_bookie:book_snapshot(Bookie1, store, undefined, true), - + TestPid = self(), spawn( fun() -> ok = leveled_bookie:book_close(Bookie1), TestPid ! ok - end), + end + ), timer:sleep(2000), ok = leveled_penciller:pcl_close(SnpPCL1), @@ -2846,12 +3118,13 @@ shutdown_tester() -> P when is_pid(P) -> ok = leveled_inker:ink_close(SnpJrnl1) end, SW = os:timestamp(), - receive ok -> ok end, + receive + ok -> ok + end, WaitForShutDown = timer:now_diff(SW, os:timestamp()) div 1000, ?assert(WaitForShutDown =< (1000 + 1)), _ = reset_filestructure(). 
- ttl_test() -> RootPath = reset_filestructure(), {ok, Bookie1} = book_start([{root_path, RootPath}]), @@ -2884,17 +3157,20 @@ ttl_test() -> fun({K, V, S}) -> ok = book_tempput(Bookie1, <<"Bucket">>, K, V, S, ?STD_TAG, Past) end, - ObjL2), + ObjL2 + ), lists:foreach( fun({K, _V, _S}) -> not_found = book_get(Bookie1, <<"Bucket">>, K, ?STD_TAG) end, - ObjL2), + ObjL2 + ), lists:foreach( fun({K, _V, _S}) -> not_found = book_head(Bookie1, <<"Bucket">>, K, ?STD_TAG) end, - ObjL2), + ObjL2 + ), {async, BucketFolder} = book_returnfolder(Bookie1, {bucket_stats, <<"Bucket">>}), @@ -2919,11 +3195,8 @@ ttl_test() -> {async, IndexFolderTR} = book_returnfolder( Bookie1, - {index_query, - <<"Bucket">>, - {FoldKeysFun, []}, - {<<"idx1_bin">>, <<"f8">>, <<"f9">>}, - {true, Regex}} + {index_query, <<"Bucket">>, {FoldKeysFun, []}, + {<<"idx1_bin">>, <<"f8">>, <<"f9">>}, {true, Regex}} ), TermKeyList = IndexFolderTR(), ?assertMatch(10, length(TermKeyList)), @@ -2932,14 +3205,15 @@ ttl_test() -> {ok, Bookie2} = book_start([{root_path, RootPath}]), {async, IndexFolderTR2} = - book_returnfolder( - Bookie2, - { - index_query, - <<"Bucket">>, - {FoldKeysFun, []}, - {<<"idx1_bin">>, <<"f7">>, <<"f9">>}, - {false, Regex}} + book_returnfolder( + Bookie2, + { + index_query, + <<"Bucket">>, + {FoldKeysFun, []}, + {<<"idx1_bin">>, <<"f7">>, <<"f9">>}, + {false, Regex} + } ), KeyList2 = IndexFolderTR2(), ?assertMatch(10, length(KeyList2)), @@ -2977,7 +3251,7 @@ hashlist_query_testto() -> % Put in all the objects with a TTL in the future Future = leveled_util:integer_now() + 300, lists:foreach( - fun({K, V, S}) -> + fun({K, V, S}) -> ok = book_tempput(Bookie1, <<"Bucket">>, K, V, S, ?STD_TAG, Future) end, ObjL1 @@ -3000,8 +3274,9 @@ hashlist_query_testto() -> ?assertMatch(<<"Bucket">>, B), ?assertMatch(true, is_integer(H)) end, - KeyHashList) - , + KeyHashList + ), + ?assertMatch(1200, length(KeyHashList)), ok = book_close(Bookie1), {ok, Bookie2} = @@ -3015,13 +3290,12 @@ hashlist_query_testto() -> {async, HTFolder2} = book_returnfolder(Bookie2, {hashlist_query, ?STD_TAG, false}), L0 = length(KeyHashList), - HTR2 = HTFolder2(), + HTR2 = HTFolder2(), ?assertMatch(L0, length(HTR2)), ?assertMatch(KeyHashList, HTR2), ok = book_close(Bookie2), reset_filestructure(). - hashlist_query_withjournalcheck_test_() -> {timeout, 60, fun hashlist_query_withjournalcheck_testto/0}. 
@@ -3060,17 +3334,21 @@ foldobjects_vs_hashtree_testto() -> RootPath = reset_filestructure(), {ok, Bookie1} = book_start( - [{ - root_path, RootPath}, + [ + { + root_path, RootPath + }, {max_journalsize, 1000000}, {cache_size, 500} - ]), + ] + ), ObjL1 = generate_multiple_objects(800, 1), % Put in all the objects with a TTL in the future Future = leveled_util:integer_now() + 300, lists:foreach( - fun({K, V, S}) -> ok = - book_tempput(Bookie1, <<"Bucket">>, K, V, S, ?STD_TAG, Future) + fun({K, V, S}) -> + ok = + book_tempput(Bookie1, <<"Bucket">>, K, V, S, ?STD_TAG, Future) end, ObjL1 ), @@ -3080,15 +3358,15 @@ foldobjects_vs_hashtree_testto() -> FoldObjectsFun = fun(B, K, V, Acc) -> - [{B, K, erlang:phash2(term_to_binary(V))}|Acc] + [{B, K, erlang:phash2(term_to_binary(V))} | Acc] end, {async, HTFolder2} = book_returnfolder( Bookie1, { - foldobjects_allkeys, - ?STD_TAG, - FoldObjectsFun, + foldobjects_allkeys, + ?STD_TAG, + FoldObjectsFun, true } ), @@ -3100,16 +3378,16 @@ foldobjects_vs_hashtree_testto() -> {proxy_object, _MDBin, _Size, {FetchFun, Clone, JK}} = binary_to_term(ProxyV), V = FetchFun(Clone, JK), - [{B, K, erlang:phash2(term_to_binary(V))}|Acc] + [{B, K, erlang:phash2(term_to_binary(V))} | Acc] end, {async, HTFolder3} = book_returnfolder( Bookie1, { - foldheads_allkeys, - ?STD_TAG, - FoldHeadsFun, + foldheads_allkeys, + ?STD_TAG, + FoldHeadsFun, true, true, false, @@ -3124,15 +3402,15 @@ foldobjects_vs_hashtree_testto() -> fun(B, K, ProxyV, Acc) -> {proxy_object, MD, _Size1, _Fetcher} = binary_to_term(ProxyV), {Hash, _Size0, _UserDefinedMD} = MD, - [{B, K, Hash}|Acc] + [{B, K, Hash} | Acc] end, {async, HTFolder4} = book_returnfolder( Bookie1, { - foldheads_allkeys, - ?STD_TAG, + foldheads_allkeys, + ?STD_TAG, FoldHeadsFun2, false, false, @@ -3160,11 +3438,13 @@ folder_cache_test(CacheSize) -> RootPath = reset_filestructure(), {ok, Bookie1} = book_start( - [{ - root_path, RootPath}, + [ + { + root_path, RootPath + }, {max_journalsize, 1000000}, - {cache_size, CacheSize - }] + {cache_size, CacheSize} + ] ), _ = book_returnactors(Bookie1), ObjL1 = generate_multiple_objects(400, 1), @@ -3183,11 +3463,12 @@ folder_cache_test(CacheSize) -> ok = book_tempput(Bookie1, <<"BucketB">>, K, V, S, ?STD_TAG, Future) end, - ObjL2), + ObjL2 + ), FoldObjectsFun = fun(B, K, V, Acc) -> - [{B, K, erlang:phash2(term_to_binary(V))}|Acc] + [{B, K, erlang:phash2(term_to_binary(V))} | Acc] end, {async, HTFolder1A} = book_returnfolder( @@ -3225,7 +3506,7 @@ folder_cache_test(CacheSize) -> {proxy_object, _MDBin, _Size, {FetchFun, Clone, JK}} = binary_to_term(ProxyV), V = FetchFun(Clone, JK), - [{B, K, erlang:phash2(term_to_binary(V))}|Acc] + [{B, K, erlang:phash2(term_to_binary(V))} | Acc] end, {async, HTFolder2A} = @@ -3240,7 +3521,8 @@ folder_cache_test(CacheSize) -> true, true, false, - false,false + false, + false } ), KeyHashList2A = return_list_result(HTFolder2A), @@ -3261,7 +3543,7 @@ folder_cache_test(CacheSize) -> } ), KeyHashList2B = return_list_result(HTFolder2B), - + ?assertMatch( true, lists:usort(KeyHashList1A) == lists:usort(KeyHashList2A) @@ -3281,7 +3563,7 @@ folder_cache_test(CacheSize) -> {<<"Key">>, <<"$all">>}, FoldHeadsFun, true, - false, + false, false, false, false @@ -3313,13 +3595,13 @@ folder_cache_test(CacheSize) -> true, lists:usort(KeyHashList2B) == lists:usort(KeyHashList2D) ), - - CheckSplitQueryFun = + + CheckSplitQueryFun = fun(SplitInt) -> io:format("Testing SplitInt ~w~n", [SplitInt]), SplitIntEnd = list_to_binary("Key" ++ integer_to_list(SplitInt) ++ "|"), - 
SplitIntStart = + SplitIntStart = list_to_binary("Key" ++ integer_to_list(SplitInt + 1)), {async, HTFolder2E} = book_returnfolder( @@ -3355,18 +3637,22 @@ folder_cache_test(CacheSize) -> } ), KeyHashList2F = return_list_result(HTFolder2F), - + ?assertMatch(true, length(KeyHashList2E) > 0), ?assertMatch(true, length(KeyHashList2F) > 0), - - io:format("Length of 2B ~w 2E ~w 2F ~w~n", - [length(KeyHashList2B), - length(KeyHashList2E), - length(KeyHashList2F)]), + + io:format( + "Length of 2B ~w 2E ~w 2F ~w~n", + [ + length(KeyHashList2B), + length(KeyHashList2E), + length(KeyHashList2F) + ] + ), CompareL = lists:usort(KeyHashList2E ++ KeyHashList2F), ?assertMatch(true, lists:usort(KeyHashList2B) == CompareL) end, - + lists:foreach(CheckSplitQueryFun, [1, 4, 8, 300, 100, 400, 200, 600]), ok = book_close(Bookie1), @@ -3383,29 +3669,34 @@ small_cachesize_test() -> RootPath = reset_filestructure(), {ok, Bookie1} = book_start( - [{ - root_path, RootPath}, + [ + { + root_path, RootPath + }, {max_journalsize, 1000000}, {cache_size, 1} - ]), + ] + ), ok = leveled_bookie:book_close(Bookie1). - is_empty_test() -> RootPath = reset_filestructure(), {ok, Bookie1} = book_start( - [{ - root_path, RootPath}, + [ + { + root_path, RootPath + }, {max_journalsize, 1000000}, {cache_size, 500} - ]), + ] + ), % Put in an object with a TTL in the future Future = leveled_util:integer_now() + 300, ?assertMatch(true, leveled_bookie:book_isempty(Bookie1, ?STD_TAG)), - ok = + ok = book_tempput( - Bookie1, <<"B">>, <<"K">>, {value, <<"V">>}, [], ?STD_TAG, Future + Bookie1, <<"B">>, <<"K">>, {value, <<"V">>}, [], ?STD_TAG, Future ), ?assertMatch(false, leveled_bookie:book_isempty(Bookie1, ?STD_TAG)), ?assertMatch(true, leveled_bookie:book_isempty(Bookie1, ?RIAK_TAG)), @@ -3421,11 +3712,14 @@ is_empty_headonly_test() -> {max_journalsize, 1000000}, {cache_size, 500}, {head_only, no_lookup} - ]), + ] + ), ?assertMatch(true, book_isempty(Bookie1, ?HEAD_TAG)), - ObjSpecs = - [{add, <<"B1">>, <<"K1">>, <<1:8/integer>>, {size, 100}}, - {remove, <<"B1">>, <<"K1">>, <<0:8/integer>>, null}], + ObjSpecs = + [ + {add, <<"B1">>, <<"K1">>, <<1:8/integer>>, {size, 100}}, + {remove, <<"B1">>, <<"K1">>, <<0:8/integer>>, null} + ], ok = book_mput(Bookie1, ObjSpecs), ?assertMatch(false, book_isempty(Bookie1, ?HEAD_TAG)), ok = book_close(Bookie1). @@ -3436,43 +3730,44 @@ undefined_rootpath_test() -> R = gen_server:start(?MODULE, [set_defaults(Opts)], []), ?assertMatch({error, no_root_path}, R), error_logger:tty(true). - + foldkeys_headonly_test() -> foldkeys_headonly_tester(5000, 25, <<"BucketStr">>), foldkeys_headonly_tester(2000, 25, <<"B0">>). 
foldkeys_headonly_tester(ObjectCount, BlockSize, BStr) -> RootPath = reset_filestructure(), - + {ok, Bookie1} = book_start( - [{root_path, RootPath}, - {max_journalsize, 1000000}, - {cache_size, 500}, - {head_only, no_lookup}] - ), + [ + {root_path, RootPath}, + {max_journalsize, 1000000}, + {cache_size, 500}, + {head_only, no_lookup} + ] + ), GenObjSpecFun = fun(I) -> Key = I rem 6, {add, BStr, <>, <>, null} end, ObjSpecs = lists:map(GenObjSpecFun, lists:seq(1, ObjectCount)), - ObjSpecBlocks = + ObjSpecBlocks = lists:map( fun(I) -> lists:sublist(ObjSpecs, I * BlockSize + 1, BlockSize) end, - lists:seq(0, ObjectCount div BlockSize - 1)), + lists:seq(0, ObjectCount div BlockSize - 1) + ), lists:map(fun(Block) -> book_mput(Bookie1, Block) end, ObjSpecBlocks), ?assertMatch(false, book_isempty(Bookie1, ?HEAD_TAG)), - - FolderT = - {keylist, - ?HEAD_TAG, BStr, - {fun(_B, {K, SK}, Acc) -> [{K, SK}|Acc] end, []} - }, - - Key_SKL_Compare = + + FolderT = + {keylist, ?HEAD_TAG, BStr, + {fun(_B, {K, SK}, Acc) -> [{K, SK} | Acc] end, []}}, + + Key_SKL_Compare = lists:usort( lists:map( fun({add, _B, K, SK, _V}) -> {K, SK} end, ObjSpecs @@ -3485,7 +3780,7 @@ foldkeys_headonly_tester(ObjectCount, BlockSize, BStr) -> end, ok = book_close(Bookie1), - + {ok, Bookie2} = book_start( [ @@ -3493,8 +3788,9 @@ foldkeys_headonly_tester(ObjectCount, BlockSize, BStr) -> {max_journalsize, 1000000}, {cache_size, 500}, {head_only, no_lookup} - ]), - + ] + ), + {async, Folder2} = book_returnfolder(Bookie2, FolderT), case Folder2() of Key_SKL2 when is_list(Key_SKL2) -> @@ -3503,20 +3799,20 @@ foldkeys_headonly_tester(ObjectCount, BlockSize, BStr) -> ok = book_close(Bookie2). - is_empty_stringkey_test() -> RootPath = reset_filestructure(), - {ok, Bookie1} = + {ok, Bookie1} = book_start( [ {root_path, RootPath}, {max_journalsize, 1000000}, {cache_size, 500} - ]), + ] + ), ?assertMatch(true, book_isempty(Bookie1, ?STD_TAG)), Past = leveled_util:integer_now() - 300, ?assertMatch(true, leveled_bookie:book_isempty(Bookie1, ?STD_TAG)), - ok = + ok = book_tempput( Bookie1, <<"B">>, <<"K">>, {value, <<"V">>}, [], ?STD_TAG, Past ), @@ -3527,27 +3823,34 @@ is_empty_stringkey_test() -> scan_table_test() -> K1 = leveled_codec:to_objectkey( - <<"B1">>, <<"K1">>, ?IDX_TAG, <<"F1-bin">>, <<"AA1">>), + <<"B1">>, <<"K1">>, ?IDX_TAG, <<"F1-bin">>, <<"AA1">> + ), K2 = leveled_codec:to_objectkey( - <<"B1">>, <<"K2">>, ?IDX_TAG, <<"F1-bin">>, <<"AA1">>), - K3 = + <<"B1">>, <<"K2">>, ?IDX_TAG, <<"F1-bin">>, <<"AA1">> + ), + K3 = leveled_codec:to_objectkey( - <<"B1">>, <<"K3">>, ?IDX_TAG, <<"F1-bin">>, <<"AB1">>), + <<"B1">>, <<"K3">>, ?IDX_TAG, <<"F1-bin">>, <<"AB1">> + ), K4 = leveled_codec:to_objectkey( - <<"B1">>, <<"K4">>, ?IDX_TAG, <<"F1-bin">>, <<"AA2">>), - K5 = + <<"B1">>, <<"K4">>, ?IDX_TAG, <<"F1-bin">>, <<"AA2">> + ), + K5 = leveled_codec:to_objectkey( - <<"B2">>, <<"K5">>, ?IDX_TAG, <<"F1-bin">>, <<"AA2">>), + <<"B2">>, <<"K5">>, ?IDX_TAG, <<"F1-bin">>, <<"AA2">> + ), Tab0 = ets:new(mem, [ordered_set]), SK_A0 = leveled_codec:to_querykey( - <<"B1">>, null, ?IDX_TAG, <<"F1-bin">>, <<"AA0">>), + <<"B1">>, null, ?IDX_TAG, <<"F1-bin">>, <<"AA0">> + ), EK_A9 = leveled_codec:to_querykey( - <<"B1">>, null, ?IDX_TAG, <<"F1-bin">>, <<"AA9">>), + <<"B1">>, null, ?IDX_TAG, <<"F1-bin">>, <<"AA9">> + ), Empty = {[], infinity, 0}, ?assertMatch(Empty, scan_table(Tab0, SK_A0, EK_A9)), ets:insert(Tab0, [{K1, {1, active, no_lookup, null}}]), @@ -3582,27 +3885,30 @@ erase_journal_test() -> {ok, Bookie1} = book_start( [ - {root_path, RootPath}, - 
{max_journalsize, 50000}, + {root_path, RootPath}, + {max_journalsize, 50000}, {cache_size, 100} - ]), + ] + ), ObjL1 = generate_multiple_objects(500, 1), % Put in all the objects with a TTL in the future lists:foreach( fun({K, V, S}) -> ok = book_put(Bookie1, <<"Bucket">>, K, V, S, ?STD_TAG) end, - ObjL1), + ObjL1 + ), lists:foreach( fun({K, V, _S}) -> {ok, V} = book_get(Bookie1, <<"Bucket">>, K, ?STD_TAG) end, - ObjL1), - + ObjL1 + ), + CheckHeadFun = - fun(Book) -> + fun(Book) -> fun({K, _V, _S}, Acc) -> - case book_head(Book, <<"Bucket">>, K, ?STD_TAG) of + case book_head(Book, <<"Bucket">>, K, ?STD_TAG) of {ok, _Head} -> Acc; not_found -> Acc + 1 end @@ -3616,10 +3922,10 @@ erase_journal_test() -> leveled_inker:clean_testdir(RootPath ++ "/" ++ ?JOURNAL_FP), {ok, Bookie2} = book_start([ - {root_path, RootPath}, - {max_journalsize, 5000}, - {cache_size, 100}] - ), + {root_path, RootPath}, + {max_journalsize, 5000}, + {cache_size, 100} + ]), HeadsNotFound2 = lists:foldl(CheckHeadFun(Bookie2), 0, ObjL1), ?assertMatch(500, HeadsNotFound2), ok = book_destroy(Bookie2). @@ -3630,35 +3936,41 @@ sqnorder_fold_test() -> book_start([ {root_path, RootPath}, {max_journalsize, 1000000}, - {cache_size, 500}] - ), + {cache_size, 500} + ]), ok = book_put(Bookie1, <<"B">>, <<"K1">>, {value, <<"V1">>}, [], ?STD_TAG), ok = book_put(Bookie1, <<"B">>, <<"K2">>, {value, <<"V2">>}, [], ?STD_TAG), - + FoldObjectsFun = fun(B, K, V, Acc) -> Acc ++ [{B, K, V}] end, {async, ObjFPre} = book_objectfold( - Bookie1, ?STD_TAG, {FoldObjectsFun, []}, true, sqn_order), + Bookie1, ?STD_TAG, {FoldObjectsFun, []}, true, sqn_order + ), {async, ObjFPost} = book_objectfold( - Bookie1, ?STD_TAG, {FoldObjectsFun, []}, false, sqn_order), - + Bookie1, ?STD_TAG, {FoldObjectsFun, []}, false, sqn_order + ), + ok = book_put(Bookie1, <<"B">>, <<"K3">>, {value, <<"V3">>}, [], ?STD_TAG), ObjLPre = ObjFPre(), ?assertMatch( - [{<<"B">>, <<"K1">>, {value, <<"V1">>}}, - {<<"B">>, <<"K2">>, {value, <<"V2">>}}], + [ + {<<"B">>, <<"K1">>, {value, <<"V1">>}}, + {<<"B">>, <<"K2">>, {value, <<"V2">>}} + ], ObjLPre ), ObjLPost = ObjFPost(), ?assertMatch( - [{<<"B">>, <<"K1">>, {value, <<"V1">>}}, + [ + {<<"B">>, <<"K1">>, {value, <<"V1">>}}, {<<"B">>, <<"K2">>, {value, <<"V2">>}}, - {<<"B">>, <<"K3">>, {value, <<"V3">>}}], + {<<"B">>, <<"K3">>, {value, <<"V3">>}} + ], ObjLPost ), - + ok = book_destroy(Bookie1). sqnorder_mutatefold_test() -> @@ -3671,22 +3983,24 @@ sqnorder_mutatefold_test() -> ]), ok = book_put(Bookie1, <<"B">>, <<"K1">>, {value, <<"V1">>}, [], ?STD_TAG), ok = book_put(Bookie1, <<"B">>, <<"K1">>, {value, <<"V2">>}, [], ?STD_TAG), - + FoldObjectsFun = fun(B, K, V, Acc) -> Acc ++ [{B, K, V}] end, {async, ObjFPre} = book_objectfold( - Bookie1, ?STD_TAG, {FoldObjectsFun, []}, true, sqn_order), + Bookie1, ?STD_TAG, {FoldObjectsFun, []}, true, sqn_order + ), {async, ObjFPost} = book_objectfold( - Bookie1, ?STD_TAG, {FoldObjectsFun, []}, false, sqn_order), - + Bookie1, ?STD_TAG, {FoldObjectsFun, []}, false, sqn_order + ), + ok = book_put(Bookie1, <<"B">>, <<"K1">>, {value, <<"V3">>}, [], ?STD_TAG), ObjLPre = ObjFPre(), ?assertMatch([{<<"B">>, <<"K1">>, {value, <<"V2">>}}], ObjLPre), ObjLPost = ObjFPost(), ?assertMatch([{<<"B">>, <<"K1">>, {value, <<"V3">>}}], ObjLPost), - + ok = book_destroy(Bookie1). 
check_notfound_test() -> @@ -3699,16 +4013,17 @@ check_notfound_test() -> Freq0 end, 100, - lists:seq(1, 5000)), - % 5000 as needs to be a lot as doesn't decrement - % when random interval is not hit + lists:seq(1, 5000) + ), + % 5000 as needs to be a lot as doesn't decrement + % when random interval is not hit ?assertMatch(?MIN_KEYCHECK_FREQUENCY, MinFreq), - + ?assertMatch( - {true, ?MAX_KEYCHECK_FREQUENCY}, + {true, ?MAX_KEYCHECK_FREQUENCY}, check_notfound(?MAX_KEYCHECK_FREQUENCY, MissingFun) ), - + ?assertMatch({false, 0}, check_notfound(0, MissingFun)). -endif. diff --git a/src/leveled_cdb.erl b/src/leveled_cdb.erl index f8e3860f..57db6bee 100644 --- a/src/leveled_cdb.erl +++ b/src/leveled_cdb.erl @@ -48,50 +48,58 @@ -behaviour(gen_statem). -include("leveled.hrl"). --export([init/1, - callback_mode/0, - terminate/3, - code_change/4]). +-export([ + init/1, + callback_mode/0, + terminate/3, + code_change/4 +]). %% states --export([starting/3, - writer/3, - rolling/3, - reader/3, - delete_pending/3]). - --export([cdb_open_writer/1, - cdb_open_writer/2, - cdb_open_reader/1, - cdb_open_reader/2, - cdb_reopen_reader/3, - cdb_get/2, - cdb_put/3, - cdb_put/4, - cdb_mput/2, - cdb_getpositions/2, - cdb_directfetch/3, - cdb_lastkey/1, - cdb_firstkey/1, - cdb_filename/1, - cdb_keycheck/2, - cdb_scan/4, - cdb_close/1, - cdb_complete/1, - cdb_roll/1, - cdb_returnhashtable/3, - cdb_checkhashtable/1, - cdb_destroy/1, - cdb_deletepending/1, - cdb_deletepending/3, - cdb_isrolling/1, - cdb_clerkcomplete/1, - cdb_getcachedscore/2, - cdb_putcachedscore/2, - cdb_deleteconfirmed/1]). - --export([finished_rolling/1, - hashtable_calc/2]). +-export([ + starting/3, + writer/3, + rolling/3, + reader/3, + delete_pending/3 +]). + +-export([ + cdb_open_writer/1, + cdb_open_writer/2, + cdb_open_reader/1, + cdb_open_reader/2, + cdb_reopen_reader/3, + cdb_get/2, + cdb_put/3, + cdb_put/4, + cdb_mput/2, + cdb_getpositions/2, + cdb_directfetch/3, + cdb_lastkey/1, + cdb_firstkey/1, + cdb_filename/1, + cdb_keycheck/2, + cdb_scan/4, + cdb_close/1, + cdb_complete/1, + cdb_roll/1, + cdb_returnhashtable/3, + cdb_checkhashtable/1, + cdb_destroy/1, + cdb_deletepending/1, + cdb_deletepending/3, + cdb_isrolling/1, + cdb_clerkcomplete/1, + cdb_getcachedscore/2, + cdb_putcachedscore/2, + cdb_deleteconfirmed/1 +]). + +-export([ + finished_rolling/1, + hashtable_calc/2 +]). -define(DWORD_SIZE, 8). -define(MAX_FILE_SIZE, 3221225472). @@ -101,45 +109,47 @@ -define(DELETE_TIMEOUT, 10000). -define(GETPOS_FACTOR, 8). -define(MAX_OBJECT_SIZE, 1000000000). - % 1GB but really should be much smaller than this +% 1GB but really should be much smaller than this -define(MEGA, 1000000). -define(CACHE_LIFE, 86400). 
--record(state, {hashtree, - last_position :: integer() | undefined, - % defined when writing, not required once rolled - last_key = empty, - current_count = 0 :: non_neg_integer(), - hash_index = {} :: tuple(), - filename :: string() | undefined, - % defined when starting - handle :: file:io_device() | undefined, - % defined when starting - max_size :: pos_integer(), - max_count :: pos_integer(), - binary_mode = false :: boolean(), - delete_point = 0 :: integer(), - inker :: pid() | undefined, - % undefined until delete_pending - deferred_delete = false :: boolean(), - waste_path :: string()|undefined, - % undefined has functional meaning - % - no sending to waste on delete - sync_strategy = none, - log_options = leveled_log:get_opts() - :: leveled_log:log_options(), - cached_score :: {float(), erlang:timestamp()}|undefined, - monitor = {no_monitor, 0} :: leveled_monitor:monitor()}). +-record(state, { + hashtree, + last_position :: integer() | undefined, + % defined when writing, not required once rolled + last_key = empty, + current_count = 0 :: non_neg_integer(), + hash_index = {} :: tuple(), + filename :: string() | undefined, + % defined when starting + handle :: file:io_device() | undefined, + % defined when starting + max_size :: pos_integer(), + max_count :: pos_integer(), + binary_mode = false :: boolean(), + delete_point = 0 :: integer(), + inker :: pid() | undefined, + % undefined until delete_pending + deferred_delete = false :: boolean(), + waste_path :: string() | undefined, + % undefined has functional meaning + % - no sending to waste on delete + sync_strategy = none, + log_options = leveled_log:get_opts() :: + leveled_log:log_options(), + cached_score :: {float(), erlang:timestamp()} | undefined, + monitor = {no_monitor, 0} :: leveled_monitor:monitor() +}). -type cdb_options() :: #cdb_options{}. -type hashtable_index() :: tuple(). --type file_location() :: integer()|eof. +-type file_location() :: integer() | eof. +-type extract_fun() :: fun((binary()) -> any()). +%% erlfmt:ignore - issues with editors when function definitions are split -type filter_fun() :: - fun((any(), - binary(), - integer(), - term()|{term(), term()}, - fun((binary()) -> any())) -> {stop|loop, any()}). + fun((any(), binary(), integer(), term() | {term(), term()}, extract_fun()) -> + {stop | loop, any()} + ). -export_type([filter_fun/0]). @@ -152,7 +162,7 @@ %% Open a file for writing using default options cdb_open_writer(Filename) -> %% No options passed - cdb_open_writer(Filename, #cdb_options{binary_mode=true}). + cdb_open_writer(Filename, #cdb_options{binary_mode = true}). -spec cdb_open_writer(string(), cdb_options()) -> {ok, pid()}. %% @doc @@ -176,12 +186,16 @@ cdb_open_writer(Filename, Opts) -> %% determine when scans over a file have completed. cdb_reopen_reader(Filename, LastKey, CDBopts) -> {ok, Pid} = - gen_statem:start_link(?MODULE, - [CDBopts#cdb_options{binary_mode=true}], - []), - ok = gen_statem:call(Pid, - {open_reader, Filename, LastKey}, - infinity), + gen_statem:start_link( + ?MODULE, + [CDBopts#cdb_options{binary_mode = true}], + [] + ), + ok = gen_statem:call( + Pid, + {open_reader, Filename, LastKey}, + infinity + ), {ok, Pid}. -spec cdb_open_reader(string()) -> {ok, pid()}. @@ -190,7 +204,7 @@ cdb_reopen_reader(Filename, LastKey, CDBopts) -> %% Don't use this if the LastKey is known, as this requires an expensive scan %% to discover the LastKey. cdb_open_reader(Filename) -> - cdb_open_reader(Filename, #cdb_options{binary_mode=true}). 
+ cdb_open_reader(Filename, #cdb_options{binary_mode = true}). -spec cdb_open_reader(string(), #cdb_options{}) -> {ok, pid()}. %% @doc @@ -203,13 +217,13 @@ cdb_open_reader(Filename, Opts) -> ok = gen_statem:call(Pid, {open_reader, Filename}, infinity), {ok, Pid}. --spec cdb_get(pid(), any()) -> {any(), any()}|missing. +-spec cdb_get(pid(), any()) -> {any(), any()} | missing. %% @doc %% Extract a Key and Value from a CDB file by passing in a Key. cdb_get(Pid, Key) -> gen_statem:call(Pid, {get_kv, Key}, infinity). --spec cdb_put(pid(), any(), any()) -> ok|roll. +-spec cdb_put(pid(), any(), any()) -> ok | roll. %% @doc %% Put a key and value into a cdb file that is open as a writer, will fail %% if the FSM is in any other state. @@ -220,14 +234,14 @@ cdb_get(Pid, Key) -> cdb_put(Pid, Key, Value) -> cdb_put(Pid, Key, Value, false). --spec cdb_put(pid(), any(), any(), boolean()) -> ok|roll. +-spec cdb_put(pid(), any(), any(), boolean()) -> ok | roll. %% @doc %% See cdb_put/3. Addition of force-sync option, to be used when sync mode is %% none to force a sync to disk on this particlar put. cdb_put(Pid, Key, Value, Sync) -> gen_statem:call(Pid, {put_kv, Key, Value, Sync}, infinity). --spec cdb_mput(pid(), list()) -> ok|roll. +-spec cdb_mput(pid(), list()) -> ok | roll. %% @doc %% Add multiple keys and values in one call. The file will request a roll if %% all of the keys and values cnanot be written (and in this case none of them @@ -238,7 +252,7 @@ cdb_put(Pid, Key, Value, Sync) -> cdb_mput(Pid, KVList) -> gen_statem:call(Pid, {mput_kv, KVList}, infinity). --spec cdb_getpositions(pid(), integer()|all) -> list(). +-spec cdb_getpositions(pid(), integer() | all) -> list(). %% @doc %% Get the positions in the file of a random sample of Keys. cdb_directfetch %% can then be used to fetch those keys. SampleSize can be an integer or the @@ -279,11 +293,14 @@ cdb_getpositions(Pid, SampleSize) -> end. cdb_getpositions_fromidx(Pid, SampleSize, Index, Acc) -> - gen_statem:call(Pid, - {get_positions, SampleSize, Index, Acc}, infinity). + gen_statem:call( + Pid, + {get_positions, SampleSize, Index, Acc}, + infinity + ). --spec cdb_directfetch(pid(), list(), key_only|key_size|key_value_check) -> - list(). +-spec cdb_directfetch(pid(), list(), key_only | key_size | key_value_check) -> + list(). %% @doc %% Info can be key_only, key_size (size being the size of the value) or %% key_value_check (with the check part indicating if the CRC is correct for @@ -346,7 +363,7 @@ cdb_deletepending(Pid) -> % Only used in unit tests cdb_deletepending(Pid, 0, no_poll). --spec cdb_deletepending(pid(), integer(), pid()|no_poll) -> ok. +-spec cdb_deletepending(pid(), integer(), pid() | no_poll) -> ok. %% @doc %% Puts the file in a delete_pending state. From that state the Inker will be %% polled to discover if the Manifest SQN at which the file is deleted now @@ -358,8 +375,9 @@ cdb_deletepending(Pid, ManSQN, Inker) -> gen_statem:cast(Pid, {delete_pending, ManSQN, Inker}). -spec cdb_scan( - pid(), filter_fun(), any(), integer()|undefined) - -> {integer()|eof, any()}. + pid(), filter_fun(), any(), integer() | undefined +) -> + {integer() | eof, any()}. %% @doc %% cdb_scan returns {LastPosition, Acc}. Use LastPosition as StartPosiiton to %% continue from that point (calling function has to protect against) double @@ -368,11 +386,13 @@ cdb_deletepending(Pid, ManSQN, Inker) -> %% LastPosition could be the atom complete when the last key processed was at %% the end of the file. last_key must be defined in LoopState. 
cdb_scan(Pid, FilterFun, InitAcc, StartPosition) -> - gen_statem:call(Pid, - {cdb_scan, FilterFun, InitAcc, StartPosition}, - infinity). + gen_statem:call( + Pid, + {cdb_scan, FilterFun, InitAcc, StartPosition}, + infinity + ). --spec cdb_lastkey(pid()) -> leveled_codec:journal_key()|empty. +-spec cdb_lastkey(pid()) -> leveled_codec:journal_key() | empty. %% @doc %% Get the last key to be added to the file (which will have the highest %% sequence number) @@ -389,7 +409,7 @@ cdb_firstkey(Pid) -> cdb_filename(Pid) -> gen_statem:call(Pid, cdb_filename, infinity). --spec cdb_keycheck(pid(), any()) -> probably|missing. +-spec cdb_keycheck(pid(), any()) -> probably | missing. %% @doc %% Check to see if the key is probably present, will return either %% probably or missing. Does not do a definitive check @@ -410,21 +430,18 @@ cdb_isrolling(Pid) -> cdb_clerkcomplete(Pid) -> gen_statem:cast(Pid, clerk_complete). --spec cdb_getcachedscore(pid(), erlang:timestamp()) -> undefined|float(). +-spec cdb_getcachedscore(pid(), erlang:timestamp()) -> undefined | float(). %% @doc %% Return the cached score for a CDB file cdb_getcachedscore(Pid, Now) -> gen_statem:call(Pid, {get_cachedscore, Now}, infinity). - -spec cdb_putcachedscore(pid(), float()) -> ok. %% @doc %% Return the cached score for a CDB file cdb_putcachedscore(Pid, Score) -> gen_statem:call(Pid, {put_cachedscore, Score}, infinity). - - %%%============================================================================ %%% gen_server callbacks %%%============================================================================ @@ -444,15 +461,15 @@ init([Opts]) -> MC -> MC end, - {ok, - starting, - #state{max_size=MaxSize, - max_count=MaxCount, - binary_mode=Opts#cdb_options.binary_mode, - waste_path=Opts#cdb_options.waste_path, - sync_strategy=Opts#cdb_options.sync_strategy, - log_options=Opts#cdb_options.log_options, - monitor=Opts#cdb_options.monitor}}. + {ok, starting, #state{ + max_size = MaxSize, + max_count = MaxCount, + binary_mode = Opts#cdb_options.binary_mode, + waste_path = Opts#cdb_options.waste_path, + sync_strategy = Opts#cdb_options.sync_strategy, + log_options = Opts#cdb_options.log_options, + monitor = Opts#cdb_options.monitor + }}. callback_mode() -> state_functions. 
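A minimal sketch of the filter_fun()/extract_fun() contract used by cdb_scan/4 (illustrative only, not part of the patch): a callback that counts journal entries without touching the values, run over an open reader. This assumes an undefined StartPosition scans from the start of the file.

count_entries(Pid) ->
    CountFun =
        fun(_Key, _ValueBin, _Position, Count, _ExtractFun) ->
            %% return loop to continue the scan with the updated accumulator
            {loop, Count + 1}
        end,
    {_LastPosition, Total} = cdb_scan(Pid, CountFun, 0, undefined),
    Total.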
@@ -464,62 +481,75 @@ starting({call, From}, {open_writer, Filename}, State) -> {WriteOps, UpdStrategy} = set_writeops(State#state.sync_strategy), leveled_log:log(cdb13, [WriteOps]), {ok, Handle} = file:open(Filename, WriteOps), - State0 = State#state{handle=Handle, - current_count = size_hashtree(HashTree), - sync_strategy = UpdStrategy, - last_position=LastPosition, - last_key=LastKey, - filename=Filename, - hashtree=HashTree}, + State0 = State#state{ + handle = Handle, + current_count = size_hashtree(HashTree), + sync_strategy = UpdStrategy, + last_position = LastPosition, + last_key = LastKey, + filename = Filename, + hashtree = HashTree + }, {next_state, writer, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename}, State) -> leveled_log:save(State#state.log_options), leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, false), - State0 = State#state{handle=Handle, - last_key=LastKey, - filename=Filename, - hash_index=Index}, + State0 = State#state{ + handle = Handle, + last_key = LastKey, + filename = Filename, + hash_index = Index + }, {next_state, reader, State0, [{reply, From, ok}, hibernate]}; starting({call, From}, {open_reader, Filename, LastKey}, State) -> leveled_log:save(State#state.log_options), leveled_log:log(cdb02, [Filename]), {Handle, Index, LastKey} = open_for_readonly(Filename, LastKey), - State0 = State#state{handle=Handle, - last_key=LastKey, - filename=Filename, - hash_index=Index}, + State0 = State#state{ + handle = Handle, + last_key = LastKey, + filename = Filename, + hash_index = Index + }, {next_state, reader, State0, [{reply, From, ok}, hibernate]}. - writer( - {call, From}, {get_kv, Key}, State = #state{handle =IO}) - when ?IS_DEF(IO) -> - {keep_state_and_data, - [{reply, - From, + {call, From}, {get_kv, Key}, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> + {keep_state_and_data, [ + {reply, From, get_mem( Key, IO, State#state.hashtree, - State#state.binary_mode)}]}; + State#state.binary_mode + )} + ]}; writer( - {call, From}, {key_check, Key}, State = #state{handle =IO}) - when ?IS_DEF(IO) -> - {keep_state_and_data, - [{reply, - From, + {call, From}, {key_check, Key}, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> + {keep_state_and_data, [ + {reply, From, get_mem( Key, IO, State#state.hashtree, State#state.binary_mode, - loose_presence)}]}; + loose_presence + )} + ]}; writer( {call, From}, {put_kv, Key, Value, Sync}, - State = #state{last_position = LP, handle = IO}) - when ?IS_DEF(last_position), ?IS_DEF(IO) -> + State = #state{last_position = LP, handle = IO} +) when + ?IS_DEF(last_position), ?IS_DEF(IO) +-> NewCount = State#state.current_count + 1, case NewCount >= State#state.max_count of true -> @@ -551,11 +581,12 @@ writer( end, {keep_state, State#state{ - handle=UpdHandle, - current_count=NewCount, - last_position=NewPosition, - last_key=Key, - hashtree=HashTree}, + handle = UpdHandle, + current_count = NewCount, + last_position = NewPosition, + last_key = Key, + hashtree = HashTree + }, [{reply, From, ok}]} end end; @@ -564,19 +595,21 @@ writer({call, From}, {mput_kv, []}, _State) -> writer( {call, From}, {mput_kv, KVList}, - State = #state{last_position = LP, handle = IO}) - when ?IS_DEF(last_position), ?IS_DEF(IO) -> + State = #state{last_position = LP, handle = IO} +) when + ?IS_DEF(last_position), ?IS_DEF(IO) +-> NewCount = State#state.current_count + length(KVList), TooMany = NewCount >= State#state.max_count, NotEmpty = State#state.current_count > 0, case 
(TooMany and NotEmpty) of true -> - {keep_state_and_data, [{reply, From, roll}]}; + {keep_state_and_data, [{reply, From, roll}]}; false -> Result = mput( IO, - KVList, + KVList, {LP, State#state.hashtree}, State#state.binary_mode, State#state.max_size @@ -588,66 +621,83 @@ writer( {UpdHandle, NewPosition, HashTree, LastKey} -> {keep_state, State#state{ - handle=UpdHandle, - current_count=NewCount, - last_position=NewPosition, - last_key=LastKey, - hashtree=HashTree}, + handle = UpdHandle, + current_count = NewCount, + last_position = NewPosition, + last_key = LastKey, + hashtree = HashTree + }, [{reply, From, ok}]} end end; writer( - {call, From}, cdb_complete, State = #state{filename = FN}) - when ?IS_DEF(FN) -> + {call, From}, cdb_complete, State = #state{filename = FN} +) when + ?IS_DEF(FN) +-> NewName = determine_new_filename(FN), - ok = close_file(State#state.handle, - State#state.hashtree, - State#state.last_position), + ok = close_file( + State#state.handle, + State#state.hashtree, + State#state.last_position + ), ok = rename_for_read(FN, NewName), {stop_and_reply, normal, [{reply, From, {ok, NewName}}]}; writer({call, From}, Event, State) -> handle_sync_event(Event, From, State); writer( - cast, cdb_roll, State = #state{last_position = LP}) - when ?IS_DEF(LP) -> - ok = + cast, cdb_roll, State = #state{last_position = LP} +) when + ?IS_DEF(LP) +-> + ok = leveled_iclerk:clerk_hashtablecalc( - State#state.hashtree, LP, self()), + State#state.hashtree, LP, self() + ), {next_state, rolling, State}. - rolling( - {call, From}, {get_kv, Key}, State = #state{handle = IO}) - when ?IS_DEF(IO) -> - {keep_state_and_data, - [{reply, - From, + {call, From}, {get_kv, Key}, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> + {keep_state_and_data, [ + {reply, From, get_mem( Key, IO, State#state.hashtree, - State#state.binary_mode)}]}; + State#state.binary_mode + )} + ]}; rolling( - {call, From}, {key_check, Key}, State = #state{handle = IO}) - when ?IS_DEF(IO) -> - {keep_state_and_data, - [{reply, - From, + {call, From}, {key_check, Key}, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> + {keep_state_and_data, [ + {reply, From, get_mem( Key, IO, State#state.hashtree, State#state.binary_mode, - loose_presence)}]}; -rolling({call, From}, - {get_positions, _SampleSize, _Index, SampleAcc}, - _State) -> + loose_presence + )} + ]}; +rolling( + {call, From}, + {get_positions, _SampleSize, _Index, SampleAcc}, + _State +) -> {keep_state_and_data, [{reply, From, SampleAcc}]}; rolling( {call, From}, {return_hashtable, IndexList, HashTreeBin}, - State = #state{filename = FN}) - when ?IS_DEF(FN) -> + State = #state{filename = FN} +) when + ?IS_DEF(FN) +-> SW = os:timestamp(), Handle = State#state.handle, {ok, BasePos} = file:position(Handle, State#state.last_position), @@ -660,10 +710,12 @@ rolling( ets:delete(State#state.hashtree), {NewHandle, Index, LastKey} = open_for_readonly(NewName, State#state.last_key), - State0 = State#state{handle=NewHandle, - last_key=LastKey, - filename=NewName, - hash_index=Index}, + State0 = State#state{ + handle = NewHandle, + last_key = LastKey, + filename = NewName, + hash_index = Index + }, case State#state.deferred_delete of true -> {next_state, delete_pending, State0, [{reply, From, ok}]}; @@ -678,12 +730,15 @@ rolling({call, From}, cdb_isrolling, _State) -> rolling({call, From}, Event, State) -> handle_sync_event(Event, From, State); rolling(cast, {delete_pending, ManSQN, Inker}, State) -> - {keep_state, - State#state{delete_point=ManSQN, inker=Inker, 
deferred_delete=true}}. + {keep_state, State#state{ + delete_point = ManSQN, inker = Inker, deferred_delete = true + }}. reader( - {call, From}, {get_kv, Key}, State = #state{handle = IO}) - when ?IS_DEF(IO) -> + {call, From}, {get_kv, Key}, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> Result = get_withcache( IO, @@ -695,12 +750,14 @@ reader( {keep_state_and_data, [{reply, From, Result}]}; reader({call, From}, {key_check, Key}, State) -> Result = - get_withcache(State#state.handle, - Key, - State#state.hash_index, - loose_presence, - State#state.binary_mode, - {no_monitor, 0}), + get_withcache( + State#state.handle, + Key, + State#state.hash_index, + loose_presence, + State#state.binary_mode, + {no_monitor, 0} + ), {keep_state_and_data, [{reply, From, Result}]}; reader({call, From}, {get_positions, SampleSize, Index, Acc}, State) -> {Pos, Count} = element(Index + 1, State#state.hash_index), @@ -709,14 +766,17 @@ reader({call, From}, {get_positions, SampleSize, Index, Acc}, State) -> all -> {keep_state_and_data, [{reply, From, UpdAcc}]}; _ -> - {keep_state_and_data, - [{reply, From, lists:sublist(UpdAcc, SampleSize)}]} + {keep_state_and_data, [ + {reply, From, lists:sublist(UpdAcc, SampleSize)} + ]} end; reader( {call, From}, {direct_fetch, PositionList, Info}, - State = #state{handle = IO}) - when ?IS_DEF(IO) -> + State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> FilterFalseKey = fun(Tpl) -> case element(1, Tpl) of @@ -732,17 +792,17 @@ reader( FM = lists:filtermap( fun(P) -> - FilterFalseKey(extract_key(IO, P)) - end, - PositionList - ), + FilterFalseKey(extract_key(IO, P)) + end, + PositionList + ), MapFun = fun(T) -> element(1, T) end, - {keep_state_and_data, - [{reply, From, lists:map(MapFun, FM)}]}; + {keep_state_and_data, [{reply, From, lists:map(MapFun, FM)}]}; key_size -> FilterFun = fun(P) -> FilterFalseKey(extract_key_size(IO, P)) end, - {keep_state_and_data, - [{reply, From, lists:filtermap(FilterFun, PositionList)}]}; + {keep_state_and_data, [ + {reply, From, lists:filtermap(FilterFun, PositionList)} + ]}; key_value_check -> BM = State#state.binary_mode, MapFun = fun(P) -> extract_key_value_check(IO, P, BM) end, @@ -755,31 +815,32 @@ reader( {keep_state_and_data, []} end; reader( - {call, From}, cdb_complete, State = #state{filename = FN, handle = IO}) - when ?IS_DEF(FN), ?IS_DEF(IO) -> + {call, From}, cdb_complete, State = #state{filename = FN, handle = IO} +) when + ?IS_DEF(FN), ?IS_DEF(IO) +-> leveled_log:log(cdb05, [FN, reader, cdb_ccomplete]), ok = file:close(IO), - {stop_and_reply, normal, - [{reply, From, {ok, FN}}], - State#state{handle=undefined}}; + {stop_and_reply, normal, [{reply, From, {ok, FN}}], State#state{ + handle = undefined + }}; reader({call, From}, check_hashtable, _State) -> {keep_state_and_data, [{reply, From, true}]}; reader({call, From}, Event, State) -> handle_sync_event(Event, From, State); reader(cast, {delete_pending, 0, no_poll}, State) -> - {next_state, delete_pending, State#state{delete_point=0}}; + {next_state, delete_pending, State#state{delete_point = 0}}; reader(cast, {delete_pending, ManSQN, Inker}, State) -> - {next_state, - delete_pending, - State#state{delete_point=ManSQN, inker=Inker}, - ?DELETE_TIMEOUT}; + {next_state, delete_pending, + State#state{delete_point = ManSQN, inker = Inker}, ?DELETE_TIMEOUT}; reader(cast, clerk_complete, _State) -> {keep_state_and_data, [hibernate]}. 
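The reader clauses above expose position-based access: cdb_getpositions returns sampled (or all) positions from the hash index, and cdb_directfetch resolves those positions as key_only, key_size or key_value_check. A minimal caller-side sketch, assuming a completed CDB file opened for reading as in the unit tests below (the wrapper name and filename are illustrative):

    sample_key_sizes(Filename) ->
        {ok, Pid} = cdb_open_reader(Filename, #cdb_options{binary_mode = true}),
        %% 'all' returns every position; an integer requests a sample
        Positions = cdb_getpositions(Pid, 100),
        %% key_size returns {Key, Size} pairs without reading full values
        KeySizes = cdb_directfetch(Pid, Positions, key_size),
        ok = cdb_close(Pid),
        KeySizes.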
- delete_pending( - {call, From}, {get_kv, Key}, State = #state{handle = IO}) - when ?IS_DEF(IO) -> + {call, From}, {get_kv, Key}, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> Result = get_withcache( IO, @@ -790,8 +851,10 @@ delete_pending( ), {keep_state_and_data, [{reply, From, Result}, ?DELETE_TIMEOUT]}; delete_pending( - {call, From}, {key_check, Key}, State = #state{handle = IO}) - when ?IS_DEF(IO) -> + {call, From}, {key_check, Key}, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> Result = get_withcache( IO, @@ -803,33 +866,44 @@ delete_pending( ), {keep_state_and_data, [{reply, From, Result}, ?DELETE_TIMEOUT]}; delete_pending( - {call, From}, cdb_close, State = #state{handle = IO, filename = FN}) - when ?IS_DEF(FN), ?IS_DEF(IO) -> + {call, From}, cdb_close, State = #state{handle = IO, filename = FN} +) when + ?IS_DEF(FN), ?IS_DEF(IO) +-> leveled_log:log(cdb05, [FN, delete_pending, cdb_close]), close_pendingdelete(IO, FN, State#state.waste_path), {stop_and_reply, normal, [{reply, From, ok}]}; delete_pending({call, From}, Event, State) -> handle_sync_event(Event, From, State); delete_pending( - cast, delete_confirmed, State = #state{handle = IO, filename = FN}) - when ?IS_DEF(FN), ?IS_DEF(IO) -> + cast, delete_confirmed, State = #state{handle = IO, filename = FN} +) when + ?IS_DEF(FN), ?IS_DEF(IO) +-> leveled_log:log(cdb04, [FN, State#state.delete_point]), close_pendingdelete(IO, FN, State#state.waste_path), {stop, normal}; delete_pending( - cast, destroy, State = #state{handle = IO, filename = FN}) - when ?IS_DEF(FN), ?IS_DEF(IO) -> + cast, destroy, State = #state{handle = IO, filename = FN} +) when + ?IS_DEF(FN), ?IS_DEF(IO) +-> leveled_log:log(cdb05, [FN, delete_pending, destroy]), close_pendingdelete(IO, FN, State#state.waste_path), {stop, normal}; delete_pending( - timeout, _, State=#state{delete_point=ManSQN, handle = IO, filename = FN}) - when ManSQN > 0, ?IS_DEF(FN), ?IS_DEF(IO) -> + timeout, + _, + State = #state{delete_point = ManSQN, handle = IO, filename = FN} +) when + ManSQN > 0, ?IS_DEF(FN), ?IS_DEF(IO) +-> case is_process_alive(State#state.inker) of true -> ok = leveled_inker:ink_confirmdelete( - State#state.inker, ManSQN, self()), + State#state.inker, ManSQN, self() + ), {keep_state_and_data, [?DELETE_TIMEOUT]}; false -> leveled_log:log(cdb04, [FN, ManSQN]), @@ -837,10 +911,11 @@ delete_pending( {stop, normal} end. 
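The delete_pending clauses above keep serving reads while deletion is outstanding, re-polling the inker every ?DELETE_TIMEOUT until the delete point is confirmed (or the inker has gone away). A minimal sketch of the caller-side lifecycle, assuming (as the pendingdelete_test below suggests) that cdb_deletepending/1 corresponds to the {delete_pending, 0, no_poll} cast handled here:

    retire_reader(Pid) ->
        %% move the reader into delete_pending without an inker to poll
        ok = cdb_deletepending(Pid),
        %% reads still succeed in delete_pending; destroy then closes and
        %% removes the file (harmless if it has already been deleted)
        ok = cdb_destroy(Pid).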
- handle_sync_event( - {cdb_scan, FilterFun, Acc, StartPos}, From, State = #state{handle = IO}) - when ?IS_DEF(IO) -> + {cdb_scan, FilterFun, Acc, StartPos}, From, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> {ok, EndPos0} = file:position(IO, eof), {ok, StartPos0} = case StartPos of @@ -852,7 +927,7 @@ handle_sync_event( file:position(IO, StartPos0), MaybeEnd = (check_last_key(State#state.last_key) == empty) or - (StartPos0 >= (EndPos0 - ?DWORD_SIZE)), + (StartPos0 >= (EndPos0 - ?DWORD_SIZE)), {LastPosition, Acc2} = case MaybeEnd of true -> @@ -880,8 +955,10 @@ handle_sync_event( handle_sync_event(cdb_lastkey, From, State) -> {keep_state_and_data, [{reply, From, State#state.last_key}]}; handle_sync_event( - cdb_firstkey, From, State = #state{handle = IO}) - when ?IS_DEF(IO) -> + cdb_firstkey, From, State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> {ok, EOFPos} = file:position(IO, eof), FirstKey = case EOFPos of @@ -912,8 +989,10 @@ handle_sync_event({get_cachedscore, {NowMega, NowSecs, _}}, From, State) -> undefined -> undefined; {Score, {CacheMega, CacheSecs, _}} -> - case (NowMega * ?MEGA + NowSecs) > - (CacheMega * ?MEGA + CacheSecs + ?CACHE_LIFE) of + case + (NowMega * ?MEGA + NowSecs) > + (CacheMega * ?MEGA + CacheSecs + ?CACHE_LIFE) + of true -> undefined; false -> @@ -922,12 +1001,14 @@ handle_sync_event({get_cachedscore, {NowMega, NowSecs, _}}, From, State) -> end, {keep_state_and_data, [{reply, From, ScoreToReturn}]}; handle_sync_event({put_cachedscore, Score}, From, State) -> - {keep_state, - State#state{cached_score = {Score,os:timestamp()}}, - [{reply, From, ok}]}; + {keep_state, State#state{cached_score = {Score, os:timestamp()}}, [ + {reply, From, ok} + ]}; handle_sync_event( - cdb_close, From, _State = #state{handle = IO}) - when ?IS_DEF(IO) -> + cdb_close, From, _State = #state{handle = IO} +) when + ?IS_DEF(IO) +-> file:close(IO), {stop_and_reply, normal, [{reply, From, ok}]}. @@ -937,7 +1018,6 @@ terminate(_Reason, _StateName, _State) -> code_change(_OldVsn, StateName, State, _Extra) -> {ok, StateName, State}. - %%%============================================================================ %%% External functions %%%============================================================================ @@ -959,8 +1039,7 @@ finished_rolling(CDB) -> %%% Internal functions %%%============================================================================ - --spec close_pendingdelete(file:io_device(), list(), list()|undefined) -> ok. +-spec close_pendingdelete(file:io_device(), list(), list() | undefined) -> ok. %% @doc %% If delete is pending - then the close behaviour needs to actuallly delete %% the file @@ -982,7 +1061,8 @@ close_pendingdelete(Handle, Filename, WasteFP) -> leveled_log:log(cdb21, [Filename]) end. --spec set_writeops(sync|riak_sync|none) -> {list(), sync|riak_sync|none}. +-spec set_writeops(sync | riak_sync | none) -> + {list(), sync | riak_sync | none}. %% @doc %% Sync should be used - it is a transaction log - in single node %% implementations. `riak_sync` is a legacy of earlier OTP versions when @@ -1029,17 +1109,30 @@ open_active_file(FileName) when is_list(FileName) -> end, {LastPosition, HashTree, LastKey}. --spec put(file:io_device(), - any(), any(), - {integer(), ets:tid()}, boolean(), integer(), boolean()) - -> roll|{file:io_device(), integer(), ets:tid()}. +-spec put( + file:io_device(), + any(), + any(), + {integer(), ets:tid()}, + boolean(), + integer(), + boolean() +) -> + roll | {file:io_device(), integer(), ets:tid()}. 
%% @doc %% put(Handle, Key, Value, {LastPosition, HashDict}) -> {NewPosition, KeyDict} %% Append to an active file a new key/value pair returning an updated %% dictionary of Keys and positions. Returns an updated Position %% -put(Handle, Key, Value, {LastPosition, HashTree}, - BinaryMode, MaxSize, IsEmpty) -> +put( + Handle, + Key, + Value, + {LastPosition, HashTree}, + BinaryMode, + MaxSize, + IsEmpty +) -> Bin = key_value_to_record({Key, Value}, BinaryMode), ObjectSize = byte_size(Bin), SizeWithinReason = ObjectSize < ?MAX_OBJECT_SIZE, @@ -1051,42 +1144,51 @@ put(Handle, Key, Value, {LastPosition, HashTree}, if SizeWithinReason -> ok = file:pwrite(Handle, LastPosition, Bin), - {Handle, - PotentialNewSize, + {Handle, PotentialNewSize, put_hashtree(Key, LastPosition, HashTree)} end end. - --spec mput(file:io_device(), - list(tuple()), - {integer(), ets:tid()}, boolean(), integer()) - -> roll|{file:io_device(), integer(), ets:tid(), any()}. +-spec mput( + file:io_device(), + list(tuple()), + {integer(), ets:tid()}, + boolean(), + integer() +) -> + roll | {file:io_device(), integer(), ets:tid(), any()}. %% @doc %% Multiple puts - either all will succeed or it will return roll with non %% succeeding. mput(Handle, KVList, {LastPosition, HashTree0}, BinaryMode, MaxSize) -> - {KPList, Bin, LastKey} = multi_key_value_to_record(KVList, - BinaryMode, - LastPosition), + {KPList, Bin, LastKey} = multi_key_value_to_record( + KVList, + BinaryMode, + LastPosition + ), PotentialNewSize = LastPosition + byte_size(Bin), if PotentialNewSize > MaxSize -> roll; true -> ok = file:pwrite(Handle, LastPosition, Bin), - HashTree1 = lists:foldl(fun({K, P}, Acc) -> - put_hashtree(K, P, Acc) - end, - HashTree0, - KPList), + HashTree1 = lists:foldl( + fun({K, P}, Acc) -> + put_hashtree(K, P, Acc) + end, + HashTree0, + KPList + ), {Handle, PotentialNewSize, HashTree1, LastKey} end. - -spec get_withcache( - file:io_device(), any(), tuple(), boolean(), - leveled_monitor:monitor()) -> missing|probably|tuple(). + file:io_device(), + any(), + tuple(), + boolean(), + leveled_monitor:monitor() +) -> missing | probably | tuple(). %% @doc %% %% Using a cache of the Index array - get a K/V pair from the file using the @@ -1099,14 +1201,14 @@ get_withcache(Handle, Key, Cache, BinaryMode, Monitor) -> get_withcache(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor) -> get(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor). - -spec get( - file:io_device(), - any(), - tuple(), - loose_presence|any(), + file:io_device(), + any(), + tuple(), + loose_presence | any(), boolean(), - leveled_monitor:monitor()) -> tuple()|probably|missing. + leveled_monitor:monitor() +) -> tuple() | probably | missing. %% @doc %% %% Get a K/V pair from the file using the Key. QuickCheck can be set to @@ -1115,8 +1217,9 @@ get_withcache(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor) -> %% that Key) %% %% Timings also passed in and can be updated based on results -get(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor) - when is_tuple(Handle) -> +get(Handle, Key, Cache, QuickCheck, BinaryMode, Monitor) when + is_tuple(Handle) +-> get(Handle, Key, Cache, fun get_index/3, QuickCheck, BinaryMode, Monitor). 
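As the documentation for get/6 above notes, setting QuickCheck to loose_presence answers from the hash table alone, so a match means the key is probably (not certainly) present, and no value is read or CRC-checked. A sketch of the two externally visible flavours, mirroring assertions in the tests further down (the wrapper name is illustrative):

    presence_then_fetch(Pid, Key) ->
        %% cdb_keycheck takes the loose_presence path: a hash match means
        %% the key is probably, but not certainly, present
        case cdb_keycheck(Pid, Key) of
            missing ->
                missing;
            probably ->
                %% the full fetch reads the key/value and verifies the CRC
                cdb_get(Pid, Key)
        end.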
get(Handle, Key, Cache, CacheFun, QuickCheck, BinaryMode, Monitor) -> @@ -1141,7 +1244,8 @@ get(Handle, Key, Cache, CacheFun, QuickCheck, BinaryMode, Monitor) -> Hash, Key, QuickCheck, - BinaryMode), + BinaryMode + ), {TS1, _SW2} = leveled_monitor:step_time(SW1), maybelog_get_timing(Monitor, TS0, TS1, CycleCount), Result @@ -1150,8 +1254,8 @@ get(Handle, Key, Cache, CacheFun, QuickCheck, BinaryMode, Monitor) -> get_index(_Handle, Index, Cache) -> element(Index + 1, Cache). --spec get_mem(any(), list()|file:io_device(), ets:tid(), boolean()) -> - tuple()|probably|missing. +-spec get_mem(any(), list() | file:io_device(), ets:tid(), boolean()) -> + tuple() | probably | missing. %% @doc %% Get a Key/Value pair from an active CDB file (with no hash table written) get_mem(Key, FNOrHandle, HashTree, BinaryMode) -> @@ -1183,7 +1287,6 @@ hashtable_calc(HashTree, StartPos) -> %% Internal functions %%%%%%%%%%%%%%%%%%%% - determine_new_filename(Filename) -> filename:rootname(Filename, ".pnd") ++ ".cdb". @@ -1192,9 +1295,8 @@ rename_for_read(Filename, NewName) -> leveled_log:log(cdb08, [Filename, NewName, filelib:is_file(NewName)]), file:rename(Filename, NewName). - --spec open_for_readonly(string(), term()) - -> {file:io_device(), hashtable_index(), term()}. +-spec open_for_readonly(string(), term()) -> + {file:io_device(), hashtable_index(), term()}. %% @doc %% Open a CDB file to accept read requests (e.g. key/value lookups) but no %% additions or changes @@ -1210,7 +1312,6 @@ open_for_readonly(Filename, LastKeyKnown) -> end, {Handle, Index, LastKey}. - -spec load_index(file:io_device()) -> hashtable_index(). %% @doc %% The CDB file has at the beginning an index of how many keys are present in @@ -1224,8 +1325,7 @@ load_index(Handle) -> end, list_to_tuple(lists:map(LoadIndexFun, Index)). - --spec find_lastkey(file:io_device(), hashtable_index()) -> empty|term(). +-spec find_lastkey(file:io_device(), hashtable_index()) -> empty | term(). %% @doc %% Function to find the LastKey in the file find_lastkey(Handle, IndexCache) -> @@ -1234,9 +1334,11 @@ find_lastkey(Handle, IndexCache) -> {Pos, Count} = element(Index + 1, IndexCache), scan_index_findlast(Handle, Pos, Count, {LastPos, KeyCount}) end, - {LastPosition, TotalKeys} = lists:foldl(ScanIndexFun, - {0, 0}, - lists:seq(0, 255)), + {LastPosition, TotalKeys} = lists:foldl( + ScanIndexFun, + {0, 0}, + lists:seq(0, 255) + ), case TotalKeys of 0 -> empty; @@ -1246,13 +1348,14 @@ find_lastkey(Handle, IndexCache) -> safe_read_next(Handle, KeyLength, key) end. - scan_index_findlast(Handle, Position, Count, {LastPosition, TotalKeys}) -> {ok, _} = file:position(Handle, Position), MaxPosFun = fun({_Hash, HPos}, MaxPos) -> max(HPos, MaxPos) end, - MaxPos = lists:foldl(MaxPosFun, - LastPosition, - read_next_n_integerpairs(Handle, Count)), + MaxPos = lists:foldl( + MaxPosFun, + LastPosition, + read_next_n_integerpairs(Handle, Count) + ), {MaxPos, TotalKeys + Count}. scan_index_returnpositions(Handle, Position, Count, PosList0) -> @@ -1263,13 +1366,14 @@ scan_index_returnpositions(Handle, Position, Count, PosList0) -> {0, 0} -> PosList; _ -> - [HPosition|PosList] + [HPosition | PosList] end end, - lists:foldl(AddPosFun, - PosList0, - read_next_n_integerpairs(Handle, Count)). - + lists:foldl( + AddPosFun, + PosList0, + read_next_n_integerpairs(Handle, Count) + ). %% Take an active file and write the hash details necessary to close that %% file and roll a new active file if requested. 
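A minimal sketch of reading one entry of the 256-slot top index described for load_index/1 above; it mirrors get_uncached_index/3 used by the test helpers further down, with ?DWORD_SIZE being the doubleword size used throughout this module (the function name is illustrative):

    read_top_index_entry(Handle, Index) when Index >= 0, Index =< 255 ->
        %% each top index entry is a doubleword: the file position of that
        %% slice's hash table, and the number of entries it holds
        {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE * Index}),
        {_HashTablePos, _EntryCount} = read_next_2_integers(Handle).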
@@ -1282,7 +1386,6 @@ close_file(Handle, HashTree, BasePos) -> ok = write_top_index_table(Handle, BasePos, IndexList), file:close(Handle). - %% Fetch a list of positions by passing a key to the HashTree get_hashtree(Key, HashTree) -> Hash = hash(Key), @@ -1292,19 +1395,20 @@ get_hashtree(Key, HashTree) -> %% Add to hash tree - this is an array of 256 skiplists that contains the Hash %% and position of objects which have been added to an open CDB file put_hashtree(Key, Position, HashTree) -> - Hash = hash(Key), - Index = hash_to_index(Hash), - add_position_tohashtree(HashTree, Index, Hash, Position). + Hash = hash(Key), + Index = hash_to_index(Hash), + add_position_tohashtree(HashTree, Index, Hash, Position). %% Function to extract a Key-Value pair given a file handle and a position %% Will confirm that the key matches and do a CRC check extract_kvpair(_H, [], _K, _BinaryMode) -> missing; -extract_kvpair(Handle, [Position|Rest], Key, BinaryMode) -> +extract_kvpair(Handle, [Position | Rest], Key, BinaryMode) -> {ok, _} = file:position(Handle, Position), {KeyLength, ValueLength} = read_next_2_integers(Handle), case safe_read_next(Handle, KeyLength, keybin) of - {Key, KeyBin} -> % If same key as passed in, then found! + % If same key as passed in, then found! + {Key, KeyBin} -> case checkread_next_value(Handle, ValueLength, KeyBin) of {false, _} -> crc_wonky; @@ -1342,9 +1446,9 @@ extract_key_value_check(Handle, Position, BinaryMode) -> {Key, binary_to_term(Value), true} end. - -spec startup_scan_over_file( - file:io_device(), integer()) -> {integer(), {ets:tid(), term()}}. + file:io_device(), integer() +) -> {integer(), {ets:tid(), term()}}. %% @doc %% Scan through the file until there is a failure to crc check an input, and %% at that point return the position and the key dictionary scanned so far @@ -1368,10 +1472,8 @@ startup_filter(Hashtree) -> end, FilterFun. - --spec scan_over_file - (file:io_device(), integer(), filter_fun(), term(), any()) -> - {file_location(), term()}. +-spec scan_over_file(file:io_device(), integer(), filter_fun(), term(), any()) -> + {file_location(), term()}. %% Scan for key changes - scan over file returning applying FilterFun %% The FilterFun should accept as input: %% - Key, ValueBin, Position, Accumulator, Fun (to extract values from Binary) @@ -1396,14 +1498,18 @@ scan_over_file(Handle, Position, FilterFun, Output, LastKey) -> LastKey -> eof; _ -> - Position + KeyLength + ValueLength - + ?DWORD_SIZE + Position + KeyLength + ValueLength + + ?DWORD_SIZE end, - case FilterFun(Key, - ValueAsBin, - Position, - Output, - fun extract_valueandsize/1) of + case + FilterFun( + Key, + ValueAsBin, + Position, + Output, + fun extract_valueandsize/1 + ) + of {stop, UpdOutput} -> {Position, UpdOutput}; {loop, UpdOutput} -> @@ -1411,16 +1517,17 @@ scan_over_file(Handle, Position, FilterFun, Output, LastKey) -> eof -> {eof, UpdOutput}; _ -> - scan_over_file(Handle, - NewPosition, - FilterFun, - UpdOutput, - LastKey) + scan_over_file( + Handle, + NewPosition, + FilterFun, + UpdOutput, + LastKey + ) end end end. - %% @doc %% Confirm that the last key has been defined and set to a non-default value check_last_key(empty) -> @@ -1429,7 +1536,8 @@ check_last_key(_LK) -> ok. -spec saferead_keyvalue( - file:io_device()) -> false|{any(), binary(), integer(), integer()}. + file:io_device() +) -> false | {any(), binary(), integer(), integer()}. 
%% @doc %% Read the Key/Value at this point, returning {ok, Key, Value} %% catch expected exceptions associated with file corruption (or end) and @@ -1456,9 +1564,9 @@ saferead_keyvalue(Handle) -> end. -spec safe_read_next - (file:io_device(), integer(), key) -> false|term(); - (file:io_device(), integer(), keybin) -> false|{term(), binary()}; - (file:io_device(), integer(), {value, binary()}) -> false|binary(). + (file:io_device(), integer(), key) -> false | term(); + (file:io_device(), integer(), keybin) -> false | {term(), binary()}; + (file:io_device(), integer(), {value, binary()}) -> false | binary(). %% @doc %% Read the next item of length Length %% Previously catching error:badarg was sufficient to capture errors of @@ -1486,7 +1594,7 @@ safe_read_next(Handle, Length, ReadType) -> false end. --spec crccheck(binary()|bitstring(), binary()) -> any(). +-spec crccheck(binary() | bitstring(), binary()) -> any(). %% @doc %% CRC chaeck the value which should be a binary, where the first four bytes %% are a CRC check. If the binary is truncated, it could be a bitstring or @@ -1498,21 +1606,18 @@ crccheck(<>, KeyBin) when is_binary(KeyBin) -> _ -> leveled_log:log(cdb10, ["mismatch"]), false - end; + end; crccheck(_V, _KB) -> leveled_log:log(cdb10, ["size"]), false. - -spec calc_crc(binary(), binary()) -> integer(). %% @doc %% Do a vaanilla CRC calculation on the binary calc_crc(KeyBin, Value) -> erlang:crc32(<>). - --spec checkread_next_value - (file:io_device(), integer(), binary()) -> - {true, binary()}|{false, crc_wonky}. +-spec checkread_next_value(file:io_device(), integer(), binary()) -> + {true, binary()} | {false, crc_wonky}. %% @doc %% Read next string where the string has a CRC prepended - stripping the crc %% and checking if requested @@ -1529,7 +1634,6 @@ checkread_next_value(Handle, Length, KeyBin) -> extract_valueandsize(ValueAsBin) -> {ValueAsBin, byte_size(ValueAsBin)}. - %% Used for reading lengths with CDB read_next_2_integers(Handle) -> case file:read(Handle, ?DWORD_SIZE) of @@ -1545,16 +1649,20 @@ read_next_n_integerpairs(Handle, NumberOfPairs) -> read_integerpairs(<<>>, Pairs) -> Pairs; -read_integerpairs(<>, Pairs) -> +read_integerpairs( + <>, Pairs +) -> read_integerpairs(<>, Pairs ++ [{Int1, Int2}]). - - -spec search_hash_table( - file:io_device(), tuple(), integer(), any(), - loose_presence|boolean(), boolean()) - -> {pos_integer(), missing|probably|tuple()}. + file:io_device(), + tuple(), + integer(), + any(), + loose_presence | boolean(), + boolean() +) -> + {pos_integer(), missing | probably | tuple()}. %% @doc %% %% Seach the hash table for the matching hash and key. 
Be prepared for @@ -1564,21 +1672,29 @@ read_integerpairs(< +search_hash_table( + _Handle, + {_, _, TotalSlots, TotalSlots}, + _Hash, + _Key, + _QuickCheck, + _BinaryMode +) -> % We have done the full loop - value must not be present {TotalSlots, missing}; -search_hash_table(Handle, - {FirstHashPosition, Slot, CycleCount, TotalSlots}, - Hash, Key, - QuickCheck, BinaryMode) -> +search_hash_table( + Handle, + {FirstHashPosition, Slot, CycleCount, TotalSlots}, + Hash, + Key, + QuickCheck, + BinaryMode +) -> % Read the next 2 integers at current position, see if it matches the hash % we're after Offset = - ((Slot + CycleCount - 1) rem TotalSlots) * ?DWORD_SIZE - + FirstHashPosition, + ((Slot + CycleCount - 1) rem TotalSlots) * ?DWORD_SIZE + + FirstHashPosition, {ok, _} = file:position(Handle, Offset), case read_next_2_integers(Handle) of @@ -1598,8 +1714,11 @@ search_hash_table(Handle, search_hash_table( Handle, {FirstHashPosition, Slot, CycleCount + 1, TotalSlots}, - Hash, Key, - QuickCheck, BinaryMode); + Hash, + Key, + QuickCheck, + BinaryMode + ); _ -> {CycleCount, KV} end; @@ -1607,24 +1726,29 @@ search_hash_table(Handle, search_hash_table( Handle, {FirstHashPosition, Slot, CycleCount + 1, TotalSlots}, - Hash, Key, - QuickCheck, BinaryMode) + Hash, + Key, + QuickCheck, + BinaryMode + ) end. - -spec maybelog_get_timing( - leveled_monitor:monitor(), - leveled_monitor:timing(), - leveled_monitor:timing(), - pos_integer()) -> ok. + leveled_monitor:monitor(), + leveled_monitor:timing(), + leveled_monitor:timing(), + pos_integer() +) -> ok. maybelog_get_timing( - {Pid, _StatsFreq}, IndexTime, ReadTime, CycleCount) - when is_pid(Pid), is_integer(IndexTime), is_integer(ReadTime) -> + {Pid, _StatsFreq}, IndexTime, ReadTime, CycleCount +) when + is_pid(Pid), is_integer(IndexTime), is_integer(ReadTime) +-> leveled_monitor:add_stat( - Pid, {cdb_get_update, CycleCount, IndexTime, ReadTime}); + Pid, {cdb_get_update, CycleCount, IndexTime, ReadTime} + ); maybelog_get_timing(_Monitor, _IndexTime, _ReadTime, _CC) -> ok. - %% Write the actual hashtables at the bottom of the file. Each hash table %% entry is a doubleword in length. The first word is the hash value @@ -1644,7 +1768,6 @@ perform_write_hash_tables(Handle, HashTreeBin, StartPos) -> leveled_log:log_timer(cdb12, [], SWW), ok. - %% Write the top most 255 doubleword entries. First word is the %% file pointer to a hashtable and the second word is the number of entries %% in the hash table @@ -1659,21 +1782,23 @@ write_top_index_table(Handle, BasePos, IndexList) -> false -> {Pos, Pos + (Count * ?DWORD_SIZE)} end, - {<>, - NextPos} - end, + { + <>, + NextPos + } + end, - {IndexBin, _Pos} = lists:foldl(FnWriteIndex, - {<<>>, BasePos}, - IndexList), + {IndexBin, _Pos} = lists:foldl( + FnWriteIndex, + {<<>>, BasePos}, + IndexList + ), {ok, _} = file:position(Handle, 0), ok = file:write(Handle, IndexBin), ok = file:advise(Handle, 0, ?DWORD_SIZE * 256, will_need), ok. - hash(Key) -> leveled_util:magic_hash(Key). @@ -1688,27 +1813,32 @@ hash_to_slot(Hash, L) -> %% at the front of the value key_value_to_record({Key, Value}, BinaryMode) -> BK = term_to_binary(Key), - BV = case BinaryMode of - true -> - Value; - false -> - term_to_binary(Value) - end, + BV = + case BinaryMode of + true -> + Value; + false -> + term_to_binary(Value) + end, KS = byte_size(BK), VS = byte_size(BV), CRC = calc_crc(BK, BV), - <>. - + <>. 
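The record layout just described prepends a 32-bit CRC to the stored value. A hedged verification sketch follows, assuming (per the arguments to calc_crc/2 above) that the checksum covers the key binary followed by the value body; the function name is illustrative and the byte order of the stored CRC is an assumption rather than taken from the patch:

    verify_stored_value(KeyBin, <<CRC:32/integer, Value/binary>>) ->
        %% recompute the checksum over key binary plus value, as calc_crc/2
        %% does, and compare it with the prepended CRC
        case erlang:crc32(<<KeyBin/binary, Value/binary>>) of
            CRC -> {true, Value};
            _ -> {false, crc_wonky}
        end.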
multi_key_value_to_record(KVList, BinaryMode, LastPosition) -> - lists:foldl(fun({K, V}, {KPosL, Bin, _LK}) -> - Bin0 = key_value_to_record({K, V}, BinaryMode), - {[{K, byte_size(Bin) + LastPosition}|KPosL], - <>, - K} end, - {[], <<>>, empty}, - KVList). + lists:foldl( + fun({K, V}, {KPosL, Bin, _LK}) -> + Bin0 = key_value_to_record({K, V}, BinaryMode), + { + [{K, byte_size(Bin) + LastPosition} | KPosL], + <>, + K + } + end, + {[], <<>>, empty}, + KVList + ). %%%============================================================================ %%% HashTree Implementation @@ -1720,7 +1850,7 @@ lookup_positions(HashTree, Index, Hash) -> lookup_positions(HashTree, Index, Hash, Pos, PosList) -> case ets:next(HashTree, {Index, Hash, Pos}) of {Index, Hash, NewPos} -> - lookup_positions(HashTree, Index, Hash, NewPos, [NewPos|PosList]); + lookup_positions(HashTree, Index, Hash, NewPos, [NewPos | PosList]); _ -> PosList end. @@ -1741,7 +1871,7 @@ to_list(HashTree, Index) -> to_list(HashTree, Index, {LastHash, LastPos}, Acc) -> case ets:next(HashTree, {Index, LastHash, LastPos}) of {Index, Hash, Pos} -> - to_list(HashTree, Index, {Hash, Pos}, [{Hash, Pos}|Acc]); + to_list(HashTree, Index, {Hash, Pos}, [{Hash, Pos} | Acc]); _ -> Acc end. @@ -1765,37 +1895,42 @@ build_hashtree_binary([], IdxLen, SlotPos, Bin) -> Bin; N when N < IdxLen -> ZeroLen = (IdxLen - N) * 64, - [<<0:ZeroLen>>|Bin] + [<<0:ZeroLen>> | Bin] end; -build_hashtree_binary([{TopSlot, TopBin}|SlotMapTail], IdxLen, SlotPos, Bin) -> +build_hashtree_binary([{TopSlot, TopBin} | SlotMapTail], IdxLen, SlotPos, Bin) -> case TopSlot of N when N > SlotPos -> D = N - SlotPos, Bridge = lists:duplicate(D, <<0:64>>) ++ Bin, - UpdBin = [<>|Bridge], - build_hashtree_binary(SlotMapTail, - IdxLen, - SlotPos + D + 1, - UpdBin); + UpdBin = [<> | Bridge], + build_hashtree_binary( + SlotMapTail, + IdxLen, + SlotPos + D + 1, + UpdBin + ); N when N =< SlotPos, SlotPos < IdxLen -> - UpdBin = [<>|Bin], - build_hashtree_binary(SlotMapTail, - IdxLen, - SlotPos + 1, - UpdBin); + UpdBin = [<> | Bin], + build_hashtree_binary( + SlotMapTail, + IdxLen, + SlotPos + 1, + UpdBin + ); N when N < SlotPos, SlotPos == IdxLen -> % Need to wrap round and put in the first empty slot from the % beginning Pos = find_firstzero(Bin, length(Bin)), - {LHS, [<<0:64>>|RHS]} = lists:split(Pos - 1, Bin), - UpdBin = lists:append(LHS, [TopBin|RHS]), - build_hashtree_binary(SlotMapTail, - IdxLen, - SlotPos, - UpdBin) + {LHS, [<<0:64>> | RHS]} = lists:split(Pos - 1, Bin), + UpdBin = lists:append(LHS, [TopBin | RHS]), + build_hashtree_binary( + SlotMapTail, + IdxLen, + SlotPos, + UpdBin + ) end. - % Search from the tail of the list to find the first zero find_firstzero(Bin, Pos) -> case lists:nth(Pos, Bin) of @@ -1805,17 +1940,30 @@ find_firstzero(Bin, Pos) -> find_firstzero(Bin, Pos - 1) end. - write_hash_tables(Indexes, HashTree, CurrPos) -> write_hash_tables(Indexes, HashTree, CurrPos, CurrPos, [], [], {0, 0, 0}). 
-write_hash_tables([], _HashTree, _CurrPos, _BasePos, - IndexList, HT_BinList, {T1, T2, T3}) -> +write_hash_tables( + [], + _HashTree, + _CurrPos, + _BasePos, + IndexList, + HT_BinList, + {T1, T2, T3} +) -> leveled_log:log(cdb14, [T1, T2, T3]), IL = lists:reverse(IndexList), {IL, list_to_binary(lists:reverse(HT_BinList))}; -write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos, - IndexList, HT_BinList, Timers) -> +write_hash_tables( + [Index | Rest], + HashTree, + CurrPos, + BasePos, + IndexList, + HT_BinList, + Timers +) -> SW1 = os:timestamp(), SlotMap = to_slotmap(HashTree, Index), T1 = timer:now_diff(os:timestamp(), SW1) + element(1, Timers), @@ -1826,7 +1974,7 @@ write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos, HashTree, CurrPos, BasePos, - [{Index, BasePos, 0}|IndexList], + [{Index, BasePos, 0} | IndexList], HT_BinList, Timers ); @@ -1843,14 +1991,12 @@ write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos, HashTree, CurrPos + IndexLength * ?DWORD_SIZE, BasePos, - [{Index, CurrPos, IndexLength}|IndexList], + [{Index, CurrPos, IndexLength} | IndexList], lists:append(NewSlotBin, HT_BinList), {T1, T2, T3} ) end. - - %%%%%%%%%%%%%%%% % T E S T %%%%%%%%%%%%%%% @@ -1864,7 +2010,7 @@ write_hash_tables([Index|Rest], HashTree, CurrPos, BasePos, % % Returns a dictionary that is keyed by % the least significant 8 bits of each hash with the -% values being a list of the hash and the position of the +% values being a list of the hash and the position of the % key/value binary in the file. write_key_value_pairs(Handle, KeyValueList) -> {ok, Position} = file:position(Handle, cur), @@ -1873,40 +2019,56 @@ write_key_value_pairs(Handle, KeyValueList) -> write_key_value_pairs(_, [], Acc) -> Acc; -write_key_value_pairs(Handle, [HeadPair|TailList], Acc) -> +write_key_value_pairs(Handle, [HeadPair | TailList], Acc) -> {Key, Value} = HeadPair, {Handle, NewPosition, HashTree} = put(Handle, Key, Value, Acc), write_key_value_pairs(Handle, TailList, {NewPosition, HashTree}). get(FileName, Key, BinaryMode) when is_list(FileName) -> - {ok, Handle} = file:open(FileName,[binary, raw, read]), + {ok, Handle} = file:open(FileName, [binary, raw, read]), get(Handle, Key, BinaryMode); get(Handle, Key, BinaryMode) -> get( - Handle, Key, no_cache, fun get_uncached_index/3, - true, BinaryMode, {no_monitor, 0}). + Handle, + Key, + no_cache, + fun get_uncached_index/3, + true, + BinaryMode, + {no_monitor, 0} + ). get_uncached_index(Handle, Index, no_cache) -> - {ok,_} = file:position(Handle, {bof, ?DWORD_SIZE * Index}), + {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE * Index}), % Get location of hashtable and number of entries in the hash read_next_2_integers(Handle). - -file_put(FileName, + +file_put( + FileName, Key, Value, {LastPosition, HashTree}, BinaryMode, MaxSize, - IsEmpty) when is_list(FileName) -> -{ok, Handle} = file:open(FileName, ?WRITE_OPS), -put(Handle, Key, Value, {LastPosition, HashTree}, - BinaryMode, MaxSize, IsEmpty). + IsEmpty +) when is_list(FileName) -> + {ok, Handle} = file:open(FileName, ?WRITE_OPS), + put( + Handle, + Key, + Value, + {LastPosition, HashTree}, + BinaryMode, + MaxSize, + IsEmpty + ). file_get_mem(Key, Filename, HashTree, BinaryMode) -> file_get_mem(Key, Filename, HashTree, BinaryMode, true). 
-file_get_mem(Key, Filename, HashTree, BinaryMode, QuickCheck) - when is_list(Filename) -> +file_get_mem(Key, Filename, HashTree, BinaryMode, QuickCheck) when + is_list(Filename) +-> {ok, Handle} = file:open(Filename, [binary, raw, read]), get_mem(Key, Handle, HashTree, BinaryMode, QuickCheck). @@ -1919,31 +2081,43 @@ endian_flip(Int) -> %% from_dict(FileName,ListOfKeyValueTuples) %% Given a filename and a dictionary, create a cdb %% using the key value pairs from the dict. -from_dict(FileName,Dict) -> +from_dict(FileName, Dict) -> KeyValueList = dict:to_list(Dict), create(FileName, KeyValueList). - %% %% create(FileName,ListOfKeyValueTuples) -> ok %% Given a filename and a list of {key,value} tuples, %% this function creates a CDB %% -create(FileName,KeyValueList) -> +create(FileName, KeyValueList) -> {ok, Handle} = file:open(FileName, ?WRITE_OPS), {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}), {BasePos, HashTree} = write_key_value_pairs(Handle, KeyValueList), close_file(Handle, HashTree, BasePos). - %% Should not be used for non-test PUTs by the inker - as the Max File Size %% should be taken from the startup options not the default put(FileName, Key, Value, {LastPosition, HashTree}) when is_list(FileName) -> - file_put(FileName, Key, Value, {LastPosition, HashTree}, - ?BINARY_MODE, ?MAX_FILE_SIZE, false); + file_put( + FileName, + Key, + Value, + {LastPosition, HashTree}, + ?BINARY_MODE, + ?MAX_FILE_SIZE, + false + ); put(Handle, Key, Value, {LastPosition, HashTree}) -> - put(Handle, Key, Value, {LastPosition, HashTree}, - ?BINARY_MODE, ?MAX_FILE_SIZE, false). + put( + Handle, + Key, + Value, + {LastPosition, HashTree}, + ?BINARY_MODE, + ?MAX_FILE_SIZE, + false + ). dump(FileName) -> {ok, Handle} = file:open(FileName, [binary, raw, read]), @@ -1952,7 +2126,7 @@ dump(FileName) -> {_, Count} = read_next_2_integers(Handle), Acc + Count end, - NumberOfPairs = lists:foldl(Fn, 0, lists:seq(0,255)) bsr 1, + NumberOfPairs = lists:foldl(Fn, 0, lists:seq(0, 255)) bsr 1, io:format("Count of keys in db is ~w~n", [NumberOfPairs]), {ok, _} = file:position(Handle, {bof, ?BASE_POSITION}), Fn1 = fun(_I, Acc) -> @@ -1964,9 +2138,9 @@ dump(FileName) -> binary_to_term(V0) end, {Key, Value} = get(Handle, Key, false), - [{Key,Value} | Acc] + [{Key, Value} | Acc] end, - lists:foldr(Fn1, [], lists:seq(0, NumberOfPairs-1)). + lists:foldr(Fn1, [], lists:seq(0, NumberOfPairs - 1)). %% %% to_dict(FileName) @@ -1982,16 +2156,17 @@ to_dict(FileName) -> KeyValueList = dump(FileName), dict:from_list(KeyValueList). - build_hashtree_bunchedatend_binary_test() -> - SlotMap = [{1, <<10:32, 0:32>>}, - {4, <<11:32, 100:32>>}, - {8, <<12:32, 200:32>>}, - {8, <<13:32, 300:32>>}, - {14, <<14:32, 400:32>>}, - {14, <<15:32, 500:32>>}, - {15, <<16:32, 600:32>>}, - {15, <<17:32, 700:32>>}], + SlotMap = [ + {1, <<10:32, 0:32>>}, + {4, <<11:32, 100:32>>}, + {8, <<12:32, 200:32>>}, + {8, <<13:32, 300:32>>}, + {14, <<14:32, 400:32>>}, + {14, <<15:32, 500:32>>}, + {15, <<16:32, 600:32>>}, + {15, <<17:32, 700:32>>} + ], Bin = list_to_binary( lists:reverse( @@ -2005,14 +2180,16 @@ build_hashtree_bunchedatend_binary_test() -> ?assertMatch(ExpBin, Bin). 
build_hashtree_bunchedatstart_binary_test() -> - SlotMap = [{1, <<10:32, 0:32>>}, - {2, <<11:32, 100:32>>}, - {3, <<12:32, 200:32>>}, - {4, <<13:32, 300:32>>}, - {5, <<14:32, 400:32>>}, - {6, <<15:32, 500:32>>}, - {7, <<16:32, 600:32>>}, - {8, <<17:32, 700:32>>}], + SlotMap = [ + {1, <<10:32, 0:32>>}, + {2, <<11:32, 100:32>>}, + {3, <<12:32, 200:32>>}, + {4, <<13:32, 300:32>>}, + {5, <<14:32, 400:32>>}, + {6, <<15:32, 500:32>>}, + {7, <<16:32, 600:32>>}, + {8, <<17:32, 700:32>>} + ], Bin = list_to_binary( lists:reverse( @@ -2027,66 +2204,83 @@ build_hashtree_bunchedatstart_binary_test() -> ?assertMatch(ExpSize, byte_size(Bin)), ?assertMatch(ExpBin, Bin). - build_hashtree_test() -> - SlotMap = [{3, <<2424914688:32, 100:32>>}, - {3, <<2424917760:32, 200:32>>}, - {7, <<2424915712:32, 300:32>>}, - {9, <<2424903936:32, 400:32>>}, - {9, <<2424907008:32, 500:32>>}, - {10, <<2424913408:32, 600:32>>}], + SlotMap = [ + {3, <<2424914688:32, 100:32>>}, + {3, <<2424917760:32, 200:32>>}, + {7, <<2424915712:32, 300:32>>}, + {9, <<2424903936:32, 400:32>>}, + {9, <<2424907008:32, 500:32>>}, + {10, <<2424913408:32, 600:32>>} + ], BinList = build_hashtree_binary(SlotMap, 12), - ExpOut = [<<0:64>>, <<0:64>>, <<0:64>>, <<2424914688:32, 100:32>>] ++ - [<<2424917760:32, 200:32>>, <<0:64>>, <<0:64>>] ++ - [<<2424915712:32, 300:32>>, <<0:64>>] ++ - [<<2424903936:32, 400:32>>, <<2424907008:32, 500:32>>] ++ - [<<2424913408:32, 600:32>>], + ExpOut = + [<<0:64>>, <<0:64>>, <<0:64>>, <<2424914688:32, 100:32>>] ++ + [<<2424917760:32, 200:32>>, <<0:64>>, <<0:64>>] ++ + [<<2424915712:32, 300:32>>, <<0:64>>] ++ + [<<2424903936:32, 400:32>>, <<2424907008:32, 500:32>>] ++ + [<<2424913408:32, 600:32>>], ?assertMatch(ExpOut, lists:reverse(BinList)). - find_firstzero_test() -> - Bin = [<<1:64/integer>>, <<0:64/integer>>, - <<89:64/integer>>, <<89:64/integer>>, - <<0:64/integer>>, - <<71:64/integer>>, <<72:64/integer>>], + Bin = [ + <<1:64/integer>>, + <<0:64/integer>>, + <<89:64/integer>>, + <<89:64/integer>>, + <<0:64/integer>>, + <<71:64/integer>>, + <<72:64/integer>> + ], ?assertMatch(5, find_firstzero(Bin, length(Bin))), - {LHS, [<<0:64>>|RHS]} = lists:split(4, Bin), - ?assertMatch([<<1:64/integer>>, <<0:64/integer>>, - <<89:64/integer>>, <<89:64/integer>>], LHS), + {LHS, [<<0:64>> | RHS]} = lists:split(4, Bin), + ?assertMatch( + [ + <<1:64/integer>>, + <<0:64/integer>>, + <<89:64/integer>>, + <<89:64/integer>> + ], + LHS + ), ?assertMatch([<<71:64/integer>>, <<72:64/integer>>], RHS). 
- magickey_test() -> {C, L1, L2} = {247, 10, 100}, - % Magic constants - will lead to first hash slot being empty - % prompts potential issue when first hash slot is empty but - % hash is 0 + % Magic constants - will lead to first hash slot being empty + % prompts potential issue when first hash slot is empty but + % hash is 0 MagicKey = - {315781, - stnd, + {315781, stnd, {o_rkv, - <<100,111,109,97,105,110,68,111,99,117,109,101,110,116>>, - <<48,48,48,49,52,54,56,54,51,48,48,48,51,50,49,54,51,51>>, + <<100, 111, 109, 97, 105, 110, 68, 111, 99, 117, 109, 101, 110, + 116>>, + <<48, 48, 48, 49, 52, 54, 56, 54, 51, 48, 48, 48, 51, 50, 49, + 54, 51, 51>>, null}}, ?assertEqual(0, hash(MagicKey)), NotMagicKVGen = fun(I) -> - {{I + C, stnd, {o_rkv, <<"B">>, integer_to_binary(I + C), null}}, - <<"V">>} + { + {I + C, stnd, {o_rkv, <<"B">>, integer_to_binary(I + C), null}}, + <<"V">> + } end, Set1 = lists:map(NotMagicKVGen, lists:seq(1, L1)), Set2 = lists:map(NotMagicKVGen, lists:seq(L1 + 1, L2)), {ok, P1} = - cdb_open_writer("test/test_area/magic_hash.pnd", - #cdb_options{binary_mode=true}), + cdb_open_writer( + "test/test_area/magic_hash.pnd", + #cdb_options{binary_mode = true} + ), ok = cdb_put(P1, MagicKey, <<"MagicV0">>), lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, Set1), ok = cdb_put(P1, MagicKey, <<"MagicV1">>), lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, Set2), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=true}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = true}), {GetK, GetV} = cdb_get(P2, MagicKey), ?assertEqual(<<"MagicV1">>, GetV), @@ -2097,12 +2291,14 @@ magickey_test() -> ok = file:delete("test/test_area/magic_hash.cdb"), {ok, P3} = - cdb_open_writer("test/test_area/magic_hash.pnd", - #cdb_options{binary_mode=true}), + cdb_open_writer( + "test/test_area/magic_hash.pnd", + #cdb_options{binary_mode = true} + ), KVL = Set1 ++ [{MagicKey, <<"MagicV1">>}] ++ Set2, ok = cdb_mput(P3, KVL), {ok, F2} = cdb_complete(P3), - {ok, P4} = cdb_open_reader(F2, #cdb_options{binary_mode=true}), + {ok, P4} = cdb_open_reader(F2, #cdb_options{binary_mode = true}), {GetK, GetV} = cdb_get(P4, MagicKey), ?assertEqual(<<"MagicV1">>, GetV), @@ -2110,74 +2306,96 @@ magickey_test() -> ok = file:delete("test/test_area/magic_hash.cdb"), {ok, P5} = - cdb_open_writer("test/test_area/magic_hash.pnd", - #cdb_options{binary_mode=true}), + cdb_open_writer( + "test/test_area/magic_hash.pnd", + #cdb_options{binary_mode = true} + ), KVL5 = Set1 ++ Set2, ok = cdb_mput(P5, KVL5), {ok, F2} = cdb_complete(P5), - {ok, P6} = cdb_open_reader(F2, #cdb_options{binary_mode=true}), + {ok, P6} = cdb_open_reader(F2, #cdb_options{binary_mode = true}), missing = cdb_get(P6, MagicKey), ok = cdb_close(P6), ok = file:delete("test/test_area/magic_hash.cdb"). 
- cyclecount_test() -> io:format("~n~nStarting cycle count test~n"), KVL1 = generate_sequentialkeys(5000, []), - KVL2 = lists:foldl(fun({K, V}, Acc) -> - H = hash(K), - I = hash_to_index(H), - case I of - 0 -> - [{K, V}|Acc]; - _ -> - Acc - end end, - [], - KVL1), - {ok, P1} = cdb_open_writer("test/test_area/cycle_count.pnd", - #cdb_options{binary_mode=false}), + KVL2 = lists:foldl( + fun({K, V}, Acc) -> + H = hash(K), + I = hash_to_index(H), + case I of + 0 -> + [{K, V} | Acc]; + _ -> + Acc + end + end, + [], + KVL1 + ), + {ok, P1} = cdb_open_writer( + "test/test_area/cycle_count.pnd", + #cdb_options{binary_mode = false} + ), ok = cdb_mput(P1, KVL2), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, cdb_get(P2, K)) end, - KVL2), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, cdb_get(P2, K)) + end, + KVL2 + ), % Test many missing keys - lists:foreach(fun(X) -> - K = "NotKey" ++ integer_to_list(X), - ?assertMatch(missing, cdb_get(P2, K)) - end, - lists:seq(1, 5000)), + lists:foreach( + fun(X) -> + K = "NotKey" ++ integer_to_list(X), + ?assertMatch(missing, cdb_get(P2, K)) + end, + lists:seq(1, 5000) + ), ok = cdb_close(P2), ok = file:delete("test/test_area/cycle_count.cdb"). - full_1_test() -> - List1 = lists:sort([{"key1","value1"},{"key2","value2"}]), - create("test/test_area/simple.cdb", - lists:sort([{"key1","value1"},{"key2","value2"}])), + List1 = lists:sort([{"key1", "value1"}, {"key2", "value2"}]), + create( + "test/test_area/simple.cdb", + lists:sort([{"key1", "value1"}, {"key2", "value2"}]) + ), List2 = lists:sort(dump("test/test_area/simple.cdb")), - ?assertMatch(List1,List2), + ?assertMatch(List1, List2), ok = file:delete("test/test_area/simple.cdb"). full_2_test() -> - List1 = lists:sort([{lists:flatten(io_lib:format("~s~p",[Prefix,Plug])), - lists:flatten(io_lib:format("value~p",[Plug]))} - || Plug <- lists:seq(1,200), - Prefix <- ["dsd","so39ds","oe9%#*(","020dkslsldclsldowlslf%$#", - "tiep4||","qweq"]]), - create("test/test_area/full.cdb",List1), + List1 = lists:sort([ + { + lists:flatten(io_lib:format("~s~p", [Prefix, Plug])), + lists:flatten(io_lib:format("value~p", [Plug])) + } + || Plug <- lists:seq(1, 200), + Prefix <- [ + "dsd", + "so39ds", + "oe9%#*(", + "020dkslsldclsldowlslf%$#", + "tiep4||", + "qweq" + ] + ]), + create("test/test_area/full.cdb", List1), List2 = lists:sort(dump("test/test_area/full.cdb")), - ?assertMatch(List1,List2), + ?assertMatch(List1, List2), ok = file:delete("test/test_area/full.cdb"). 
from_dict_test() -> D = dict:new(), - D1 = dict:store("a","b",D), - D2 = dict:store("c","d",D1), - ok = from_dict("test/test_area/from_dict_test.cdb",D2), + D1 = dict:store("a", "b", D), + D2 = dict:store("c", "d", D1), + ok = from_dict("test/test_area/from_dict_test.cdb", D2), io:format("Store created ~n", []), KVP = lists:sort(dump("test/test_area/from_dict_test.cdb")), D3 = lists:sort(dict:to_list(D2)), @@ -2188,24 +2406,24 @@ from_dict_test() -> to_dict_test() -> D = dict:new(), - D1 = dict:store("a","b",D), - D2 = dict:store("c","d",D1), - ok = from_dict("test/test_area/from_dict_test1.cdb",D2), + D1 = dict:store("a", "b", D), + D2 = dict:store("c", "d", D1), + ok = from_dict("test/test_area/from_dict_test1.cdb", D2), Dict = to_dict("test/test_area/from_dict_test1.cdb"), D3 = lists:sort(dict:to_list(D2)), D4 = lists:sort(dict:to_list(Dict)), - ?assertMatch(D4,D3), + ?assertMatch(D4, D3), ok = file:delete("test/test_area/from_dict_test1.cdb"). crccheck_emptyvalue_test() -> ?assertMatch(false, crccheck(<<>>, <<"Key">>)). crccheck_shortvalue_test() -> - Value = <<128,128,32>>, + Value = <<128, 128, 32>>, ?assertMatch(false, crccheck(Value, <<"Key">>)). crccheck_justshortvalue_test() -> - Value = <<128,128,32,64>>, + Value = <<128, 128, 32, 64>>, ?assertMatch(false, crccheck(Value, <<"Key">>)). crccheck_wronghash_test() -> @@ -2237,40 +2455,68 @@ activewrite_singlewrite_test() -> io:format("New db file created ~n", []), {LastPosition, KeyDict, _} = open_active_file("test/test_area/test_mem.cdb"), - io:format("File opened as new active file " - "with LastPosition=~w ~n", [LastPosition]), + io:format( + "File opened as new active file " + "with LastPosition=~w ~n", + [LastPosition] + ), {_, _, UpdKeyDict} = put( "test/test_area/test_mem.cdb", - Key, Value, {LastPosition, KeyDict}), + Key, + Value, + {LastPosition, KeyDict} + ), io:format("New key and value added to active file ~n", []), ?assertMatch( {Key, Value}, file_get_mem( - Key, "test/test_area/test_mem.cdb", UpdKeyDict, false)), + Key, "test/test_area/test_mem.cdb", UpdKeyDict, false + ) + ), ?assertMatch( probably, file_get_mem( - Key, "test/test_area/test_mem.cdb", - UpdKeyDict, false, loose_presence)), + Key, + "test/test_area/test_mem.cdb", + UpdKeyDict, + false, + loose_presence + ) + ), ?assertMatch( missing, file_get_mem( - "not_present", "test/test_area/test_mem.cdb", - UpdKeyDict, false, loose_presence)), + "not_present", + "test/test_area/test_mem.cdb", + UpdKeyDict, + false, + loose_presence + ) + ), ok = file:delete("test/test_area/test_mem.cdb"). 
search_hash_table_findinslot_test() -> - Key1 = "key1", % this is in slot 3 if count is 8 - D = dict:from_list([{Key1, "value1"}, {"K2", "V2"}, {"K3", "V3"}, - {"K4", "V4"}, {"K5", "V5"}, {"K6", "V6"}, {"K7", "V7"}, - {"K8", "V8"}]), - ok = from_dict("test/test_area/hashtable1_test.cdb",D), - {ok, Handle} = file:open("test/test_area/hashtable1_test.cdb", - [binary, raw, read, write]), + % this is in slot 3 if count is 8 + Key1 = "key1", + D = dict:from_list([ + {Key1, "value1"}, + {"K2", "V2"}, + {"K3", "V3"}, + {"K4", "V4"}, + {"K5", "V5"}, + {"K6", "V6"}, + {"K7", "V7"}, + {"K8", "V8"} + ]), + ok = from_dict("test/test_area/hashtable1_test.cdb", D), + {ok, Handle} = file:open( + "test/test_area/hashtable1_test.cdb", + [binary, raw, read, write] + ), Hash = hash(Key1), Index = hash_to_index(Hash), - {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE*Index}), + {ok, _} = file:position(Handle, {bof, ?DWORD_SIZE * Index}), {HashTable, Count} = read_next_2_integers(Handle), io:format("Count of ~w~n", [Count]), {ok, FirstHashPosition} = file:position(Handle, {bof, HashTable}), @@ -2285,34 +2531,56 @@ search_hash_table_findinslot_test() -> ?assertMatch({"key1", "value1"}, get(Handle, Key1, false)), NoMonitor = {no_monitor, 0}, ?assertMatch( - probably, - get(Handle, Key1, no_cache, fun get_uncached_index/3, - loose_presence, false, NoMonitor)), + probably, + get( + Handle, + Key1, + no_cache, + fun get_uncached_index/3, + loose_presence, + false, + NoMonitor + ) + ), ?assertMatch( - missing, - get(Handle, "Key99", no_cache, fun get_uncached_index/3, - loose_presence, false, NoMonitor)), + missing, + get( + Handle, + "Key99", + no_cache, + fun get_uncached_index/3, + loose_presence, + false, + NoMonitor + ) + ), {ok, _} = file:position(Handle, FirstHashPosition), FlipH3 = endian_flip(ReadH3), FlipP3 = endian_flip(ReadP3), - RBin = <>, + RBin = <>, io:format("Replacement binary of ~w~n", [RBin]), - {ok, OldBin} = file:pread(Handle, - FirstHashPosition + (Slot -1) * ?DWORD_SIZE, 16), + {ok, OldBin} = file:pread( + Handle, + FirstHashPosition + (Slot - 1) * ?DWORD_SIZE, + 16 + ), io:format("Bin to be replaced is ~w ~n", [OldBin]), - ok = file:pwrite(Handle, - FirstHashPosition + (Slot -1) * ?DWORD_SIZE, - RBin), + ok = file:pwrite( + Handle, + FirstHashPosition + (Slot - 1) * ?DWORD_SIZE, + RBin + ), ok = file:close(Handle), io:format("Find key following change to hash table~n"), - ?assertMatch(missing, get("test/test_area/hashtable1_test.cdb", Key1, false)), + ?assertMatch( + missing, get("test/test_area/hashtable1_test.cdb", Key1, false) + ), ok = file:delete("test/test_area/hashtable1_test.cdb"). newactivefile_test() -> - {LastPosition, _, _} = open_active_file("test/test_area/activefile_test.cdb"), + {LastPosition, _, _} = open_active_file( + "test/test_area/activefile_test.cdb" + ), ?assertMatch(256 * ?DWORD_SIZE, LastPosition), ok = file:delete("test/test_area/activefile_test.cdb"). @@ -2322,7 +2590,7 @@ emptyvalue_fromdict_test() -> D2 = dict:store("K2", "", D1), D3 = dict:store("K3", "V3", D2), D4 = dict:store("K4", "", D3), - ok = from_dict("test/test_area/from_dict_test_ev.cdb",D4), + ok = from_dict("test/test_area/from_dict_test_ev.cdb", D4), io:format("Store created ~n", []), KVP = lists:sort(dump("test/test_area/from_dict_test_ev.cdb")), D_Result = lists:sort(dict:to_list(D4)), @@ -2331,23 +2599,28 @@ emptyvalue_fromdict_test() -> ?assertMatch(KVP, D_Result), ok = file:delete("test/test_area/from_dict_test_ev.cdb"). 
- empty_roll_test() -> file:delete("test/test_area/empty_roll.cdb"), file:delete("test/test_area/empty_roll.pnd"), - {ok, P1} = cdb_open_writer("test/test_area/empty_roll.pnd", - #cdb_options{binary_mode=true}), + {ok, P1} = cdb_open_writer( + "test/test_area/empty_roll.pnd", + #cdb_options{binary_mode = true} + ), ok = cdb_roll(P1), true = finished_rolling(P1), - {ok, P2} = cdb_open_reader("test/test_area/empty_roll.cdb", - #cdb_options{binary_mode=true}), + {ok, P2} = cdb_open_reader( + "test/test_area/empty_roll.cdb", + #cdb_options{binary_mode = true} + ), ok = cdb_close(P2), ok = file:delete("test/test_area/empty_roll.cdb"). find_lastkey_test() -> file:delete("test/test_area/lastkey.pnd"), - {ok, P1} = cdb_open_writer("test/test_area/lastkey.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/lastkey.pnd", + #cdb_options{binary_mode = false} + ), ok = cdb_put(P1, "Key1", "Value1"), ok = cdb_put(P1, "Key3", "Value3"), ok = cdb_put(P1, "Key2", "Value2"), @@ -2355,8 +2628,10 @@ find_lastkey_test() -> ?assertMatch("Key1", cdb_firstkey(P1)), probably = cdb_keycheck(P1, "Key2"), ok = cdb_close(P1), - {ok, P2} = cdb_open_writer("test/test_area/lastkey.pnd", - #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_writer( + "test/test_area/lastkey.pnd", + #cdb_options{binary_mode = false} + ), ?assertMatch("Key2", cdb_lastkey(P2)), probably = cdb_keycheck(P2, "Key2"), {ok, F2} = cdb_complete(P2), @@ -2369,36 +2644,47 @@ find_lastkey_test() -> ok = file:delete("test/test_area/lastkey.cdb"). get_keys_byposition_simple_test() -> - {ok, P1} = cdb_open_writer("test/test_area/poskey.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/poskey.pnd", + #cdb_options{binary_mode = false} + ), ok = cdb_put(P1, "Key1", "Value1"), ok = cdb_put(P1, "Key3", "Value3"), ok = cdb_put(P1, "Key2", "Value2"), KeyList = ["Key1", "Key2", "Key3"], {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), PositionList = cdb_getpositions(P2, all), io:format("Position list of ~w~n", [PositionList]), ?assertMatch(3, length(PositionList)), R1 = cdb_directfetch(P2, PositionList, key_only), io:format("R1 ~w~n", [R1]), ?assertMatch(3, length(R1)), - lists:foreach(fun(Key) -> - ?assertMatch(true, lists:member(Key, KeyList)) end, - R1), + lists:foreach( + fun(Key) -> + ?assertMatch(true, lists:member(Key, KeyList)) + end, + R1 + ), R2 = cdb_directfetch(P2, PositionList, key_size), ?assertMatch(3, length(R2)), - lists:foreach(fun({Key, _Size}) -> - ?assertMatch(true, lists:member(Key, KeyList)) end, - R2), + lists:foreach( + fun({Key, _Size}) -> + ?assertMatch(true, lists:member(Key, KeyList)) + end, + R2 + ), R3 = cdb_directfetch(P2, PositionList, key_value_check), ?assertMatch(3, length(R3)), - lists:foreach(fun({Key, Value, Check}) -> - ?assertMatch(true, Check), - {K, V} = cdb_get(P2, Key), - ?assertMatch(K, Key), - ?assertMatch(V, Value) end, - R3), + lists:foreach( + fun({Key, Value, Check}) -> + ?assertMatch(true, Check), + {K, V} = cdb_get(P2, Key), + ?assertMatch(K, Key), + ?assertMatch(V, Value) + end, + R3 + ), ok = cdb_close(P2), ok = file:delete(F2). @@ -2406,16 +2692,20 @@ generate_sequentialkeys(0, KVList) -> KVList; generate_sequentialkeys(Count, KVList) -> KV = {"Key" ++ integer_to_list(Count), "Value" ++ integer_to_list(Count)}, - generate_sequentialkeys(Count - 1, [KV|KVList]). + generate_sequentialkeys(Count - 1, [KV | KVList]). 
get_keys_byposition_manykeys_test_() -> {timeout, 600, fun get_keys_byposition_manykeys_test_to/0}. get_keys_byposition_manykeys_test_to() -> KeyCount = 16384, - {ok, P1} = cdb_open_writer("test/test_area/poskeymany.pnd", - #cdb_options{binary_mode=false, - sync_strategy=none}), + {ok, P1} = cdb_open_writer( + "test/test_area/poskeymany.pnd", + #cdb_options{ + binary_mode = false, + sync_strategy = none + } + ), KVList = generate_sequentialkeys(KeyCount, []), lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, KVList), ok = cdb_roll(P1), @@ -2425,25 +2715,28 @@ get_keys_byposition_manykeys_test_to() -> % (e.g. > 10K) it is implausible that cdb_roll will ever finish before the % call to cdb_getpositions is executed. So the race is tolerated ?assertMatch([], cdb_getpositions(P1, 10)), - lists:foldl(fun(X, Complete) -> - case Complete of - true -> - true; - false -> - case cdb_checkhashtable(P1) of - true -> - true; - false -> - timer:sleep(X), - false - end - end end, - false, - lists:seq(1, 30)), + lists:foldl( + fun(X, Complete) -> + case Complete of + true -> + true; + false -> + case cdb_checkhashtable(P1) of + true -> + true; + false -> + timer:sleep(X), + false + end + end + end, + false, + lists:seq(1, 30) + ), ?assertMatch(10, length(cdb_getpositions(P1, 10))), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), PositionList = cdb_getpositions(P2, all), L1 = length(PositionList), io:format("Length of all positions ~w~n", [L1]), @@ -2469,12 +2762,13 @@ get_keys_byposition_manykeys_test_to() -> ok = cdb_close(P2), ok = file:delete(F2). - nokeys_test() -> - {ok, P1} = cdb_open_writer("test/test_area/nohash_emptyfile.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/nohash_emptyfile.pnd", + #cdb_options{binary_mode = false} + ), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), io:format("FirstKey is ~s~n", [cdb_firstkey(P2)]), io:format("LastKey is ~s~n", [cdb_lastkey(P2)]), ?assertMatch(empty, cdb_firstkey(P2)), @@ -2484,8 +2778,10 @@ nokeys_test() -> mput_test() -> KeyCount = 1024, - {ok, P1} = cdb_open_writer("test/test_area/nohash_keysinfile.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/nohash_keysinfile.pnd", + #cdb_options{binary_mode = false} + ), KVList = generate_sequentialkeys(KeyCount, []), ok = cdb_mput(P1, KVList), ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")), @@ -2493,7 +2789,7 @@ mput_test() -> ?assertMatch(missing, cdb_get(P1, "Key1025")), ?assertMatch(missing, cdb_get(P1, "Key1026")), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), ?assertMatch("Key1", cdb_firstkey(P2)), ?assertMatch("Key1024", cdb_lastkey(P2)), ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")), @@ -2504,8 +2800,10 @@ mput_test() -> ok = file:delete(F2). 
state_test() -> - {ok, P1} = cdb_open_writer("test/test_area/state_test.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/state_test.pnd", + #cdb_options{binary_mode = false} + ), KVList = generate_sequentialkeys(1000, []), ok = cdb_mput(P1, KVList), ?assertMatch(probably, cdb_keycheck(P1, "Key1")), @@ -2521,10 +2819,11 @@ state_test() -> ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")), ok = cdb_close(P1). - hashclash_test() -> - {ok, P1} = cdb_open_writer("test/test_area/hashclash_test.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/hashclash_test.pnd", + #cdb_options{binary_mode = false} + ), Key1 = "Key4184465780", Key99 = "Key4254669179", KeyNF = "Key9070567319", @@ -2543,7 +2842,7 @@ hashclash_test() -> ?assertMatch(missing, cdb_get(P1, KeyNF)), {ok, FN} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(FN, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(FN, #cdb_options{binary_mode = false}), ?assertMatch(probably, cdb_keycheck(P2, Key1)), ?assertMatch(probably, cdb_keycheck(P2, Key99)), @@ -2567,17 +2866,22 @@ hashclash_test() -> corruptfile_test() -> file:delete("test/test_area/corrupt_test.pnd"), - {ok, P1} = cdb_open_writer("test/test_area/corrupt_test.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/corrupt_test.pnd", + #cdb_options{binary_mode = false} + ), KVList = generate_sequentialkeys(100, []), - ok = cdb_mput(P1, []), % Not relevant to this test, but needs testing + % Not relevant to this test, but needs testing + ok = cdb_mput(P1, []), lists:foreach(fun({K, V}) -> cdb_put(P1, K, V) end, KVList), ?assertMatch(probably, cdb_keycheck(P1, "Key1")), ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")), ?assertMatch({"Key100", "Value100"}, cdb_get(P1, "Key100")), ok = cdb_close(P1), - lists:foreach(fun(Offset) -> corrupt_testfile_at_offset(Offset) end, - lists:seq(1, 40)), + lists:foreach( + fun(Offset) -> corrupt_testfile_at_offset(Offset) end, + lists:seq(1, 40) + ), ok = file:delete("test/test_area/corrupt_test.pnd"). 
corrupt_testfile_at_offset(Offset) -> @@ -2586,8 +2890,10 @@ corrupt_testfile_at_offset(Offset) -> file:position(F1, EofPos - Offset), ok = file:truncate(F1), ok = file:close(F1), - {ok, P2} = cdb_open_writer("test/test_area/corrupt_test.pnd", - #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_writer( + "test/test_area/corrupt_test.pnd", + #cdb_options{binary_mode = false} + ), ?assertMatch(probably, cdb_keycheck(P2, "Key1")), ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")), ?assertMatch(missing, cdb_get(P2, "Key100")), @@ -2597,8 +2903,10 @@ corrupt_testfile_at_offset(Offset) -> crc_corrupt_writer_test() -> file:delete("test/test_area/corruptwrt_test.pnd"), - {ok, P1} = cdb_open_writer("test/test_area/corruptwrt_test.pnd", - #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer( + "test/test_area/corruptwrt_test.pnd", + #cdb_options{binary_mode = false} + ), KVList = generate_sequentialkeys(100, []), ok = cdb_mput(P1, KVList), ?assertMatch(probably, cdb_keycheck(P1, "Key1")), @@ -2610,8 +2918,10 @@ crc_corrupt_writer_test() -> % zero the last byte of the last value ok = file:pwrite(Handle, EofPos - 5, <<0:8/integer>>), ok = file:close(Handle), - {ok, P2} = cdb_open_writer("test/test_area/corruptwrt_test.pnd", - #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_writer( + "test/test_area/corruptwrt_test.pnd", + #cdb_options{binary_mode = false} + ), ?assertMatch(probably, cdb_keycheck(P2, "Key1")), ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")), ?assertMatch(missing, cdb_get(P2, "Key100")), @@ -2628,25 +2938,22 @@ safe_read_test() -> ValToWrite = <>, KeyL = byte_size(Key), FlippedKeyL = endian_flip(KeyL), - ValueL= byte_size(ValToWrite), + ValueL = byte_size(ValToWrite), FlippedValL = endian_flip(ValueL), TestFN = "test/test_area/saferead.pnd", BinToWrite = - <>, TestCorruptedWriteFun = fun(BitNumber) -> - <> = BinToWrite, + <> = + BinToWrite, BadBit = Bit bxor 1, - AltBin = <>, + AltBin = + <>, file:delete(TestFN), {ok, Handle} = file:open(TestFN, ?WRITE_OPS), ok = file:pwrite(Handle, 0, AltBin), @@ -2667,8 +2974,10 @@ safe_read_test() -> ok = file:close(Handle) end, - lists:foreach(TestCorruptedWriteFun, - lists:seq(1, -1 + 8 * (KeyL + ValueL + 8))), + lists:foreach( + TestCorruptedWriteFun, + lists:seq(1, -1 + 8 * (KeyL + ValueL + 8)) + ), {ok, HandleK} = file:open(TestFN, ?WRITE_OPS), ok = file:pwrite(HandleK, 0, BinToWrite), @@ -2696,23 +3005,24 @@ safe_read_test() -> {ok, HandleHappy} = file:open(TestFN, ?WRITE_OPS), ok = file:pwrite(HandleHappy, 0, BinToWrite), {ok, _} = file:position(HandleHappy, bof), - ?assertMatch({<<"Key">>, Value, KeyL, ValueL}, - saferead_keyvalue(HandleHappy)), + ?assertMatch( + {<<"Key">>, Value, KeyL, ValueL}, + saferead_keyvalue(HandleHappy) + ), file:delete(TestFN). 
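safe_read_test above flips each bit of a written key/value record in turn and expects saferead_keyvalue/1 to reject the damage. A hedged sketch of that single-bit-flip primitive (function name invented), which is what TestCorruptedWriteFun does inline:

%% Hedged sketch (function name invented): flip the bit at 0-based position
%% BitNumber in a binary - the corruption primitive applied to every bit of
%% the record in safe_read_test above.
flip_bit(Bin, BitNumber) ->
    <<Pre:BitNumber/bitstring, Bit:1/integer, Post/bitstring>> = Bin,
    <<Pre/bitstring, (Bit bxor 1):1/integer, Post/bitstring>>.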
- get_positions_corruption_test() -> F1 = "test/test_area/corruptpos_test.pnd", file:delete(F1), - {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode = false}), KVList = generate_sequentialkeys(1000, []), ok = cdb_mput(P1, KVList), ?assertMatch(probably, cdb_keycheck(P1, "Key1")), ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")), ?assertMatch({"Key100", "Value100"}, cdb_get(P1, "Key100")), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), PositionList = cdb_getpositions(P2, all), ?assertMatch(1000, length(PositionList)), ok = cdb_close(P2), @@ -2726,7 +3036,7 @@ get_positions_corruption_test() -> ok = lists:foreach(CorruptFun, Positions), ok = file:close(Handle), - {ok, P3} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P3} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), PositionList = cdb_getpositions(P3, all), ?assertMatch(1000, length(PositionList)), @@ -2742,11 +3052,11 @@ badly_written_test() -> {ok, Handle} = file:open(F1, ?WRITE_OPS), ok = file:pwrite(Handle, 256 * ?DWORD_SIZE, <<1:8/integer>>), ok = file:close(Handle), - {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode = false}), ok = cdb_put(P1, "Key100", "Value100"), ?assertMatch({"Key100", "Value100"}, cdb_get(P1, "Key100")), ok = cdb_close(P1), - {ok, P2} = cdb_open_writer(F1, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_writer(F1, #cdb_options{binary_mode = false}), ?assertMatch({"Key100", "Value100"}, cdb_get(P2, "Key100")), ok = cdb_close(P2), file:delete(F1). @@ -2754,29 +3064,29 @@ badly_written_test() -> pendingdelete_test() -> F1 = "test/test_area/deletfile_test.pnd", file:delete(F1), - {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode = false}), KVList = generate_sequentialkeys(1000, []), ok = cdb_mput(P1, KVList), ?assertMatch(probably, cdb_keycheck(P1, "Key1")), ?assertMatch({"Key1", "Value1"}, cdb_get(P1, "Key1")), ?assertMatch({"Key100", "Value100"}, cdb_get(P1, "Key100")), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), ?assertMatch({"Key1", "Value1"}, cdb_get(P2, "Key1")), ?assertMatch({"Key100", "Value100"}, cdb_get(P2, "Key100")), ok = file:delete(F2), ok = cdb_deletepending(P2), - % No issues destroying even though the file has already been removed + % No issues destroying even though the file has already been removed ok = cdb_destroy(P2). getpositions_sample_test() -> % what if we try and get positions with a file with o(1000) entries F1 = "test/test_area/getpos_sample_test.pnd", - {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode=false}), + {ok, P1} = cdb_open_writer(F1, #cdb_options{binary_mode = false}), KVList = generate_sequentialkeys(1000, []), ok = cdb_mput(P1, KVList), {ok, F2} = cdb_complete(P1), - {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode=false}), + {ok, P2} = cdb_open_reader(F2, #cdb_options{binary_mode = false}), PositionList100 = cdb_getpositions(P2, 100), PositionList101 = cdb_getpositions(P2, 101), @@ -2796,7 +3106,7 @@ nonsense_coverage_test() -> code_change( nonsense, reader, - #state{max_count=1, max_size=100}, + #state{max_count = 1, max_size = 100}, nonsense ) ). 
diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl index 354520a4..83aff0d5 100644 --- a/src/leveled_codec.erl +++ b/src/leveled_codec.erl @@ -2,10 +2,9 @@ %% %% Functions for manipulating keys and values within leveled. %% -%% Any thing specific to handling of a given tag should be encapsulated +%% Any thing specific to handling of a given tag should be encapsulated %% within the leveled_head module - -module(leveled_codec). -include("leveled.hrl"). @@ -17,197 +16,209 @@ -endif. -export([ - inker_reload_strategy/1, - strip_to_seqonly/1, - strip_to_statusonly/1, - strip_to_segmentonly/1, - strip_to_keyseqonly/1, - strip_to_indexdetails/1, - striphead_to_v1details/1, - endkey_passed/2, - key_dominates/2, - maybe_reap_expiredkey/2, - to_objectkey/3, - to_objectkey/5, - to_querykey/3, - to_querykey/5, - from_ledgerkey/1, - from_ledgerkey/2, - isvalid_ledgerkey/1, - to_inkerkey/2, - to_inkerkv/6, - from_inkerkv/1, - from_inkerkv/2, - from_journalkey/1, - revert_to_keydeltas/2, - is_full_journalentry/1, - check_forinkertype/2, - get_tagstrategy/2, - maybe_compress/2, - create_value_for_journal/3, - revert_value_from_journal/1, - revert_value_from_journal/2, - generate_ledgerkv/5, - get_size/2, - get_keyandobjhash/2, - idx_indexspecs/5, - obj_objectspecs/3, - segment_hash/1, - to_lookup/1, - next_key/1, - return_proxy/4, - get_metadata/1, - maybe_accumulate/5, - accumulate_index/2, - count_tombs/2]). - --type tag() :: - leveled_head:object_tag()|?IDX_TAG|?HEAD_TAG|atom(). + inker_reload_strategy/1, + strip_to_seqonly/1, + strip_to_statusonly/1, + strip_to_segmentonly/1, + strip_to_keyseqonly/1, + strip_to_indexdetails/1, + striphead_to_v1details/1, + endkey_passed/2, + key_dominates/2, + maybe_reap_expiredkey/2, + to_objectkey/3, + to_objectkey/5, + to_querykey/3, + to_querykey/5, + from_ledgerkey/1, + from_ledgerkey/2, + isvalid_ledgerkey/1, + to_inkerkey/2, + to_inkerkv/6, + from_inkerkv/1, + from_inkerkv/2, + from_journalkey/1, + revert_to_keydeltas/2, + is_full_journalentry/1, + check_forinkertype/2, + get_tagstrategy/2, + maybe_compress/2, + create_value_for_journal/3, + revert_value_from_journal/1, + revert_value_from_journal/2, + generate_ledgerkv/5, + get_size/2, + get_keyandobjhash/2, + idx_indexspecs/5, + obj_objectspecs/3, + segment_hash/1, + to_lookup/1, + next_key/1, + return_proxy/4, + get_metadata/1, + maybe_accumulate/5, + accumulate_index/2, + count_tombs/2 +]). + +-type tag() :: + leveled_head:object_tag() | ?IDX_TAG | ?HEAD_TAG | atom(). -type single_key() :: binary(). -type tuple_key() :: {single_key(), single_key()}. --type key() :: single_key()|tuple_key(). - % Keys SHOULD be binary() - % string() support is a legacy of old tests +-type key() :: single_key() | tuple_key(). +% Keys SHOULD be binary() +% string() support is a legacy of old tests -type sqn() :: - % SQN of the object in the Journal - pos_integer(). --type segment_hash() :: - % hash of the key to an aae segment - to be used in ledger filters - {integer(), integer()}|no_lookup. + % SQN of the object in the Journal + pos_integer(). +-type segment_hash() :: + % hash of the key to an aae segment - to be used in ledger filters + {integer(), integer()} | no_lookup. -type head_value() :: any(). -type metadata() :: - tuple()|null|head_value(). % null for empty metadata + % null for empty metadata + tuple() | null | head_value(). 
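A hedged illustration of the key() and segment_hash() shapes defined above, with all values invented:

%% Hedged illustration (values invented): the two key() forms and the two
%% segment_hash() forms described by the types above.
type_shape_examples() ->
    SingleKey = <<"Key1">>,
    TupleKey = {<<"Type1">>, <<"Bucket1">>},
    LookupHash = {12345, 67890},
    {SingleKey, TupleKey, LookupHash, no_lookup}.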
-type last_moddate() :: - % modified date as determined by the object (not this store) - % if the object has siblings in the store will be the maximum of those - % dates - integer()|undefined. --type lastmod_range() :: {integer(), pos_integer()|infinity}. + % modified date as determined by the object (not this store) + % if the object has siblings in the store will be the maximum of those + % dates + integer() | undefined. +-type lastmod_range() :: {integer(), pos_integer() | infinity}. -type ledger_status() :: - tomb|{active, non_neg_integer()|infinity}. + tomb | {active, non_neg_integer() | infinity}. -type primary_key() :: - {leveled_head:object_tag(), key(), single_key(), single_key()|null}. - % Primary key for an object + {leveled_head:object_tag(), key(), single_key(), single_key() | null}. +% Primary key for an object -type object_key() :: - {tag(), key(), key(), single_key()|null}. + {tag(), key(), key(), single_key() | null}. -type query_key() :: - {tag(), key()|null, key()|null, single_key()|null}|all. --type ledger_key() :: - object_key()|query_key(). + {tag(), key() | null, key() | null, single_key() | null} | all. +-type ledger_key() :: + object_key() | query_key(). -type slimmed_key() :: - {binary(), binary()|null}|binary()|null|all. + {binary(), binary() | null} | binary() | null | all. -type ledger_value() :: - ledger_value_v1()|ledger_value_v2(). + ledger_value_v1() | ledger_value_v2(). -type ledger_value_v1() :: - {sqn(), ledger_status(), segment_hash(), metadata()}. + {sqn(), ledger_status(), segment_hash(), metadata()}. -type ledger_value_v2() :: - {sqn(), ledger_status(), segment_hash(), metadata(), last_moddate()}. + {sqn(), ledger_status(), segment_hash(), metadata(), last_moddate()}. -type ledger_kv() :: - {object_key(), ledger_value()}. + {object_key(), ledger_value()}. -type compaction_method() :: - retain|recovr|recalc. + retain | recovr | recalc. -type compaction_strategy() :: - list({tag(), compaction_method()}). + list({tag(), compaction_method()}). -type journal_key_tag() :: - ?INKT_STND|?INKT_TOMB|?INKT_MPUT|?INKT_KEYD. + ?INKT_STND | ?INKT_TOMB | ?INKT_MPUT | ?INKT_KEYD. -type journal_key() :: - {sqn(), journal_key_tag(), primary_key()}. + {sqn(), journal_key_tag(), primary_key()}. -type journal_ref() :: - {object_key(), sqn()}. + {object_key(), sqn()}. -type object_spec_v0() :: - {add|remove, key(), single_key(), single_key()|null, metadata()}. + {add | remove, key(), single_key(), single_key() | null, metadata()}. -type object_spec_v1() :: - {add|remove, v1, key(), single_key(), single_key()|null, - list(erlang:timestamp())|undefined, metadata()}. + { + add | remove, + v1, + key(), + single_key(), + single_key() | null, + list(erlang:timestamp()) | undefined, + metadata() + }. -type object_spec() :: - object_spec_v0()|object_spec_v1(). + object_spec_v0() | object_spec_v1(). -type compression_method() :: - lz4|native|zstd|none. + lz4 | native | zstd | none. -type index_specs() :: - list({add|remove, any(), any()}). --type journal_keychanges() :: - {index_specs(), infinity|integer()}. % {KeyChanges, TTL} + list({add | remove, any(), any()}). +-type journal_keychanges() :: + % {KeyChanges, TTL} + {index_specs(), infinity | integer()}. -type maybe_lookup() :: - lookup|no_lookup. + lookup | no_lookup. -type actual_regex() :: - {re_pattern, term(), term(), term(), term()}. --type capture_value() :: binary()|integer(). + {re_pattern, term(), term(), term(), term()}. +-type capture_value() :: binary() | integer(). 
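A hedged illustration of the versioned tuple shapes above (all values invented): a v2 ledger value appends the last-modified date as a fifth element, and a v1 object_spec adds a v1 marker after the operation and a last-modified field before the metadata:

%% Hedged illustration (values invented): ledger_value_v1 vs ledger_value_v2,
%% and object_spec_v0 vs object_spec_v1, as described by the types above.
versioned_shape_examples() ->
    LedgerV1 = {12, {active, infinity}, {12345, 67890}, null},
    LedgerV2 = {12, {active, infinity}, {12345, 67890}, null, 1700000000},
    SpecV0 = {add, <<"B">>, <<"K">>, <<"SK">>, {md, <<"value">>}},
    SpecV1 = {add, v1, <<"B">>, <<"K">>, <<"SK">>, undefined, {md, <<"value">>}},
    {LedgerV1, LedgerV2, SpecV0, SpecV1}.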
-type query_filter_fun() :: - fun((#{binary() => capture_value()}) -> boolean()). + fun((#{binary() => capture_value()}) -> boolean()). -type query_eval_fun() :: - fun((binary(), binary()) -> #{binary() => capture_value()}). --type query_expression() :: - {query, query_eval_fun(), query_filter_fun()}. + fun((binary(), binary()) -> #{binary() => capture_value()}). +-type query_expression() :: + {query, query_eval_fun(), query_filter_fun()}. -type term_expression() :: - actual_regex()|undefined|query_expression(). + actual_regex() | undefined | query_expression(). -type value_fetcher() :: - {fun((pid(), leveled_codec:journal_key()) -> any()), - pid(), leveled_codec:journal_key()}. - % A 2-arity function, which when passed the other two elements of the tuple - % will return the value + { + fun((pid(), leveled_codec:journal_key()) -> any()), + pid(), + leveled_codec:journal_key() + }. +% A 2-arity function, which when passed the other two elements of the tuple +% will return the value -type proxy_object() :: {proxy_object, leveled_head:head(), non_neg_integer(), value_fetcher()}. - % Returns the head, size and a tuple for accessing the value +% Returns the head, size and a tuple for accessing the value -type proxy_objectbin() :: binary(). - % using term_to_binary(proxy_object()) - - --type segment_list() - :: list(integer())|false. - --export_type([tag/0, - key/0, - single_key/0, - sqn/0, - object_spec/0, - segment_hash/0, - ledger_status/0, - primary_key/0, - object_key/0, - query_key/0, - ledger_key/0, - ledger_value/0, - ledger_kv/0, - compaction_strategy/0, - compaction_method/0, - journal_key_tag/0, - journal_key/0, - journal_ref/0, - compression_method/0, - journal_keychanges/0, - index_specs/0, - segment_list/0, - maybe_lookup/0, - last_moddate/0, - lastmod_range/0, - term_expression/0, - actual_regex/0, - value_fetcher/0, - proxy_object/0, - slimmed_key/0, - head_value/0 - ]). - +% using term_to_binary(proxy_object()) + +-type segment_list() :: + list(integer()) | false. + +-export_type([ + tag/0, + key/0, + single_key/0, + sqn/0, + object_spec/0, + segment_hash/0, + ledger_status/0, + primary_key/0, + object_key/0, + query_key/0, + ledger_key/0, + ledger_value/0, + ledger_kv/0, + compaction_strategy/0, + compaction_method/0, + journal_key_tag/0, + journal_key/0, + journal_ref/0, + compression_method/0, + journal_keychanges/0, + index_specs/0, + segment_list/0, + maybe_lookup/0, + last_moddate/0, + lastmod_range/0, + term_expression/0, + actual_regex/0, + value_fetcher/0, + proxy_object/0, + slimmed_key/0, + head_value/0 +]). %%%============================================================================ %%% Ledger Key Manipulation %%%============================================================================ --spec segment_hash(ledger_key()|binary()) -> {integer(), integer()}. +-spec segment_hash(ledger_key() | binary()) -> {integer(), integer()}. %% @doc %% Return two 16 bit integers - the segment ID and a second integer for spare -%% entropy. The hashed should be used in blooms or indexes such that some -%% speed can be gained if just the segment ID is known - but more can be +%% entropy. 
The hashed should be used in blooms or indexes such that some +%% speed can be gained if just the segment ID is known - but more can be %% gained should the extended hash (with the second element) is known segment_hash(Key) when is_binary(Key) -> - {segment_hash, SegmentID, ExtraHash, _AltHash} - = leveled_tictac:keyto_segment48(Key), + {segment_hash, SegmentID, ExtraHash, _AltHash} = + leveled_tictac:keyto_segment48(Key), {SegmentID, ExtraHash}; segment_hash(KeyTuple) when is_tuple(KeyTuple) -> - BinKey = + BinKey = case element(1, KeyTuple) of ?HEAD_TAG -> headkey_to_canonicalbinary(KeyTuple); @@ -216,26 +227,32 @@ segment_hash(KeyTuple) when is_tuple(KeyTuple) -> end, segment_hash(BinKey). - headkey_to_canonicalbinary({ - ?HEAD_TAG, Bucket, Key, SubK}) - when is_binary(Bucket), is_binary(Key), is_binary(SubK) -> + ?HEAD_TAG, Bucket, Key, SubK +}) when + is_binary(Bucket), is_binary(Key), is_binary(SubK) +-> <>; headkey_to_canonicalbinary( - {?HEAD_TAG, Bucket, Key, null}) - when is_binary(Bucket), is_binary(Key) -> + {?HEAD_TAG, Bucket, Key, null} +) when + is_binary(Bucket), is_binary(Key) +-> <>; headkey_to_canonicalbinary( - {?HEAD_TAG, {BucketType, Bucket}, Key, SubKey}) - when is_binary(BucketType), is_binary(Bucket) -> + {?HEAD_TAG, {BucketType, Bucket}, Key, SubKey} +) when + is_binary(BucketType), is_binary(Bucket) +-> headkey_to_canonicalbinary( - {?HEAD_TAG, <>, Key, SubKey}). + {?HEAD_TAG, <>, Key, SubKey} + ). -spec to_lookup(ledger_key()) -> maybe_lookup(). %% @doc %% Should it be possible to lookup a key in the merge tree. This is not true %% For keys that should only be read through range queries. Direct lookup -%% keys will have presence in bloom filters and other lookup accelerators. +%% keys will have presence in bloom filters and other lookup accelerators. to_lookup(Key) when is_tuple(Key) -> case element(1, Key) of ?IDX_TAG -> @@ -244,7 +261,6 @@ to_lookup(Key) when is_tuple(Key) -> lookup end. - %% @doc %% Some helper functions to get a sub_components of the key/value @@ -261,16 +277,16 @@ strip_to_segmentonly({_LK, LV}) -> element(3, LV). strip_to_keyseqonly({LK, V}) -> {LK, element(1, V)}. -spec strip_to_indexdetails(ledger_kv()) -> - {integer(), segment_hash(), last_moddate()}. -strip_to_indexdetails({_, {SQN, _, SegmentHash, _}}) -> + {integer(), segment_hash(), last_moddate()}. +strip_to_indexdetails({_, {SQN, _, SegmentHash, _}}) -> % A v1 value {SQN, SegmentHash, undefined}; -strip_to_indexdetails({_, {SQN, _, SegmentHash, _, LMD}}) -> +strip_to_indexdetails({_, {SQN, _, SegmentHash, _, LMD}}) -> % A v2 value should have a fith element - Last Modified Date {SQN, SegmentHash, LMD}. -spec striphead_to_v1details(ledger_value()) -> ledger_value(). -striphead_to_v1details(V) -> +striphead_to_v1details(V) -> {element(1, V), element(2, V), element(3, V), element(4, V)}. -spec get_metadata(ledger_value()) -> metadata(). @@ -278,12 +294,13 @@ get_metadata(LV) -> element(4, LV). -spec maybe_accumulate( - list(leveled_codec:ledger_kv()), - term(), - non_neg_integer(), - {pos_integer(), {non_neg_integer(), non_neg_integer()|infinity}}, - leveled_penciller:pclacc_fun()) - -> {term(), non_neg_integer()}. + list(leveled_codec:ledger_kv()), + term(), + non_neg_integer(), + {pos_integer(), {non_neg_integer(), non_neg_integer() | infinity}}, + leveled_penciller:pclacc_fun() +) -> + {term(), non_neg_integer()}. %% @doc %% Make an accumulation decision based on the date range and also the expiry %% status of the ledger key and value Needs to handle v1 and v2 values. 
When @@ -291,63 +308,103 @@ get_metadata(LV) -> maybe_accumulate([], Acc, Count, _Filter, _Fun) -> {Acc, Count}; maybe_accumulate( - [{K, {_SQN, {active, TS}, _SH, _MD, undefined}=V}|T], - Acc, Count, {Now, _ModRange}=Filter, AccFun) - when TS >= Now -> + [{K, {_SQN, {active, TS}, _SH, _MD, undefined} = V} | T], + Acc, + Count, + {Now, _ModRange} = Filter, + AccFun +) when + TS >= Now +-> maybe_accumulate(T, AccFun(K, V, Acc), Count + 1, Filter, AccFun); maybe_accumulate( - [{K, {_SQN, {active, TS}, _SH, _MD}=V}|T], - Acc, Count, {Now, _ModRange}=Filter, AccFun) - when TS >= Now -> + [{K, {_SQN, {active, TS}, _SH, _MD} = V} | T], + Acc, + Count, + {Now, _ModRange} = Filter, + AccFun +) when + TS >= Now +-> maybe_accumulate(T, AccFun(K, V, Acc), Count + 1, Filter, AccFun); maybe_accumulate( - [{_K, {_SQN, tomb, _SH, _MD, _LMD}}|T], - Acc, Count, Filter, AccFun) -> + [{_K, {_SQN, tomb, _SH, _MD, _LMD}} | T], + Acc, + Count, + Filter, + AccFun +) -> maybe_accumulate(T, Acc, Count, Filter, AccFun); maybe_accumulate( - [{_K, {_SQN, tomb, _SH, _MD}}|T], - Acc, Count, Filter, AccFun) -> + [{_K, {_SQN, tomb, _SH, _MD}} | T], + Acc, + Count, + Filter, + AccFun +) -> maybe_accumulate(T, Acc, Count, Filter, AccFun); maybe_accumulate( - [{K, {_SQN, {active, TS}, _SH, _MD, LMD}=V}|T], - Acc, Count, {Now, {LowDate, HighDate}}=Filter, AccFun) - when TS >= Now, LMD >= LowDate, LMD =< HighDate -> + [{K, {_SQN, {active, TS}, _SH, _MD, LMD} = V} | T], + Acc, + Count, + {Now, {LowDate, HighDate}} = Filter, + AccFun +) when + TS >= Now, LMD >= LowDate, LMD =< HighDate +-> maybe_accumulate(T, AccFun(K, V, Acc), Count + 1, Filter, AccFun); maybe_accumulate( - [_LV|T], - Acc, Count, Filter, AccFun) -> + [_LV | T], + Acc, + Count, + Filter, + AccFun +) -> maybe_accumulate(T, Acc, Count, Filter, AccFun). -spec accumulate_index( - {boolean()|binary(), term_expression()}, - leveled_runner:fold_keys_fun()) - -> leveled_penciller:pclacc_fun(). + {boolean() | binary(), term_expression()}, + leveled_runner:fold_keys_fun() +) -> + leveled_penciller:pclacc_fun(). 
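A hedged usage sketch for the exported maybe_accumulate/5 above (names and values invented): active, unexpired entries are passed to the accumulator fun, tombstones are skipped, and entries carrying a last-modified date must also fall within the supplied range:

%% Hedged usage sketch (names invented): fold ledger KVs, collecting keys of
%% active, unexpired entries, filtered by the supplied modified-date range
%% where a last-modified date is present.
keys_in_daterange(KVL, NowSecs, LowLMD, HighLMD) ->
    AccFun = fun(K, _V, Acc) -> [K | Acc] end,
    leveled_codec:maybe_accumulate(
        KVL, [], 0, {NowSecs, {LowLMD, HighLMD}}, AccFun
    ).

The result is an {Accumulator, Count} pair, per the spec above.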
accumulate_index({false, undefined}, FoldKeysFun) -> fun( - {?IDX_TAG, Bucket, _IndexInfo, ObjKey}, _Value, Acc) - when ObjKey =/= null -> + {?IDX_TAG, Bucket, _IndexInfo, ObjKey}, _Value, Acc + ) when + ObjKey =/= null + -> FoldKeysFun(Bucket, ObjKey, Acc) end; accumulate_index({true, undefined}, FoldKeysFun) -> fun( - {?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) - when IdxValue =/= null, ObjKey =/= null -> + {?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc + ) when + IdxValue =/= null, ObjKey =/= null + -> FoldKeysFun(Bucket, {IdxValue, ObjKey}, Acc) end; accumulate_index( - {AddTerm, {query, EvalFun, FilterFun}}, FoldKeysFun) -> - fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) - when is_binary(ObjKey) -> + {AddTerm, {query, EvalFun, FilterFun}}, FoldKeysFun +) -> + fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) when + is_binary(ObjKey) + -> CptMap = EvalFun(IdxValue, ObjKey), check_captured_terms( CptMap, - FilterFun, AddTerm, FoldKeysFun, - Bucket, IdxValue, ObjKey, - Acc) + FilterFun, + AddTerm, + FoldKeysFun, + Bucket, + IdxValue, + ObjKey, + Acc + ) end; accumulate_index({AddTerm, TermRegex}, FoldKeysFun) -> - fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) - when IdxValue =/= null, ObjKey =/= null, ?IS_DEF(TermRegex) -> + fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) when + IdxValue =/= null, ObjKey =/= null, ?IS_DEF(TermRegex) + -> case leveled_util:regex_run(IdxValue, TermRegex, []) of nomatch -> Acc; @@ -362,7 +419,8 @@ accumulate_index({AddTerm, TermRegex}, FoldKeysFun) -> end. check_captured_terms( - CptMap, FilterFun, AddTerm, FoldKeysFun, B, IdxValue, ObjKey, Acc) -> + CptMap, FilterFun, AddTerm, FoldKeysFun, B, IdxValue, ObjKey, Acc +) -> case FilterFun(CptMap) of true -> case AddTerm of @@ -374,7 +432,7 @@ check_captured_terms( case maps:get(CptKey, CptMap, undefined) of undefined -> Acc; - CptValue -> + CptValue -> FoldKeysFun(B, {CptValue, ObjKey}, Acc) end end; @@ -382,11 +440,9 @@ check_captured_terms( Acc end. - - -spec key_dominates(ledger_kv(), ledger_kv()) -> boolean(). %% @doc -%% When comparing two keys in the ledger need to find if one key comes before +%% When comparing two keys in the ledger need to find if one key comes before %% the other, or if the match, which key is "better" and should be the winner key_dominates(LObj, RObj) -> strip_to_seqonly(LObj) >= strip_to_seqonly(RObj). @@ -394,27 +450,31 @@ key_dominates(LObj, RObj) -> -spec maybe_reap_expiredkey(ledger_kv(), {boolean(), integer()}) -> boolean(). %% @doc %% Make a reap decision based on the level in the ledger (needs to be expired -%% and in the basement). the level is a tuple of the is_basement boolean, and +%% and in the basement). the level is a tuple of the is_basement boolean, and %% a timestamp passed into the calling function maybe_reap_expiredkey(KV, LevelD) -> Status = strip_to_statusonly(KV), maybe_reap(Status, LevelD). maybe_reap({_, infinity}, _) -> - false; % key is not set to expire + % key is not set to expire + false; maybe_reap({_, TS}, {true, CurrTS}) when CurrTS > TS -> - true; % basement and ready to expire + % basement and ready to expire + true; maybe_reap(tomb, {true, _CurrTS}) -> - true; % always expire in basement + % always expire in basement + true; maybe_reap(_, _) -> false. -spec count_tombs( - list(ledger_kv()), non_neg_integer()) -> - non_neg_integer(). + list(ledger_kv()), non_neg_integer() +) -> + non_neg_integer(). 
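Hedged worked examples of the reap rules above (keys, SQNs and timestamps invented), assuming they sit alongside the unit tests at the foot of this module:

%% Hedged worked examples (values invented): infinite-TTL keys are never
%% reaped, expired keys are reaped only in the basement, and tombstones are
%% always reaped in the basement.
reap_rule_examples(Key) ->
    Live = {Key, {1, {active, infinity}, no_lookup, null}},
    Expired = {Key, {2, {active, 100}, no_lookup, null}},
    Tomb = {Key, {3, tomb, no_lookup, null}},
    false = leveled_codec:maybe_reap_expiredkey(Live, {true, 200}),
    false = leveled_codec:maybe_reap_expiredkey(Expired, {false, 200}),
    true = leveled_codec:maybe_reap_expiredkey(Expired, {true, 200}),
    true = leveled_codec:maybe_reap_expiredkey(Tomb, {true, 200}),
    ok.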
count_tombs([], Count) -> Count; -count_tombs([{_K, V}|T], Count) when is_tuple(V) -> +count_tombs([{_K, V} | T], Count) when is_tuple(V) -> case element(2, V) of tomb -> count_tombs(T, Count + 1); @@ -422,10 +482,10 @@ count_tombs([{_K, V}|T], Count) when is_tuple(V) -> count_tombs(T, Count) end. --spec from_ledgerkey(atom(), tuple()) -> false|tuple(). +-spec from_ledgerkey(atom(), tuple()) -> false | tuple(). %% @doc -%% Return the "significant information" from the Ledger Key (normally the -%% {Bucket, Key} pair) if and only if the ExpectedTag matched the tag - +%% Return the "significant information" from the Ledger Key (normally the +%% {Bucket, Key} pair) if and only if the ExpectedTag matched the tag - %% otherwise return false from_ledgerkey(ExpectedTag, {ExpectedTag, Bucket, Key, SubKey}) -> from_ledgerkey({ExpectedTag, Bucket, Key, SubKey}); @@ -443,7 +503,8 @@ from_ledgerkey({_Tag, Bucket, Key, _SubKey}) -> {Bucket, Key}. -spec to_objectkey( - key(), single_key(), tag(), binary(), binary()) -> object_key(). + key(), single_key(), tag(), binary(), binary() +) -> object_key(). %% @doc %% Convert something into a ledger key to_objectkey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG -> @@ -454,7 +515,7 @@ to_objectkey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG -> (key(), single_key(), leveled_head:object_tag()) -> primary_key(); (key(), key(), tag()) -> object_key(). -else. --spec to_objectkey(key(), key()|single_key(), tag()) -> object_key(). +-spec to_objectkey(key(), key() | single_key(), tag()) -> object_key(). -endif. %% @doc %% Convert something into a ledger key @@ -464,12 +525,13 @@ to_objectkey(Bucket, Key, Tag) -> {Tag, Bucket, Key, null}. -spec to_querykey( - key(), single_key()|null, tag(), binary(), binary()) - -> query_key(). + key(), single_key() | null, tag(), binary(), binary() +) -> + query_key(). to_querykey(Bucket, Key, Tag, Field, Value) when Tag == ?IDX_TAG -> {?IDX_TAG, Bucket, {Field, Value}, Key}. --spec to_querykey(key()|null, key()|null, tag()) -> query_key(). +-spec to_querykey(key() | null, key() | null, tag()) -> query_key(). %% @doc %% Convert something into a ledger query key to_querykey(Bucket, {Key, SubKey}, Tag) -> @@ -487,21 +549,22 @@ isvalid_ledgerkey(_LK) -> false. -spec endkey_passed( - query_key()|slimmed_key(), - object_key()|slimmed_key()) -> boolean(). + query_key() | slimmed_key(), + object_key() | slimmed_key() +) -> boolean(). %% @doc %% Compare a key against a query key, only comparing elements that are non-null -%% in the Query key. -%% +%% in the Query key. +%% %% Query key of `all` matches all keys %% Query key element of `null` matches all keys less than or equal in previous %% elements -%% +%% %% This function is required to make sense of this with erlang term order, %% where otherwise atom() < binary() -%% +%% %% endkey_passed means "Query End Key has been passed when scanning this range" -%% +%% %% If the Query End Key is within the range ending in RangeEndkey then %% endkey_passed is true. This range extends beyond the end of the Query %% range, and so no further ranges need to be added to the Query results. @@ -527,26 +590,29 @@ endkey_passed(QueryEndKey, RangeEndKey) -> % required QueryEndKey < RangeEndKey. - %%%============================================================================ %%% Journal Compaction functions %%%============================================================================ -spec inker_reload_strategy(compaction_strategy()) -> compaction_strategy(). 
%% @doc -%% Take the default strategy for compaction, and override the approach for any +%% Take the default strategy for compaction, and override the approach for any %% tags passed in inker_reload_strategy(AltList) -> - DefaultList = - lists:map(fun leveled_head:default_reload_strategy/1, - leveled_head:defined_objecttags()), - lists:ukeymerge(1, - lists:ukeysort(1, AltList), - lists:ukeysort(1, DefaultList)). - + DefaultList = + lists:map( + fun leveled_head:default_reload_strategy/1, + leveled_head:defined_objecttags() + ), + lists:ukeymerge( + 1, + lists:ukeysort(1, AltList), + lists:ukeysort(1, DefaultList) + ). -spec get_tagstrategy( - ledger_key()|tag()|dummy, compaction_strategy()) -> compaction_method(). + ledger_key() | tag() | dummy, compaction_strategy() +) -> compaction_method(). %% @doc %% Work out the compaction strategy for the key get_tagstrategy({Tag, _, _, _}, Strategy) -> @@ -578,14 +644,16 @@ to_inkerkey(LedgerKey, SQN) -> primary_key(), non_neg_integer(), any(), - journal_keychanges(), - compression_method(), boolean()) - -> {journal_key(), binary()}. + journal_keychanges(), + compression_method(), + boolean() +) -> + {journal_key(), binary()}. %% @doc %% Convert to the correct format of a Journal key and value to_inkerkv(LedgerKey, SQN, Object, KeyChanges, PressMethod, Compress) -> InkerType = check_forinkertype(LedgerKey, Object), - Value = + Value = create_value_for_journal({Object, KeyChanges}, Compress, PressMethod), {{SQN, InkerType, LedgerKey}, Value}. @@ -613,30 +681,31 @@ from_inkerkv(Object, ToIgnoreKeyChanges) -> Object end. --spec create_value_for_journal({any(), journal_keychanges()|binary()}, - boolean(), compression_method()) -> binary(). +-spec create_value_for_journal( + {any(), journal_keychanges() | binary()}, + boolean(), + compression_method() +) -> binary(). %% @doc %% Serialise the value to be stored in the Journal -create_value_for_journal({Object, KeyChanges}, Compress, Method) - when not is_binary(KeyChanges) -> +create_value_for_journal({Object, KeyChanges}, Compress, Method) when + not is_binary(KeyChanges) +-> KeyChangeBin = term_to_binary(KeyChanges, [compressed]), create_value_for_journal({Object, KeyChangeBin}, Compress, Method); create_value_for_journal({Object, KeyChangeBin}, Compress, Method) -> KeyChangeBinLen = byte_size(KeyChangeBin), ObjectBin = serialise_object(Object, Compress, Method), TypeCode = encode_valuetype(is_binary(Object), Compress, Method), - <>. maybe_compress({null, KeyChanges}, _PressMethod) -> create_value_for_journal({null, KeyChanges}, false, native); maybe_compress(JournalBin, PressMethod) -> Length0 = byte_size(JournalBin) - 5, - <> = JournalBin, + <> = + JournalBin, {IsBinary, IsCompressed, CompMethod} = decode_valuetype(Type), case IsCompressed of true -> @@ -644,15 +713,17 @@ maybe_compress(JournalBin, PressMethod) -> false -> Length1 = Length0 - KeyChangeLength, <> = JBin0, - V0 = {deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod), - binary_to_term(KCBin2)}, + V0 = { + deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod), + binary_to_term(KCBin2) + }, create_value_for_journal(V0, true, PressMethod) end. 
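maybe_compress/2 above recovers a one-byte type code from the stored journal value and reads it back with decode_valuetype/1; encode_valuetype/3, defined just below, builds that byte from bit flags - 2 for a binary object, 4 for lz4, 4 + 8 for zstd, and (assuming the compressed flag contributes 1, consistent with the declared 0..15 range) 1 for compression. A hedged worked example that could sit alongside this module's unit tests:

%% Hedged worked example of the value-type flag byte (assumes the compressed
%% flag contributes 1, in keeping with the 0..15 range of encode_valuetype/3).
valuetype_flag_examples() ->
    2 = encode_valuetype(true, false, none),
    3 = encode_valuetype(true, true, native),
    7 = encode_valuetype(true, true, lz4),
    15 = encode_valuetype(true, true, zstd),
    ok.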
serialise_object(Object, false, _Method) when is_binary(Object) -> Object; serialise_object(Object, true, Method) when is_binary(Object) -> - case Method of + case Method of lz4 -> {ok, Bin} = lz4:pack(Object), Bin; @@ -677,20 +748,22 @@ revert_value_from_journal(JournalBin) -> revert_value_from_journal(JournalBin, ToIgnoreKeyChanges) -> Length0 = byte_size(JournalBin) - 5, - <> = JournalBin, + <> = + JournalBin, {IsBinary, IsCompressed, CompMethod} = decode_valuetype(Type), Length1 = Length0 - KeyChangeLength, case ToIgnoreKeyChanges of true -> <> = JBin0, - {deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod), - {[], infinity}}; + {deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod), { + [], infinity + }}; false -> <> = JBin0, - {deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod), - binary_to_term(KCBin2)} + { + deserialise_object(OBin2, IsBinary, IsCompressed, CompMethod), + binary_to_term(KCBin2) + } end. deserialise_object(Binary, true, true, lz4) -> @@ -705,20 +778,21 @@ deserialise_object(Binary, true, false, _) -> deserialise_object(Binary, false, _, _) -> binary_to_term(Binary). --spec encode_valuetype(boolean(), boolean(), native|lz4|zstd|none) -> 0..15. +-spec encode_valuetype(boolean(), boolean(), native | lz4 | zstd | none) -> + 0..15. %% @doc Note that IsCompressed will be based on the compression_point %% configuration option when the object is first stored (i.e. only `true` if %% this is set to `on_receipt`). On compaction this will be set to true. encode_valuetype(IsBinary, IsCompressed, Method) -> - {Bit3, Bit4} = - case Method of + {Bit3, Bit4} = + case Method of lz4 -> {4, 0}; zstd -> {4, 8}; native -> {0, 0}; none -> {0, 0} end, Bit2 = - case IsBinary of + case IsBinary of true -> 2; false -> 0 end, @@ -729,9 +803,8 @@ encode_valuetype(IsBinary, IsCompressed, Method) -> end, Bit1 + Bit2 + Bit3 + Bit4. - --spec decode_valuetype(integer()) - -> {boolean(), boolean(), compression_method()}. +-spec decode_valuetype(integer()) -> + {boolean(), boolean(), compression_method()}. %% @doc %% Check bit flags to confirm how the object has been serialised decode_valuetype(TypeInt) -> @@ -745,7 +818,7 @@ decode_valuetype(TypeInt) -> lz4; 12 -> zstd - end, + end, {IsBinary, IsCompressed, CompressionMethod}. -spec from_journalkey(journal_key()) -> {integer(), ledger_key()}. @@ -769,67 +842,79 @@ is_full_journalentry({_SQN, ?INKT_STND, _LK}) -> is_full_journalentry(_OtherJKType) -> false. - %%%============================================================================ %%% Other Ledger Functions %%%============================================================================ - --spec obj_objectspecs(list(tuple()), integer(), integer()|infinity) - -> list(ledger_kv()). +-spec obj_objectspecs(list(tuple()), integer(), integer() | infinity) -> + list(ledger_kv()). %% @doc %% Convert object specs to KV entries ready for the ledger obj_objectspecs(ObjectSpecs, SQN, TTL) -> - lists:map(fun(ObjectSpec) -> gen_headspec(ObjectSpec, SQN, TTL) end, - ObjectSpecs). + lists:map( + fun(ObjectSpec) -> gen_headspec(ObjectSpec, SQN, TTL) end, + ObjectSpecs + ). --spec idx_indexspecs(index_specs(), - any(), any(), integer(), integer()|infinity) - -> list(ledger_kv()). +-spec idx_indexspecs( + index_specs(), + any(), + any(), + integer(), + integer() | infinity +) -> + list(ledger_kv()). 
%% @doc %% Convert index specs to KV entries ready for the ledger idx_indexspecs(IndexSpecs, Bucket, Key, SQN, TTL) -> lists:map( - fun({IdxOp, IdxFld, IdxTrm}) -> - gen_indexspec(Bucket, Key, IdxOp, IdxFld, IdxTrm, SQN, TTL) - end, - IndexSpecs - ). + fun({IdxOp, IdxFld, IdxTrm}) -> + gen_indexspec(Bucket, Key, IdxOp, IdxFld, IdxTrm, SQN, TTL) + end, + IndexSpecs + ). gen_indexspec(Bucket, Key, IdxOp, IdxField, IdxTerm, SQN, TTL) -> Status = set_status(IdxOp, TTL), - {to_objectkey(Bucket, Key, ?IDX_TAG, IdxField, IdxTerm), - {SQN, Status, no_lookup, null}}. + { + to_objectkey(Bucket, Key, ?IDX_TAG, IdxField, IdxTerm), + {SQN, Status, no_lookup, null} + }. --spec gen_headspec(object_spec(), integer(), integer()|infinity) -> ledger_kv(). +-spec gen_headspec(object_spec(), integer(), integer() | infinity) -> + ledger_kv(). %% @doc %% Take an object_spec as passed in a book_mput, and convert it into to a %% valid ledger key and value. Supports different shaped tuples for different %% versions of the object_spec gen_headspec( - {IdxOp, v1, Bucket, Key, SubKey, LMD, Value}, SQN, TTL) - when is_binary(Key) -> - % v1 object spec + {IdxOp, v1, Bucket, Key, SubKey, LMD, Value}, SQN, TTL +) when + is_binary(Key) +-> + % v1 object spec Status = set_status(IdxOp, TTL), K = - case SubKey of + case SubKey of null -> to_objectkey(Bucket, Key, ?HEAD_TAG); SKB when is_binary(SKB) -> to_objectkey(Bucket, {Key, SKB}, ?HEAD_TAG) - end, + end, {K, {SQN, Status, segment_hash(K), Value, get_last_lastmodification(LMD)}}; gen_headspec( - {IdxOp, Bucket, Key, SubKey, Value}, SQN, TTL) - when is_binary(Key) -> + {IdxOp, Bucket, Key, SubKey, Value}, SQN, TTL +) when + is_binary(Key) +-> gen_headspec({IdxOp, v1, Bucket, Key, SubKey, undefined, Value}, SQN, TTL). - -spec return_proxy( leveled_head:object_tag(), leveled_head:object_metadata(), pid(), - journal_ref()) -> proxy_objectbin(). + journal_ref() +) -> proxy_objectbin(). %% @doc %% If the object has a value, return the metadata and a proxy through which %% the application or runner can access the value. @@ -838,40 +923,41 @@ gen_headspec( return_proxy(Tag, ObjMetadata, InkerClone, JournalRef) -> Size = leveled_head:get_size(Tag, ObjMetadata), HeadBin = leveled_head:build_head(Tag, ObjMetadata), - term_to_binary({proxy_object, - HeadBin, - Size, - {fun leveled_bookie:fetch_value/2, - InkerClone, - JournalRef}}). + term_to_binary( + {proxy_object, HeadBin, Size, { + fun leveled_bookie:fetch_value/2, InkerClone, JournalRef + }} + ). -spec set_status( - add|remove, non_neg_integer()|infinity) -> - tomb|{active, non_neg_integer()|infinity}. + add | remove, non_neg_integer() | infinity +) -> + tomb | {active, non_neg_integer() | infinity}. set_status(add, TTL) -> {active, TTL}; set_status(remove, _TTL) -> - %% TODO: timestamps for delayed reaping + %% TODO: timestamps for delayed reaping tomb. -spec generate_ledgerkv( - primary_key(), - integer(), - dynamic(), - integer(), - non_neg_integer()|infinity) -> - { - key(), - single_key(), - ledger_value_v2(), - {segment_hash(), non_neg_integer()|null}, - list(erlang:timestamp()) - }. + primary_key(), + integer(), + dynamic(), + integer(), + non_neg_integer() | infinity +) -> + { + key(), + single_key(), + ledger_value_v2(), + {segment_hash(), non_neg_integer() | null}, + list(erlang:timestamp()) + }. %% @doc %% Function to extract from an object the information necessary to populate %% the Penciller's ledger. 
%% Outputs - -%% Bucket - original Bucket extracted from the PrimaryKey +%% Bucket - original Bucket extracted from the PrimaryKey %% Key - original Key extracted from the PrimaryKey %% Value - the value to be used in the Ledger (essentially the extracted %% metadata) @@ -881,7 +967,11 @@ set_status(remove, _TTL) -> %% siblings) generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) -> {Tag, Bucket, Key, _} = PrimaryKey, - Status = case Obj of delete -> tomb; _ -> {active, TS} end, + Status = + case Obj of + delete -> tomb; + _ -> {active, TS} + end, Hash = segment_hash(PrimaryKey), {MD, LastMods} = leveled_head:extract_metadata(Tag, Size, Obj), ObjHash = leveled_head:get_hash(Tag, MD), @@ -896,7 +986,8 @@ generate_ledgerkv(PrimaryKey, SQN, Obj, Size, TS) -> {Bucket, Key, Value, {Hash, ObjHash}, LastMods}. -spec get_last_lastmodification( - list(erlang:timestamp())|undefined) -> pos_integer()|undefined. + list(erlang:timestamp()) | undefined +) -> pos_integer() | undefined. %% @doc %% Get the highest of the last modifications measured in seconds. This will be %% stored as 4 bytes (unsigned) so will last for another 80 + years @@ -923,7 +1014,8 @@ get_keyandobjhash(LK, Value) -> MD = element(4, Value), case Tag of ?IDX_TAG -> - from_ledgerkey(LK); % returns {Bucket, Key, IdxValue} + % returns {Bucket, Key, IdxValue} + from_ledgerkey(LK); _ -> {Bucket, Key, leveled_head:get_hash(Tag, MD)} end. @@ -938,7 +1030,6 @@ next_key({Type, Bucket}) when is_binary(Type), is_binary(Bucket) -> true = is_binary(UpdBucket), {Type, UpdBucket}. - %%%============================================================================ %%% Test %%%============================================================================ @@ -949,10 +1040,11 @@ next_key({Type, Bucket}) when is_binary(Type), is_binary(Bucket) -> -spec convert_to_ledgerv( leveled_codec:ledger_key(), - integer(), + integer(), any(), integer(), - non_neg_integer()|infinity) -> leveled_codec:ledger_value(). + non_neg_integer() | infinity +) -> leveled_codec:ledger_value(). convert_to_ledgerv(PK, SQN, Obj, Size, TS) -> {_B, _K, MV, _H, _LMs} = leveled_codec:generate_ledgerkv(PK, SQN, Obj, Size, TS), @@ -965,22 +1057,38 @@ valid_ledgerkey_test() -> ?assertMatch(false, isvalid_ledgerkey(KeyNotTuple)), TagNotAtom = {"tag", <<"B">>, <<"K">>, null}, ?assertMatch(false, isvalid_ledgerkey(TagNotAtom)), - ?assertMatch(retain, get_tagstrategy(UserDefTag, inker_reload_strategy([]))). + ?assertMatch( + retain, get_tagstrategy(UserDefTag, inker_reload_strategy([])) + ). indexspecs_test() -> - IndexSpecs = [{add, "t1_int", 456}, - {add, "t1_bin", "adbc123"}, - {remove, "t1_bin", "abdc456"}], + IndexSpecs = [ + {add, "t1_int", 456}, + {add, "t1_bin", "adbc123"}, + {remove, "t1_bin", "abdc456"} + ], Changes = idx_indexspecs(IndexSpecs, "Bucket", "Key2", 1, infinity), - ?assertMatch({{i, "Bucket", {"t1_int", 456}, "Key2"}, - {1, {active, infinity}, no_lookup, null}}, - lists:nth(1, Changes)), - ?assertMatch({{i, "Bucket", {"t1_bin", "adbc123"}, "Key2"}, - {1, {active, infinity}, no_lookup, null}}, - lists:nth(2, Changes)), - ?assertMatch({{i, "Bucket", {"t1_bin", "abdc456"}, "Key2"}, - {1, tomb, no_lookup, null}}, - lists:nth(3, Changes)). 
+ ?assertMatch( + { + {i, "Bucket", {"t1_int", 456}, "Key2"}, + {1, {active, infinity}, no_lookup, null} + }, + lists:nth(1, Changes) + ), + ?assertMatch( + { + {i, "Bucket", {"t1_bin", "adbc123"}, "Key2"}, + {1, {active, infinity}, no_lookup, null} + }, + lists:nth(2, Changes) + ), + ?assertMatch( + { + {i, "Bucket", {"t1_bin", "abdc456"}, "Key2"}, + {1, tomb, no_lookup, null} + }, + lists:nth(3, Changes) + ). endkey_passed_test() -> TestKey = {i, null, null, null}, @@ -989,20 +1097,23 @@ endkey_passed_test() -> ?assertMatch(false, endkey_passed(TestKey, K1)), ?assertMatch(true, endkey_passed(TestKey, K2)). - - %% Test below proved that the overhead of performing hashes was trivial %% Maybe 5 microseconds per hash hashperf_test() -> - OL = lists:map(fun(_X) -> crypto:strong_rand_bytes(8192) end, lists:seq(1, 1000)), + OL = lists:map( + fun(_X) -> crypto:strong_rand_bytes(8192) end, lists:seq(1, 1000) + ), SW = os:timestamp(), _HL = lists:map(fun(Obj) -> erlang:phash2(Obj) end, OL), - io:format(user, "1000 object hashes in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SW)]). + io:format( + user, + "1000 object hashes in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SW)] + ). head_segment_compare_test() -> - % Reminder to align native and parallel(leveled_ko) key stores for + % Reminder to align native and parallel(leveled_ko) key stores for % kv_index_tictactree H1 = segment_hash({?HEAD_TAG, <<"B1">>, <<"K1">>, null}), H2 = segment_hash({?RIAK_TAG, <<"B1">>, <<"K1">>, null}), @@ -1018,5 +1129,4 @@ headspec_v0v1_test() -> TTL = infinity, ?assertMatch(true, gen_headspec(V0, 1, TTL) == gen_headspec(V1, 1, TTL)). - -endif. diff --git a/src/leveled_ebloom.erl b/src/leveled_ebloom.erl index 21a8f40b..d0fac3c5 100644 --- a/src/leveled_ebloom.erl +++ b/src/leveled_ebloom.erl @@ -4,13 +4,13 @@ %% (a leveled codec type) are, used for building and checking - the filter %% splits a single hash into a 1 byte slot identifier, and 2 x 12 bit hashes %% (so k=2, although only a single hash is used). -%% +%% %% The filter is designed to support a maximum of 64K keys, larger numbers of %% keys will see higher fprs - with a 40% fpr at 250K keys. -%% +%% %% The filter uses the second "Extra Hash" part of the segment-hash to ensure %% no overlap of fpr with the leveled_sst find_pos function. -%% +%% %% The completed bloom is a binary - to minimise the cost of copying between %% processes and holding in memory. @@ -19,18 +19,18 @@ -export([ create_bloom/1, check_hash/2 - ]). +]). -define(BLOOM_SLOTSIZE_BYTES, 512). -define(INTEGER_SLICE_SIZE, 64). -define(INTEGER_SLICES, 64). - % i.e. ?INTEGER_SLICES * ?INTEGER_SLICE_SIZE = ?BLOOM_SLOTSIZE_BYTES div 8 +% i.e. ?INTEGER_SLICES * ?INTEGER_SLICE_SIZE = ?BLOOM_SLOTSIZE_BYTES div 8 -define(MASK_BSR, 6). - % i.e. 2 ^ (12 - 6) = ?INTEGER_SLICES +% i.e. 2 ^ (12 - 6) = ?INTEGER_SLICES -define(MASK_BAND, 63). - % i.e. integer slize size - 1 +% i.e. integer slize size - 1 -define(SPLIT_BAND, 4095). - % i.e. (?BLOOM_SLOTSIZE_BYTES * 8) - 1 +% i.e. (?BLOOM_SLOTSIZE_BYTES * 8) - 1 -type bloom() :: binary(). @@ -68,7 +68,7 @@ create_bloom(HashList) -> %% element of the leveled_codec:segment_hash/0 type is used - a 32-bit hash. 
check_hash(_Hash, <<>>) -> false; -check_hash({_SegHash, Hash}, BloomBin) when is_binary(BloomBin)-> +check_hash({_SegHash, Hash}, BloomBin) when is_binary(BloomBin) -> SlotSplit = byte_size(BloomBin) div ?BLOOM_SLOTSIZE_BYTES, {Slot, [H0, H1]} = split_hash(Hash, SlotSplit), Pos = ((Slot + 1) * ?BLOOM_SLOTSIZE_BYTES) - 1, @@ -78,29 +78,31 @@ check_hash({_SegHash, Hash}, BloomBin) when is_binary(BloomBin)-> _ -> false end. - + %%%============================================================================ %%% Internal Functions %%%============================================================================ --type slot_count() :: 0|2..128. +-type slot_count() :: 0 | 2..128. -type bloom_hash() :: 0..16#FFF. -type external_hash() :: 0..16#FFFFFFFF. -spec map_hashes( - list(leveled_codec:segment_hash()), tuple(), slot_count()) -> tuple(). -map_hashes([], HashListTuple, _SlotCount) -> + list(leveled_codec:segment_hash()), tuple(), slot_count() +) -> tuple(). +map_hashes([], HashListTuple, _SlotCount) -> HashListTuple; -map_hashes([{_SH, EH}|Rest], HashListTuple, SlotCount) -> +map_hashes([{_SH, EH} | Rest], HashListTuple, SlotCount) -> {Slot, [H0, H1]} = split_hash(EH, SlotCount), SlotHL = element(Slot + 1, HashListTuple), map_hashes( Rest, setelement(Slot + 1, HashListTuple, [H0, H1 | SlotHL]), - SlotCount). + SlotCount + ). --spec split_hash(external_hash(), slot_count()) - -> {non_neg_integer(), [bloom_hash()]}. +-spec split_hash(external_hash(), slot_count()) -> + {non_neg_integer(), [bloom_hash()]}. split_hash(Hash, SlotSplit) -> Slot = (Hash band 255) rem SlotSplit, H0 = (Hash bsr 8) band ?SPLIT_BAND, @@ -121,7 +123,8 @@ build_bloom(SlotHashes, SlotCount) when SlotCount > 0 -> HashList = element(I, SlotHashes), SlotBin = add_hashlist( - lists:usort(HashList), 0, 1, ?INTEGER_SLICES, <<>>), + lists:usort(HashList), 0, 1, ?INTEGER_SLICES, <<>> + ), <> end, <<>>, @@ -129,11 +132,12 @@ build_bloom(SlotHashes, SlotCount) when SlotCount > 0 -> ). -spec add_hashlist( - list(bloom_hash()), - non_neg_integer(), - non_neg_integer(), - 0..?INTEGER_SLICES, - binary()) -> bloom(). + list(bloom_hash()), + non_neg_integer(), + non_neg_integer(), + 0..?INTEGER_SLICES, + binary() +) -> bloom(). add_hashlist([], ThisSlice, SliceCount, SliceCount, AccBin) -> <>; add_hashlist([], ThisSlice, SliceNumber, SliceCount, AccBin) -> @@ -142,19 +146,23 @@ add_hashlist([], ThisSlice, SliceNumber, SliceCount, AccBin) -> 0, SliceNumber + 1, SliceCount, - <>); -add_hashlist([H0|Rest], ThisSlice, SliceNumber, SliceCount, AccBin) - when ((H0 bsr ?MASK_BSR) + 1) == SliceNumber -> + <> + ); +add_hashlist([H0 | Rest], ThisSlice, SliceNumber, SliceCount, AccBin) when + ((H0 bsr ?MASK_BSR) + 1) == SliceNumber +-> Mask0 = 1 bsl (H0 band (?MASK_BAND)), add_hashlist( - Rest, ThisSlice bor Mask0, SliceNumber, SliceCount, AccBin); + Rest, ThisSlice bor Mask0, SliceNumber, SliceCount, AccBin + ); add_hashlist(Rest, ThisSlice, SliceNumber, SliceCount, AccBin) -> add_hashlist( Rest, 0, SliceNumber + 1, SliceCount, - <>). + <> + ). %%%============================================================================ %%% Test @@ -184,7 +192,8 @@ generate_orderedkeys(Seqn, Count, Acc, BucketLow, BucketHigh) -> Chunk = crypto:strong_rand_bytes(16), MV = leveled_codec:convert_to_ledgerv(LK, Seqn, Chunk, 64, infinity), generate_orderedkeys( - Seqn + 1, Count - 1, [{LK, MV}|Acc], BucketLow, BucketHigh). + Seqn + 1, Count - 1, [{LK, MV} | Acc], BucketLow, BucketHigh + ). 
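The split in split_hash/2 above matches the module description: the low byte of the 32-bit extra-hash selects one of up to 128 slots, and 12-bit hashes (?SPLIT_BAND = 4095) address bits within a 512-byte (4096-bit) slot, so a full 128-slot filter is 64KiB. A hedged sketch of the first split, with the macros inlined as literals (the derivation of the second 12-bit hash sits outside the hunks shown, so it is omitted):

%% Hedged sketch (macros inlined as literals): how an extra-hash selects a
%% slot and a first 12-bit in-slot bit position, per split_hash/2 above.
slot_split_sketch(ExtraHash, SlotCount) when SlotCount > 0 ->
    Slot = (ExtraHash band 255) rem SlotCount,
    H0 = (ExtraHash bsr 8) band 4095,
    {Slot, H0}.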
get_hashlist(N) -> KVL = generate_orderedkeys(1, N, 1, 20), @@ -200,7 +209,7 @@ check_all_hashes(BloomBin, HashList) -> ?assertMatch(true, check_hash(Hash, BloomBin)) end, lists:foreach(CheckFun, HashList). - + check_neg_hashes(BloomBin, HashList, Counters) -> CheckFun = fun(Hash, {AccT, AccF}) -> @@ -216,7 +225,8 @@ check_neg_hashes(BloomBin, HashList, Counters) -> empty_bloom_test() -> BloomBin0 = create_bloom([]), ?assertMatch( - {0, 4}, check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0})). + {0, 4}, check_neg_hashes(BloomBin0, [0, 10, 100, 100000], {0, 0}) + ). bloom_test_() -> {timeout, 120, fun bloom_test_ranges/0}. @@ -234,18 +244,18 @@ bloom_test_ranges() -> test_bloom(1000, 4). test_bloom(N, Runs) -> - ListOfHashLists = + ListOfHashLists = lists:map(fun(_X) -> get_hashlist(N * 2) end, lists:seq(1, Runs)), SpliListFun = - fun(HashList) -> - HitOrMissFun = - fun (Entry, {HitL, MissL}) -> + fun(HashList) -> + HitOrMissFun = + fun(Entry, {HitL, MissL}) -> case rand:uniform() < 0.5 of - true -> - {[Entry|HitL], MissL}; + true -> + {[Entry | HitL], MissL}; false -> - {HitL, [Entry|MissL]} - end + {HitL, [Entry | MissL]} + end end, lists:foldl(HitOrMissFun, {[], []}, HashList) end, @@ -254,9 +264,10 @@ test_bloom(N, Runs) -> SWa = os:timestamp(), ListOfBlooms = lists:map( - fun({HL, _ML}) -> create_bloom(HL) end, SplitListOfHashLists), - TSa = timer:now_diff(os:timestamp(), SWa)/Runs, - + fun({HL, _ML}) -> create_bloom(HL) end, SplitListOfHashLists + ), + TSa = timer:now_diff(os:timestamp(), SWa) / Runs, + SWb = os:timestamp(), PosChecks = lists:foldl( @@ -267,11 +278,12 @@ test_bloom(N, Runs) -> ChecksMade + length(HL) end, 0, - lists:seq(1, Runs)), + lists:seq(1, Runs) + ), TSb = timer:now_diff(os:timestamp(), SWb), SWc = os:timestamp(), - {Pos, Neg} = + {Pos, Neg} = lists:foldl( fun(Nth, Acc) -> {_HL, ML} = lists:nth(Nth, SplitListOfHashLists), @@ -279,7 +291,8 @@ test_bloom(N, Runs) -> check_neg_hashes(BB, ML, Acc) end, {0, 0}, - lists:seq(1, Runs)), + lists:seq(1, Runs) + ), FPR = Pos / (Pos + Neg), TSc = timer:now_diff(os:timestamp(), SWc), @@ -289,10 +302,10 @@ test_bloom(N, Runs) -> io:format( user, "Test with size ~w has microsecond timings: - " - "build in ~w then ~.3f per pos-check, ~.3f per neg-check, " - "fpr ~.3f with bytes-per-key ~.3f~n", - [N, round(TSa), TSb / PosChecks, TSc / (Pos + Neg), FPR, BytesPerKey]). - + "build in ~w then ~.3f per pos-check, ~.3f per neg-check, " + "fpr ~.3f with bytes-per-key ~.3f~n", + [N, round(TSa), TSb / PosChecks, TSc / (Pos + Neg), FPR, BytesPerKey] + ). split_builder_speed_test_() -> {timeout, 60, fun split_builder_speed_tester/0}. @@ -300,7 +313,7 @@ split_builder_speed_test_() -> split_builder_speed_tester() -> N = 40000, Runs = 50, - ListOfHashLists = + ListOfHashLists = lists:map(fun(_X) -> get_hashlist(N * 2) end, lists:seq(1, Runs)), Timings = @@ -326,6 +339,5 @@ split_builder_speed_tester() -> "Total time in microseconds for map_hashlist ~w build_bloom ~w~n", [lists:sum(MTs), lists:sum(BTs)] ). - -endif. diff --git a/src/leveled_eval.erl b/src/leveled_eval.erl index 7186055b..fbe86b39 100644 --- a/src/leveled_eval.erl +++ b/src/leveled_eval.erl @@ -1,6 +1,6 @@ %% -------- Eval Functions --------- %% -%% Support for different eval expressions within leveled +%% Support for different eval expressions within leveled %% -module(leveled_eval). @@ -13,7 +13,8 @@ -spec generate_eval_function( string(), - map()) -> fun((binary(), binary()) -> map())|{error, term()}. 
+ map() +) -> fun((binary(), binary()) -> map()) | {error, term()}. generate_eval_function(EvalString, Substitutions) -> try {ok, ParsedEval} = generate_eval_expression(EvalString, Substitutions), @@ -45,9 +46,14 @@ apply_eval({eval, Eval}, Term, Key, AttrMap) -> apply_eval(Eval, Term, Key, AttrMap); apply_eval({'PIPE', Eval1, 'INTO', Eval2}, Term, Key, AttrMap) -> apply_eval(Eval2, Term, Key, apply_eval(Eval1, Term, Key, AttrMap)); -apply_eval({ - delim, {identifier, _, InKey}, {string, _, Delim}, ExpKeys}, - Term, Key, AttrMap) -> +apply_eval( + { + delim, {identifier, _, InKey}, {string, _, Delim}, ExpKeys + }, + Term, + Key, + AttrMap +) -> case term_to_process(InKey, Term, Key, AttrMap) of TermToSplit when is_binary(TermToSplit) -> CP = @@ -64,8 +70,11 @@ apply_eval({ AttrMap end; apply_eval( - {join, InKeys, {string, _, Delim}, {identifier, _, OutKey}}, - _Term, _Key, AttrMap) -> + {join, InKeys, {string, _, Delim}, {identifier, _, OutKey}}, + _Term, + _Key, + AttrMap +) -> NewTerm = unicode:characters_to_binary( lists:join( @@ -81,13 +90,16 @@ apply_eval( ), maps:put(OutKey, NewTerm, AttrMap); apply_eval( - { - split, - {identifier, _, InKey}, - {string, _, Splitter}, - {identifier, _, OutKey} - }, - Term, Key, AttrMap) -> + { + split, + {identifier, _, InKey}, + {string, _, Splitter}, + {identifier, _, OutKey} + }, + Term, + Key, + AttrMap +) -> case term_to_process(InKey, Term, Key, AttrMap) of TermToSplit when is_binary(TermToSplit) -> CP = @@ -98,15 +110,18 @@ apply_eval( NewSplitCP; SplitCP -> SplitCP - end, + end, TermList = binary:split(TermToSplit, CP, [global, trim_all]), maps:put(OutKey, TermList, AttrMap); _ -> AttrMap end; apply_eval( - {slice, {identifier, _, InKey}, WidthAttr, {identifier, _, OutKey}}, - Term, Key, AttrMap) -> + {slice, {identifier, _, InKey}, WidthAttr, {identifier, _, OutKey}}, + Term, + Key, + AttrMap +) -> Width = element(3, WidthAttr), case term_to_process(InKey, Term, Key, AttrMap) of TermToSlice when is_binary(TermToSlice) -> @@ -116,17 +131,20 @@ apply_eval( fun(S) -> string:slice(TermToSlice, S, Width) end, lists:map( fun(I) -> Width * I end, - lists:seq(0, TermCount - 1))), + lists:seq(0, TermCount - 1) + ) + ), maps:put(OutKey, TermList, AttrMap); _ -> AttrMap end; apply_eval( - {index, - {identifier, _, InKey}, - StartAtr, LengthAttr, - {identifier, _, OutKey}}, - Term, Key, AttrMap) -> + {index, {identifier, _, InKey}, StartAtr, LengthAttr, + {identifier, _, OutKey}}, + Term, + Key, + AttrMap +) -> Start = element(3, StartAtr), Length = element(3, LengthAttr), case term_to_process(InKey, Term, Key, AttrMap) of @@ -145,10 +163,12 @@ apply_eval( AttrMap end; apply_eval( - {kvsplit, - {identifier, _, InKey}, - {string, _, DelimPair}, {string, _, DelimKV}}, - Term, Key, AttrMap) -> + {kvsplit, {identifier, _, InKey}, {string, _, DelimPair}, + {string, _, DelimKV}}, + Term, + Key, + AttrMap +) -> case term_to_process(InKey, Term, Key, AttrMap) of TermToSplit when is_binary(TermToSplit) -> lists:foldl( @@ -167,8 +187,11 @@ apply_eval( AttrMap end; apply_eval( - {to_integer, {identifier, _, InKey}, {identifier, _, OutKey}}, - Term, Key, AttrMap) -> + {to_integer, {identifier, _, InKey}, {identifier, _, OutKey}}, + Term, + Key, + AttrMap +) -> case term_to_process(InKey, Term, Key, AttrMap) of TermToConvert when is_binary(TermToConvert) -> case string:to_integer(TermToConvert) of @@ -183,8 +206,11 @@ apply_eval( AttrMap end; apply_eval( - {to_string, {identifier, _, InKey}, {identifier, _, OutKey}}, - Term, Key, AttrMap) -> + {to_string, 
{identifier, _, InKey}, {identifier, _, OutKey}}, + Term, + Key, + AttrMap +) -> case term_to_process(InKey, Term, Key, AttrMap) of TermToConvert when is_integer(TermToConvert) -> maps:put( @@ -198,8 +224,11 @@ apply_eval( AttrMap end; apply_eval( - {map, InID, Comparator, MapList, Default, OutID}, - Term, Key, AttrMap) -> + {map, InID, Comparator, MapList, Default, OutID}, + Term, + Key, + AttrMap +) -> {identifier, _, InKey} = InID, {identifier, _, OutKey} = OutID, TermToCompare = term_to_process(InKey, Term, Key, AttrMap), @@ -207,13 +236,17 @@ apply_eval( case lists:dropwhile(F, MapList) of [] -> maps:put(OutKey, element(3, Default), AttrMap); - [{mapping, _T, Assignment}|_Rest] -> + [{mapping, _T, Assignment} | _Rest] -> maps:put(OutKey, element(3, Assignment), AttrMap) end; apply_eval( - {MathOp, OperandX, OperandY, {identifier, _, OutKey}}, - _Term, _Key, AttrMap) - when MathOp == add; MathOp == subtract -> + {MathOp, OperandX, OperandY, {identifier, _, OutKey}}, + _Term, + _Key, + AttrMap +) when + MathOp == add; MathOp == subtract +-> X = maybe_fetch_operand(OperandX, AttrMap), Y = maybe_fetch_operand(OperandY, AttrMap), case MathOp of @@ -225,12 +258,15 @@ apply_eval( AttrMap end; apply_eval( - {regex, {identifier, _, InKey}, CompiledRE, ExpKeys}, - Term, Key, AttrMap) -> + {regex, {identifier, _, InKey}, CompiledRE, ExpKeys}, + Term, + Key, + AttrMap +) -> ExpectedKeyLength = length(ExpKeys), Opts = [{capture, all_but_first, binary}], case term_to_process(InKey, Term, Key, AttrMap) of - TermToCapture when is_binary(TermToCapture)-> + TermToCapture when is_binary(TermToCapture) -> case leveled_util:regex_run(TermToCapture, CompiledRE, Opts) of {match, CptTerms} when length(CptTerms) == ExpectedKeyLength -> CptMap = maps:from_list(lists:zip(ExpKeys, CptTerms)), @@ -266,9 +302,9 @@ reverse_compare_mapping('=', Term) -> fun({mapping, T, _A}) -> Term =/= element(3, T) end. -spec delim(binary(), binary:cp(), map(), list(string())) -> map(). -delim(_Rem, _CP, AttrMap, []) -> +delim(_Rem, _CP, AttrMap, []) -> AttrMap; -delim(Term, CP, AttrMap, [Key|Rest]) -> +delim(Term, CP, AttrMap, [Key | Rest]) -> case binary:match(Term, CP) of nomatch -> maps:put(Key, Term, AttrMap); @@ -306,9 +342,9 @@ delim_test() -> CompiledDelim = compile_delim(Delim), Result1 = delim( - Term1, + Term1, CompiledDelim, - #{}, + #{}, ["$fn", "$dob", "$dod", "$gns", "$pcs"] ), ExpMap1 = @@ -327,9 +363,9 @@ delim_test() -> Term2 = <<"SOMEONE|19901223|20240405|TedBob">>, Result2 = delim( - Term2, + Term2, CompiledDelim, - #{}, + #{}, ["$fn", "$dob", "$dod", "$gns", "$pcs"] ), ExpMap2 = @@ -347,9 +383,9 @@ delim_test() -> Term3 = <<"SOMEONE|19901223||TedBob">>, Result3 = delim( - Term3, - CompiledDelim, - #{}, + Term3, + CompiledDelim, + #{}, ["$fn", "$dob", "$dod", "$gns", "$pcs"] ), ExpMap3 = @@ -366,16 +402,15 @@ delim_test() -> Term4 = <<"SOMEONE|19901223|20240405|TedBob|LS1_4BT|">>, Result4 = delim( - Term4, + Term4, CompiledDelim, - #{}, + #{}, ["$fn", "$dob", "$dod", "$gns", "$pcs"] ), ?assertMatch( ExpResult1, lists:sort(maps:to_list(Result4)) - ) - . + ). 
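A hedged end-to-end usage sketch for generate_eval_function/2 (expression string and inputs invented, modelled on the test strings that follow): on success it returns a 2-arity fun from a term/key pair to an attribute map, while a malformed expression yields {error, Reason}, as parse_error_test below demonstrates:

%% Hedged usage sketch (expression and inputs invented): build an eval fun
%% from a delim expression and apply it to a term and key.
eval_usage_sketch() ->
    EvalString = "delim($term, \"|\", ($fn, $dob))",
    EvalFun = generate_eval_function(EvalString, maps:new()),
    true = is_function(EvalFun, 2),
    AttrMap = EvalFun(<<"SMITH|19861216">>, <<"Key1">>),
    <<"SMITH">> = maps:get(<<"fn">>, AttrMap),
    <<"19861216">> = maps:get(<<"dob">>, AttrMap),
    ok.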
basic_compile_pattern_test() -> % Check nothing happens unexpected with caching in process dictionary @@ -399,7 +434,7 @@ basic_compile_pattern_test() -> #{<<"delim1">> => <<"#">>, <<"delim2">> => <<"|">>} ), true = is_function(Fun2, 2), - + M2 = Fun2(T2, <<"K1">>), GNL2 = maps:get(<<"gnl">>, M2), ?assertMatch([<<"Ted">>, <<"Bob">>], GNL2), @@ -408,9 +443,7 @@ basic_compile_pattern_test() -> ?assertMatch([<<"Ted">>, <<"Bob">>], GNL3), M4 = Fun1(T1, <<"K1">>), GNL4 = maps:get(<<"gnl">>, M4), - ?assertMatch([<<"Ted">>, <<"Bob">>], GNL4) - . - + ?assertMatch([<<"Ted">>, <<"Bob">>], GNL4). parse_error_test() -> Q1 = "delm($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", @@ -421,7 +454,7 @@ parse_error_test() -> ?assertMatch({error, _E2}, generate_eval_function(Q2, maps:new())), ?assertMatch({error, _E3}, generate_eval_function(Q3, maps:new())), ?assertMatch({error, _E4}, generate_eval_function(Q4, maps:new())), - + Q5 = "begins_with($fn, :prefix)", ?assertMatch( {error, _E5A}, @@ -432,11 +465,10 @@ parse_error_test() -> generate_eval_function(Q5, #{<<"prefx">> => <<"ÅßE"/utf8>>}) ). - basic_test() -> EvalString1 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", EvalString2 = "delim($gns, \"#\", ($gn1, $gn2, $gn3))", - + EvalString3 = EvalString1 ++ " | " ++ EvalString2, {ok, Tokens3, _EndLine3} = leveled_evallexer:string(EvalString3), {ok, ParsedExp3} = leveled_evalparser:parse(Tokens3), @@ -456,7 +488,6 @@ basic_test() -> ?assertMatch(<<"Mia">>, maps:get(<<"gn2">>, EvalOut3)), ?assertNot(maps:is_key(<<"gn3">>, EvalOut3)), - EvalString4 = EvalString3 ++ " | join(($dob, $fn), \"|\", $dobfn)", {ok, Tokens4, _EndLine4} = leveled_evallexer:string(EvalString4), {ok, ParsedExp4} = leveled_evalparser:parse(Tokens4), @@ -477,8 +508,8 @@ basic_test() -> ?assertNot(maps:is_key(<<"gn3">>, EvalOut4)), ?assertMatch(<<"19861216|SMITH">>, maps:get(<<"dobfn">>, EvalOut4)), - - EvalString5 = EvalString4 ++ " | index($dob, 0, 4, $yob) | to_integer($yob, $yob)", + EvalString5 = + EvalString4 ++ " | index($dob, 0, 4, $yob) | to_integer($yob, $yob)", {ok, Tokens5, _EndLine5} = leveled_evallexer:string(EvalString5), {ok, ParsedExp5} = leveled_evalparser:parse(Tokens5), EvalOut5 = @@ -542,7 +573,9 @@ basic_test() -> ?assertMatch(<<"19861216">>, maps:get(<<"dob">>, EvalOut8)), ?assertMatch(undefined, maps:get(<<"dod">>, EvalOut8, undefined)), ?assertMatch(<<"LS1 4BT#LS8 1ZZ">>, maps:get(<<"pcs">>, EvalOut8)), - ?assertMatch([<<"Willow">>, <<"Mia">>, <<"Vera">>], maps:get(<<"gns">>, EvalOut8)), + ?assertMatch( + [<<"Willow">>, <<"Mia">>, <<"Vera">>], maps:get(<<"gns">>, EvalOut8) + ), EvalString9 = "delim($term, \"|\", ($name, $height, $weight, $pick)) |" @@ -564,123 +597,123 @@ basic_test() -> ?assertMatch(224, maps:get(<<"height">>, EvalOut9)), ?assertMatch(95, maps:get(<<"weight">>, EvalOut9)), ?assertMatch(<<"#1">>, maps:get(<<"pick">>, EvalOut9)), - % Not changes as not starting with integer + % Not changes as not starting with integer ?assertMatch(<<"SPURS">>, maps:get(<<"team">>, EvalOut9)), ?assertMatch(<<"00001">>, maps:get(<<"number">>, EvalOut9)), ?assertNot(maps:is_key(<<"doh">>, EvalOut9)), %% Age at 30 April 2024 EvalString10 = - EvalString5 ++ - " | index($dob, 4, 4, $birthday)" - " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)" - " | subtract($yoc, $yob, $age)" - " | add($age, 1, $age_next)" - " | to_string($age, $age)" - , + EvalString5 ++ + " | index($dob, 4, 4, $birthday)" + " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)" + " | subtract($yoc, $yob, $age)" + " | add($age, 1, 
$age_next)" + " | to_string($age, $age)", + {ok, Tokens10, _EndLine10} = leveled_evallexer:string(EvalString10), {ok, ParsedExp10} = leveled_evalparser:parse(Tokens10), EvalOut10A = apply_eval( - ParsedExp10, - <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, - <<"9000000001">>, - maps:new() - ), + ParsedExp10, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), ?assertMatch(<<"37">>, maps:get(<<"age">>, EvalOut10A)), ?assertMatch(38, maps:get(<<"age_next">>, EvalOut10A)), EvalOut10B = apply_eval( - ParsedExp10, - <<"SMITH|19860216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, - <<"9000000001">>, - maps:new() - ), + ParsedExp10, + <<"SMITH|19860216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), ?assertMatch(<<"38">>, maps:get(<<"age">>, EvalOut10B)), EvalString10F = - EvalString1 ++ - " | index($dob, 0, 4, $yob)" - " | index($dob, 4, 4, $birthday)" - " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)" - " | subtract($yoc, $yob, $age)" + EvalString1 ++ + " | index($dob, 0, 4, $yob)" + " | index($dob, 4, 4, $birthday)" + " | map($birthday, <=, ((\"0430\", 2024)), 2023, $yoc)" + " | subtract($yoc, $yob, $age)" % yob has not been converted to an integer, % so the age will not be set - " | to_string($age, $age)" - , + " | to_string($age, $age)", + {ok, Tokens10F, _EndLine10F} = leveled_evallexer:string(EvalString10F), {ok, ParsedExp10F} = leveled_evalparser:parse(Tokens10F), EvalOut10F = apply_eval( - ParsedExp10F, - <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, - <<"9000000001">>, - maps:new() - ), + ParsedExp10F, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), ?assertNot(maps:is_key(<<"age">>, EvalOut10F)), EvalString11A = EvalString1 ++ " | map($dob, <, " - "((\"1946\", \"Silent\"), (\"1966\", \"Boomer\")," - "(\"1980\", \"GenX\"), (\"1997\", \"Millenial\")), \"GenZ\"," - " $generation)", + "((\"1946\", \"Silent\"), (\"1966\", \"Boomer\")," + "(\"1980\", \"GenX\"), (\"1997\", \"Millenial\")), \"GenZ\"," + " $generation)", {ok, Tokens11A, _EndLine11A} = leveled_evallexer:string(EvalString11A), {ok, ParsedExp11A} = leveled_evalparser:parse(Tokens11A), EvalOut11A = apply_eval( - ParsedExp11A, - <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, - <<"9000000001">>, - maps:new() - ), + ParsedExp11A, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11A)), EvalString11B = EvalString1 ++ " | map($dob, <=, " - "((\"1945\", \"Silent\"), (\"1965\", \"Boomer\")," - "(\"1979\", \"GenX\"), (\"1996\", \"Millenial\")), \"GenZ\"," - " $generation)", + "((\"1945\", \"Silent\"), (\"1965\", \"Boomer\")," + "(\"1979\", \"GenX\"), (\"1996\", \"Millenial\")), \"GenZ\"," + " $generation)", {ok, Tokens11B, _EndLine11B} = leveled_evallexer:string(EvalString11B), {ok, ParsedExp11B} = leveled_evalparser:parse(Tokens11B), EvalOut11B = apply_eval( - ParsedExp11B, - <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, - <<"9000000001">>, - maps:new() - ), + ParsedExp11B, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11B)), EvalString11C = EvalString1 ++ " | map($dob, >, " - "((\"1996\", \"GenZ\"), (\"1979\", \"Millenial\")," - "(\"1965\", \"GenX\"), (\"1945\", \"Boomer\")), \"Silent\"," - " $generation)", + "((\"1996\", \"GenZ\"), (\"1979\", \"Millenial\")," + "(\"1965\", 
\"GenX\"), (\"1945\", \"Boomer\")), \"Silent\"," + " $generation)", {ok, Tokens11C, _EndLine11C} = leveled_evallexer:string(EvalString11C), {ok, ParsedExp11C} = leveled_evalparser:parse(Tokens11C), EvalOut11C = apply_eval( - ParsedExp11C, - <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, - <<"9000000001">>, - maps:new() - ), + ParsedExp11C, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11C)), EvalString11D = EvalString1 ++ " | map($dob, >=, " - "((\"1997\", \"GenZ\"), (\"1980\", \"Millenial\")," - "(\"1966\", \"GenX\"), (\"1946\", \"Boomer\")), \"Silent\"," - " $generation)", + "((\"1997\", \"GenZ\"), (\"1980\", \"Millenial\")," + "(\"1966\", \"GenX\"), (\"1946\", \"Boomer\")), \"Silent\"," + " $generation)", {ok, Tokens11D, _EndLine11D} = leveled_evallexer:string(EvalString11D), {ok, ParsedExp11D} = leveled_evalparser:parse(Tokens11D), EvalOut11D = apply_eval( - ParsedExp11D, - <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, - <<"9000000001">>, - maps:new() - ), + ParsedExp11D, + <<"SMITH|19861216||Willow#Mia#Vera|LS1 4BT#LS8 1ZZ">>, + <<"9000000001">>, + maps:new() + ), ?assertMatch(<<"Millenial">>, maps:get(<<"generation">>, EvalOut11D)), EvalString12 = @@ -694,17 +727,17 @@ basic_test() -> " add($DEBUG, $INFO, $TOTAL) |" " add($TOTAL, $WARN, $TOTAL) |" " add($TOTAL, $ERROR, $TOTAL) |" - " add($TOTAL, $CRITICAL, $TOTAL)" - , + " add($TOTAL, $CRITICAL, $TOTAL)", + {ok, Tokens12, _EndLine12} = leveled_evallexer:string(EvalString12), {ok, ParsedExp12} = leveled_evalparser:parse(Tokens12), EvalOut12 = apply_eval( - ParsedExp12, - <<"063881703147|DEBUG=804|INFO=186|WARN=10">>, - <<"ABC1233">>, - maps:new() - ), + ParsedExp12, + <<"063881703147|DEBUG=804|INFO=186|WARN=10">>, + <<"ABC1233">>, + maps:new() + ), ?assertMatch(63881703147, maps:get(<<"ts">>, EvalOut12)), ?assertMatch(1000, maps:get(<<"TOTAL">>, EvalOut12)), ?assertNot(maps:is_key(<<"CRITICAL">>, EvalOut12)), @@ -712,9 +745,9 @@ basic_test() -> EvalString13 = "kvsplit($term, \"|\", \":\") |" " map($cup_year, =, " - "((\"1965\", \"bad\"), (\"1970\", \"bad\"), " - "(\"1972\", \"good\"), (\"1974\", \"bad\")), " - "\"indifferent\", $cup_happy) ", + "((\"1965\", \"bad\"), (\"1970\", \"bad\"), " + "(\"1972\", \"good\"), (\"1974\", \"bad\")), " + "\"indifferent\", $cup_happy) ", {ok, Tokens13, _EndLine13} = leveled_evallexer:string(EvalString13), {ok, ParsedExp13} = leveled_evalparser:parse(Tokens13), EvalOut13A = @@ -739,13 +772,12 @@ basic_test() -> check_regex_eval( "regex($term, :regex, ($fn, $dob, $dod, $gns, $pcs))", ExtractRegex - ) - . + ). 
unicode_test() -> EvalString1 = "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", EvalString2 = "delim($gns, \"#\", ($gn1, $gn2, $gn3))", - + EvalString3 = EvalString1 ++ " | " ++ EvalString2, {ok, Tokens3, _EndLine3} = leveled_evallexer:string(EvalString3), {ok, ParsedExp3} = leveled_evalparser:parse(Tokens3), @@ -754,14 +786,14 @@ unicode_test() -> apply_eval( ParsedExp3, <<"ÅßERG|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ"/utf8>>, - % Note index terms will have to be unicode_binary() type - % for this to work a latin-1 binary of - % <<"ÅßERG|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">> will fail to - % match - use unicode:characters_to_binary(B, latin1, utf8) to - % convert + % Note index terms will have to be unicode_binary() type + % for this to work a latin-1 binary of + % <<"ÅßERG|19861216||Willow#Mia|LS1 4BT#LS8 1ZZ">> will fail to + % match - use unicode:characters_to_binary(B, latin1, utf8) to + % convert <<"9000000001">>, maps:new() - ), + ), ?assertMatch(<<"ÅßERG"/utf8>>, maps:get(<<"fn">>, EvalOutUnicode0)), FE19 = "begins_with($fn, :prefix)", {ok, Filter19} = @@ -775,7 +807,7 @@ unicode_test() -> EvalOutUnicode0 ) ), - + EvalString4 = EvalString1 ++ "| slice($gns, 2, $gns)", {ok, Tokens4, _EndLine4} = leveled_evallexer:string(EvalString4), {ok, ParsedExp4} = leveled_evalparser:parse(Tokens4), @@ -785,7 +817,7 @@ unicode_test() -> <<"ÅßERG|19861216||Åbß0Ca|LS1 4BT#LS8 1ZZ"/utf8>>, <<"9000000001">>, maps:new() - ), + ), FE20 = ":gsc_check IN $gns", {ok, Filter20} = leveled_filter:generate_filter_expression( @@ -830,15 +862,13 @@ unicode_test() -> Filter23, EvalOutUnicode1 ) - ) - . - + ). check_regex_eval(EvalString14, ExtractRegex) -> {ok, ParsedExp14} = generate_eval_expression( EvalString14, - #{<<"regex">> => list_to_binary(ExtractRegex)} + #{<<"regex">> => list_to_binary(ExtractRegex)} ), EvalOut14 = apply_eval( @@ -871,11 +901,11 @@ bad_type_test() -> ?assertMatch(224, maps:get(<<"height">>, EvalOut9)), ?assertMatch(95, maps:get(<<"weight">>, EvalOut9)), ?assertMatch(<<"#1">>, maps:get(<<"pick">>, EvalOut9)), - % Not changes as not starting with integer + % Not changes as not starting with integer ?assertMatch(<<"SPURS">>, maps:get(<<"team">>, EvalOut9)), ?assertMatch(<<"00001">>, maps:get(<<"number">>, EvalOut9)), ?assertNot(maps:is_key(<<"doh">>, EvalOut9)), - + EvalStringF1 = EvalString9 ++ " | delim($height, \"|\", ($foo, $bar))", {ok, TokensF1, _EndLineF1} = leveled_evallexer:string(EvalStringF1), {ok, ParsedExpF1} = leveled_evalparser:parse(TokensF1), @@ -889,7 +919,7 @@ bad_type_test() -> ?assertNot(maps:is_key(<<"foo">>, EvalOutF1)), ?assertNot(maps:is_key(<<"bar">>, EvalOutF1)), ?assertMatch(224, maps:get(<<"height">>, EvalOutF1)), - + EvalStringF2 = EvalString9 ++ " | split($height, \"|\", $foo)", {ok, TokensF2, _EndLineF2} = leveled_evallexer:string(EvalStringF2), {ok, ParsedExpF2} = leveled_evalparser:parse(TokensF2), @@ -970,9 +1000,9 @@ bad_type_test() -> EvalStringF8 = EvalString9 ++ - " | regex($height, :regex, ($height_int)) |" - " to_integer($height_int, $height_int)", - + " | regex($height, :regex, ($height_int)) |" + " to_integer($height_int, $height_int)", + {ok, ParsedExpF8} = generate_eval_expression( EvalStringF8, @@ -989,10 +1019,10 @@ bad_type_test() -> EvalStringF9 = EvalString9 ++ - " | to_string($height, $height)" - " | regex($height, :regex, ($height_int)) |" - " to_integer($height_int, $height_int)", - + " | to_string($height, $height)" + " | regex($height, :regex, ($height_int)) |" + " to_integer($height_int, $height_int)", + {ok, ParsedExpF9} = 
generate_eval_expression( EvalStringF9, @@ -1005,17 +1035,15 @@ bad_type_test() -> <<"SPURS|00001">>, maps:new() ), - ?assertMatch(224, maps:get(<<"height_int">>, EvalOutF9)) - . - + ?assertMatch(224, maps:get(<<"height_int">>, EvalOutF9)). generate_test() -> EvalString13 = "kvsplit($term, \"|\", \":\") |" " map($cup_year, =, " - "((\"1965\", \"bad\"), (\"1970\", \"bad\"), " - "(:clarke, \"good\"), (\"1974\", \"bad\")), " - "\"indifferent\", $cup_happy) ", + "((\"1965\", \"bad\"), (\"1970\", \"bad\"), " + "(:clarke, \"good\"), (\"1974\", \"bad\")), " + "\"indifferent\", $cup_happy) ", {ok, ParsedExp13} = generate_eval_expression(EvalString13, #{<<"clarke">> => <<"1972">>}), EvalOut13A = @@ -1026,4 +1054,4 @@ generate_test() -> generate_eval_expression(EvalString13, maps:new()) ). --endif. \ No newline at end of file +-endif. diff --git a/src/leveled_filter.erl b/src/leveled_filter.erl index 0b38d3b3..35884849 100644 --- a/src/leveled_filter.erl +++ b/src/leveled_filter.erl @@ -1,6 +1,6 @@ %% -------- Filter Functions --------- %% -%% Support for different filter expressions within leveled +%% Support for different filter expressions within leveled %% -module(leveled_filter). @@ -11,14 +11,16 @@ generate_filter_expression/2, apply_filter/2, substitute_items/3 - ]). + ] +). %%%============================================================================ %%% External API %%%============================================================================ -spec generate_filter_function( - string(), map()) -> fun((map()) -> boolean())|{error, term()}. + string(), map() +) -> fun((map()) -> boolean()) | {error, term()}. generate_filter_function(FilterString, Substitutions) -> try {ok, ParsedFilter} = @@ -33,7 +35,6 @@ generate_filter_function(FilterString, Substitutions) -> {error, Error} end. 
- %%%============================================================================ %%% Internal functions %%%============================================================================ @@ -56,11 +57,14 @@ apply_filter({'BETWEEN', {identifier, _, ID}, CmpA, CmpB}, AttrMap) -> false end; apply_filter( - {'BETWEEN', {Type, _, V0}, {Type, _, VL}, {Type, _, VH}}, _) - when VL =< VH -> + {'BETWEEN', {Type, _, V0}, {Type, _, VL}, {Type, _, VH}}, _ +) when + VL =< VH +-> V0 >= VL andalso V0 =< VH; apply_filter( - {'BETWEEN', {integer, TL0, I0}, {identifier, _, ID}, CmpB}, AttrMap) -> + {'BETWEEN', {integer, TL0, I0}, {identifier, _, ID}, CmpB}, AttrMap +) -> case maps:get(ID, AttrMap, notfound) of V when is_integer(V) -> apply_filter( @@ -71,41 +75,39 @@ apply_filter( false end; apply_filter( - {'BETWEEN', - {integer, TL0, I0}, {integer, TLL, IL}, {identifier, _, ID} - }, - AttrMap) -> + {'BETWEEN', {integer, TL0, I0}, {integer, TLL, IL}, {identifier, _, ID}}, + AttrMap +) -> case maps:get(ID, AttrMap, notfound) of V when is_integer(V) -> apply_filter( - {'BETWEEN', - {integer, TL0, I0}, {integer, TLL, IL}, {integer, 0, V} - }, + {'BETWEEN', {integer, TL0, I0}, {integer, TLL, IL}, + {integer, 0, V}}, AttrMap ); _ -> false end; apply_filter( - {'BETWEEN', {string, TL0, S0}, {identifier, _, ID}, CmpB}, AttrMap) -> + {'BETWEEN', {string, TL0, S0}, {identifier, _, ID}, CmpB}, AttrMap +) -> case maps:get(ID, AttrMap, notfound) of V when is_binary(V) -> apply_filter( - {'BETWEEN', {string, TL0, S0}, {string, 0, V}, CmpB}, AttrMap); + {'BETWEEN', {string, TL0, S0}, {string, 0, V}, CmpB}, AttrMap + ); _ -> false end; apply_filter( - {'BETWEEN', - {string, TL0, S0}, {string, TLL, SL}, {identifier, _, ID} - }, - AttrMap) -> + {'BETWEEN', {string, TL0, S0}, {string, TLL, SL}, {identifier, _, ID}}, + AttrMap +) -> case maps:get(ID, AttrMap, notfound) of V when is_binary(V) -> apply_filter( - {'BETWEEN', - {string, TL0, S0}, {string, TLL, SL}, {string, 0, V} - }, + {'BETWEEN', {string, TL0, S0}, {string, TLL, SL}, + {string, 0, V}}, AttrMap ); _ -> @@ -121,15 +123,17 @@ apply_filter({'IN', {string, _, TestString}, {identifier, _, ID}}, AttrMap) -> false end; apply_filter( - {'IN', {identifier, _, ID}, CheckList}, AttrMap) - when is_list(CheckList) -> + {'IN', {identifier, _, ID}, CheckList}, AttrMap +) when + is_list(CheckList) +-> case maps:get(ID, AttrMap, notfound) of notfound -> false; V -> lists:member(V, lists:map(fun(C) -> element(3, C) end, CheckList)) end; -apply_filter({{comparator, Cmp, TLC}, {identifier, _ , ID}, CmpB}, AttrMap) -> +apply_filter({{comparator, Cmp, TLC}, {identifier, _, ID}, CmpB}, AttrMap) -> case maps:get(ID, AttrMap, notfound) of notfound -> false; @@ -159,7 +163,7 @@ apply_filter({{comparator, Cmp, _}, {Type, _, TL}, {Type, _, TR}}, _AttrMap) -> compare(Cmp, TL, TR); apply_filter({{comparator, _, _}, _, _}, _AttrMap) -> false; -apply_filter({contains, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) -> +apply_filter({contains, {identifier, _, ID}, {string, _, SubStr}}, AttrMap) -> case maps:get(ID, AttrMap, notfound) of V when is_binary(V) -> case string:find(V, SubStr) of @@ -172,7 +176,8 @@ apply_filter({contains, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) -> false end; apply_filter( - {begins_with, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) -> + {begins_with, {identifier, _, ID}, {string, _, SubStr}}, AttrMap +) -> case maps:get(ID, AttrMap, notfound) of V when is_binary(V) -> case string:prefix(V, SubStr) of @@ -185,18 +190,19 @@ apply_filter( false 
end; apply_filter( - {ends_with, {identifier, _, ID}, {string, _ , SubStr}}, AttrMap) -> -case maps:get(ID, AttrMap, notfound) of - V when is_binary(V) -> - case string:prefix(string:reverse(V), string:reverse(SubStr)) of - nomatch -> - false; - _ -> - true - end; - _ -> - false -end; + {ends_with, {identifier, _, ID}, {string, _, SubStr}}, AttrMap +) -> + case maps:get(ID, AttrMap, notfound) of + V when is_binary(V) -> + case string:prefix(string:reverse(V), string:reverse(SubStr)) of + nomatch -> + false; + _ -> + true + end; + _ -> + false + end; apply_filter({attribute_exists, {identifier, _, ID}}, AttrMap) -> maps:is_key(ID, AttrMap); apply_filter({attribute_not_exists, {identifier, _, ID}}, AttrMap) -> @@ -221,24 +227,27 @@ generate_filter_expression(FilterString, Substitutions) -> substitute_items([], _Subs, UpdTokens) -> lists:reverse(UpdTokens); -substitute_items([{substitution, LN, ID}|Rest], Subs, UpdTokens) -> +substitute_items([{substitution, LN, ID} | Rest], Subs, UpdTokens) -> case maps:get(ID, Subs, notfound) of notfound -> {error, lists:flatten( - io_lib:format("Substitution ~p not found", [ID]))}; + io_lib:format("Substitution ~p not found", [ID]) + )}; Value when is_binary(Value) -> substitute_items( - Rest, Subs, [{string, LN, Value}|UpdTokens]); + Rest, Subs, [{string, LN, Value} | UpdTokens] + ); Value when is_integer(Value) -> - substitute_items(Rest, Subs, [{integer, LN, Value}|UpdTokens]); + substitute_items(Rest, Subs, [{integer, LN, Value} | UpdTokens]); _UnexpectedValue -> {error, lists:flatten( - io_lib:format("Substitution ~p unexpected type", [ID]))} + io_lib:format("Substitution ~p unexpected type", [ID]) + )} end; -substitute_items([Token|Rest], Subs, UpdTokens) -> - substitute_items(Rest, Subs, [Token|UpdTokens]). +substitute_items([Token | Rest], Subs, UpdTokens) -> + substitute_items(Rest, Subs, [Token | UpdTokens]). compare('>', V, CmpA) -> V > CmpA; compare('>=', V, CmpA) -> V >= CmpA; @@ -247,7 +256,6 @@ compare('<=', V, CmpA) -> V =< CmpA; compare('=', V, CmpA) -> V == CmpA; compare('<>', V, CmpA) -> V =/= CmpA. - %%%============================================================================ %%% Test %%%============================================================================ @@ -257,10 +265,14 @@ compare('<>', V, CmpA) -> V =/= CmpA. -include_lib("eunit/include/eunit.hrl"). parse_error_test() -> - FE1 = "($a BETWEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", - FE2 = "($a BETWEEN \"A\" AND \"A12\") ANDOR (($b >= \"30\") AND contains($c, :d))", - FE3 = "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d)))", - FE4 = "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", + FE1 = + "($a BETWEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", + FE2 = + "($a BETWEEN \"A\" AND \"A12\") ANDOR (($b >= \"30\") AND contains($c, :d))", + FE3 = + "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d)))", + FE4 = + "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", SubsMissing = maps:from_list([{<<"a">>, <<"MA">>}]), SubsWrongType = maps:from_list([{<<"d">>, "42"}]), SubsCorrect = maps:from_list([{<<"d">>, <<"MA">>}]), @@ -286,7 +298,8 @@ parse_error_test() -> ). 
invalid_filterexpression_test() -> - FE1 = "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", + FE1 = + "($a BETWEEN \"A\" AND \"A12\") OR (($b >= \"30\") AND contains($c, :d))", SubsMissing = maps:from_list([{<<"a">>, <<"MA">>}]), ?assertMatch( {error, "Substitution <<\"d\">> not found"}, @@ -300,45 +313,48 @@ invalid_filterexpression_test() -> SubsPresent = maps:from_list([{<<"d">>, <<"MA">>}]), FE2 = "($a IN (\"A\", 12)) OR (($b >= \"30\") AND contains($c, :d))", ?assertMatch( - {error, {1, leveled_filterparser,["syntax error before: ","12"]}}, + {error, {1, leveled_filterparser, ["syntax error before: ", "12"]}}, generate_filter_expression(FE2, SubsPresent) ), SubsWrongTypeForContains = maps:from_list([{<<"d">>, 42}]), FE4 = "($a BETWEEN 12 AND 12) OR (($b >= \"30\") AND contains($c, :d))", ?assertMatch( - {error, {1, leveled_filterparser, ["syntax error before: ","42"]}}, + {error, {1, leveled_filterparser, ["syntax error before: ", "42"]}}, generate_filter_expression(FE4, SubsWrongTypeForContains) ). filterexpression_test() -> - FE1 = "($a BETWEEN \"A\" AND \"A12\") AND (($b >= 30) AND contains($c, :d))", + FE1 = + "($a BETWEEN \"A\" AND \"A12\") AND (($b >= 30) AND contains($c, :d))", SubsPresent = maps:from_list([{<<"d">>, <<"MA">>}]), {ok, Filter1} = generate_filter_expression(FE1, SubsPresent), M1 = #{<<"a">> => <<"A11">>, <<"b">> => 100, <<"c">> => <<"CARTMAN">>}, ?assert(apply_filter(Filter1, M1)), - % ok - + % ok + M2 = #{<<"a">> => <<"A11">>, <<"b">> => 10, <<"c">> => <<"CARTMAN">>}, ?assertNot(apply_filter(Filter1, M2)), - % $b < 30 - + % $b < 30 + FE2 = "($a BETWEEN \"A\" AND \"A12\") AND (($b >= 30) OR contains($c, :d))", {ok, Filter2} = generate_filter_expression(FE2, SubsPresent), ?assert(apply_filter(Filter2, M2)), - % OR used so ($b >= 30) = false is ok - + % OR used so ($b >= 30) = false is ok + FE3 = "($a BETWEEN \"A12\" AND \"A\") AND (($b >= 30) OR contains($c, :d))", {ok, Filter3} = generate_filter_expression(FE3, SubsPresent), ?assertNot(apply_filter(Filter3, M2)), - % swapping the low/high - not ok - between explicitly requires low/high - - M3 = #{<<"a">> => <<"A11">>, <<"b">> => <<"100">>, <<"c">> => <<"CARTMAN">>}, + % swapping the low/high - not ok - between explicitly requires low/high + + M3 = #{ + <<"a">> => <<"A11">>, <<"b">> => <<"100">>, <<"c">> => <<"CARTMAN">> + }, ?assertNot(apply_filter(Filter1, M3)), - % substitution b is not an integer + % substitution b is not an integer M3A = #{<<"a">> => 11, <<"b">> => 100, <<"c">> => <<"CARTMAN">>}, ?assertNot(apply_filter(Filter1, M3A)), - % substitution a is an integer - + % substitution a is an integer + FE4 = "($dob BETWEEN \"19700101\" AND \"19791231\") " "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))", @@ -354,15 +370,25 @@ filterexpression_test() -> FE5 = "($dob >= \"19740301\" AND $dob <= \"19761030\")" " OR ($dod > \"20200101\" AND $dod < \"20230101\")", - + {ok, Filter5} = generate_filter_expression(FE5, maps:new()), F = fun(M) -> apply_filter(Filter5, M) end, - M5 = maps:from_list([{<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20221216">>}]), - M6 = maps:from_list([{<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20191216">>}]), - M7 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20221216">>}]), - M8 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20191216">>}]), - M9 = maps:from_list([{<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20241216">>}]), + M5 = maps:from_list([ + {<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20221216">>} + ]), + M6 = 
maps:from_list([ + {<<"dob">>, <<"19750202">>}, {<<"dod">>, <<"20191216">>} + ]), + M7 = maps:from_list([ + {<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20221216">>} + ]), + M8 = maps:from_list([ + {<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20191216">>} + ]), + M9 = maps:from_list([ + {<<"dob">>, <<"19790202">>}, {<<"dod">>, <<"20241216">>} + ]), M10 = maps:new(), ?assertMatch(true, F(M5)), ?assertMatch(true, F(M6)), @@ -397,7 +423,8 @@ filterexpression_test() -> {ok, Filter7} = generate_filter_expression(FE7, maps:new()), ?assert(apply_filter(Filter7, M11)), - FE8 = "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + FE8 = + "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" " OR $dob IN (\"19910301\", \"19910103\")", {ok, Filter8} = generate_filter_expression(FE8, maps:new()), ?assert(apply_filter(Filter8, #{<<"dob">> => <<"19910301">>})), @@ -405,121 +432,169 @@ filterexpression_test() -> ?assertNot(apply_filter(Filter8, #{<<"dob">> => <<"19910102">>})), ?assertNot(apply_filter(Filter8, #{<<"gn">> => <<"Nikki">>})), - FE9 = "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" + FE9 = + "(contains($gn, \"MA\") OR $fn BETWEEN \"SM\" AND \"SN\")" " OR $dob IN (\"19910301\", \"19910103\")", - % Only match with a type match + % Only match with a type match {ok, Filter9} = generate_filter_expression(FE9, maps:new()), ?assert(apply_filter(Filter9, #{<<"dob">> => <<"19910301">>})), ?assert(apply_filter(Filter9, #{<<"dob">> => <<"19910103">>})), ?assertNot(apply_filter(Filter9, #{<<"dob">> => <<"19910401">>})), ?assertNot(apply_filter(Filter9, #{<<"dob">> => <<"19910104">>})), - FE10 = "NOT contains($gn, \"MA\") AND " - "(NOT $dob IN (\"19910301\", \"19910103\"))", + FE10 = + "NOT contains($gn, \"MA\") AND " + "(NOT $dob IN (\"19910301\", \"19910103\"))", {ok, Filter10} = generate_filter_expression(FE10, maps:new()), ?assert( apply_filter( Filter10, - #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>})), + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>} + ) + ), ?assertNot( apply_filter( Filter10, - #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>})), + #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>} + ) + ), ?assertNot( apply_filter( Filter10, - #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>})), - - FE11 = "NOT contains($gn, \"MA\") AND " + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>} + ) + ), + + FE11 = + "NOT contains($gn, \"MA\") AND " "NOT $dob IN (\"19910301\", \"19910103\")", {ok, Filter11} = generate_filter_expression(FE11, maps:new()), ?assert( apply_filter( Filter11, - #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>})), + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910201">>} + ) + ), ?assertNot( apply_filter( Filter11, - #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>})), + #{<<"gn">> => <<"EMMA">>, <<"dob">> => <<"19910201">>} + ) + ), ?assertNot( apply_filter( Filter11, - #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>})), - + #{<<"gn">> => <<"JAMES">>, <<"dob">> => <<"19910301">>} + ) + ), + FE12 = "begins_with($gn, \"MA\") AND begins_with($fn, :fn)", {ok, Filter12} = generate_filter_expression(FE12, #{<<"fn">> => <<"SU">>}), ?assert( apply_filter( Filter12, - #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>})), + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>} + ) + ), ?assertNot( apply_filter( Filter12, - #{<<"gn">> => <<"MITTY">>, <<"fn">> => <<"SUMMER">>})), + #{<<"gn">> => <<"MITTY">>, <<"fn">> => <<"SUMMER">>} + ) + ), ?assertNot( apply_filter( Filter12, - #{<<"gn">> => 
<<"MATTY">>, <<"fn">> => <<"SIMMS">>})), + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SIMMS">>} + ) + ), ?assertNot( apply_filter( Filter12, - #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>})), - + #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>} + ) + ), + FE12E = "ends_with($gn, \"TY\") AND begins_with($fn, :fn)", {ok, Filter12E} = generate_filter_expression(FE12E, #{<<"fn">> => <<"SU">>}), ?assert( apply_filter( Filter12E, - #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>})), + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SUMMER">>} + ) + ), ?assertNot( apply_filter( Filter12E, - #{<<"gn">> => <<"MATTI">>, <<"fn">> => <<"SUMMER">>})), + #{<<"gn">> => <<"MATTI">>, <<"fn">> => <<"SUMMER">>} + ) + ), ?assertNot( apply_filter( Filter12E, - #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SIMMS">>})), + #{<<"gn">> => <<"MATTY">>, <<"fn">> => <<"SIMMS">>} + ) + ), ?assertNot( apply_filter( Filter12E, - #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>})), + #{<<"gn">> => 42, <<"fn">> => <<"SUMMER">>} + ) + ), - FE13 = "attribute_exists($dob) AND attribute_not_exists($consent) " - "AND attribute_empty($dod)", + FE13 = + "attribute_exists($dob) AND attribute_not_exists($consent) " + "AND attribute_empty($dod)", {ok, Filter13} = generate_filter_expression(FE13, maps:new()), ?assert( apply_filter( Filter13, - #{<<"dob">> => <<"19440812">>, <<"dod">> => <<>>})), + #{<<"dob">> => <<"19440812">>, <<"dod">> => <<>>} + ) + ), ?assertNot( apply_filter( Filter13, - #{<<"dod">> => <<>>})), + #{<<"dod">> => <<>>} + ) + ), ?assertNot( apply_filter( Filter13, - #{<<"dob">> => <<"19440812">>, + #{ + <<"dob">> => <<"19440812">>, <<"consent">> => <<>>, - <<"dod">> => <<>>})), + <<"dod">> => <<>> + } + ) + ), ?assertNot( apply_filter( Filter13, - #{<<"dob">> => <<"19440812">>, <<"dod">> => <<"20240213">>})), + #{<<"dob">> => <<"19440812">>, <<"dod">> => <<"20240213">>} + ) + ), FE14 = "\"M1\" IN $gns", {ok, Filter14} = generate_filter_expression(FE14, maps:new()), ?assert( apply_filter( Filter14, - #{<<"gns">> => [<<"MA">>, <<"M1">>, <<"A0">>]})), + #{<<"gns">> => [<<"MA">>, <<"M1">>, <<"A0">>]} + ) + ), ?assertNot( apply_filter( Filter14, - #{<<"gns">> => [<<"MA">>, <<"M2">>, <<"A0">>]})), + #{<<"gns">> => [<<"MA">>, <<"M2">>, <<"A0">>]} + ) + ), ?assertNot( apply_filter( Filter14, - #{<<"gns">> => <<"M1">>})), + #{<<"gns">> => <<"M1">>} + ) + ), FE15 = "(attribute_empty($dod) AND $dob < :date)" @@ -703,8 +778,6 @@ filterexpression_test() -> Filter19, #{<<"fn">> => <<"Aberg"/utf8>>} ) - ) - - . + ). -endif. diff --git a/src/leveled_head.erl b/src/leveled_head.erl index 5e158ad9..177882ad 100644 --- a/src/leveled_head.erl +++ b/src/leveled_head.erl @@ -18,18 +18,20 @@ -include("leveled.hrl"). --export([key_to_canonicalbinary/1, - build_head/2, - extract_metadata/3, - diff_indexspecs/3 - ]). - --export([get_size/2, - get_hash/2, - defined_objecttags/0, - default_reload_strategy/1, - standard_hash/1 - ]). +-export([ + key_to_canonicalbinary/1, + build_head/2, + extract_metadata/3, + diff_indexspecs/3 +]). + +-export([ + get_size/2, + get_hash/2, + defined_objecttags/0, + default_reload_strategy/1, + standard_hash/1 +]). %% Exported for testing purposes -export( @@ -37,74 +39,83 @@ riak_metadata_to_binary/2, riak_extract_metadata/2, get_indexes_from_siblingmetabin/2 - ]). + ] +). - --define(MAGIC, 53). % riak_kv -> riak_object +% riak_kv -> riak_object +-define(MAGIC, 53). -define(V1_VERS, 1). --type object_tag() :: ?STD_TAG|?RIAK_TAG. 
- % tags assigned to objects - % (not other special entities such as ?HEAD or ?IDX) +-type object_tag() :: ?STD_TAG | ?RIAK_TAG. +% tags assigned to objects +% (not other special entities such as ?HEAD or ?IDX) -type headonly_tag() :: ?HEAD_TAG. - % Tag assigned to head_only objects. Behaviour cannot be changed +% Tag assigned to head_only objects. Behaviour cannot be changed -type riak_metadata() :: - { - binary()|delete, - % Sibling Metadata - binary()|null, - % Vclock Metadata - non_neg_integer()|null, - % Hash of vclock - non-exportable - non_neg_integer() - % Size in bytes of real object - }. + { + binary() | delete, + % Sibling Metadata + binary() | null, + % Vclock Metadata + non_neg_integer() | null, + % Hash of vclock - non-exportable + non_neg_integer() + % Size in bytes of real object + }. -type std_metadata() :: { - non_neg_integer()|null, - % Hash of value - non_neg_integer(), - % Size in bytes of real object - list(tuple())|undefined - % User-define metadata + non_neg_integer() | null, + % Hash of value + non_neg_integer(), + % Size in bytes of real object + list(tuple()) | undefined + % User-define metadata }. - % std_metadata() must be outputted as the metadata format by any - % app-defined function +% std_metadata() must be outputted as the metadata format by any +% app-defined function -type head_metadata() :: leveled_codec:head_value(). --type object_metadata() :: riak_metadata()|std_metadata(). +-type object_metadata() :: riak_metadata() | std_metadata(). -type appdefinable_function() :: key_to_canonicalbinary | build_head | extract_metadata | diff_indexspecs. - % Functions for which default behaviour can be over-written for the - % application's own tags +% Functions for which default behaviour can be over-written for the +% application's own tags -type appdefinable_keyfun() :: fun((tuple()) -> binary()). -type appdefinable_headfun() :: fun((object_tag(), std_metadata()) -> head()). +%% erlfmt:ignore - issues with editors when function definitions are split -type appdefinable_metadatafun() :: - fun((leveled_codec:tag(), non_neg_integer(), binary()|delete) -> - {std_metadata(), list(erlang:timestamp())}). + fun((leveled_codec:tag(), non_neg_integer(), binary() | delete) + -> {std_metadata(), list(erlang:timestamp())} + ). +%% erlfmt:ignore - issues with editors when function definitions are split -type appdefinable_indexspecsfun() :: - fun((object_tag(), std_metadata(), std_metadata()|not_present) -> - leveled_codec:index_specs()). + fun((object_tag(), std_metadata(), std_metadata() | not_present) + -> leveled_codec:index_specs() + ). -type appdefinable_function_fun() :: - appdefinable_keyfun() | appdefinable_headfun() | - appdefinable_metadatafun() | appdefinable_indexspecsfun(). + appdefinable_keyfun() + | appdefinable_headfun() + | appdefinable_metadatafun() + | appdefinable_indexspecsfun(). -type appdefinable_function_tuple() :: {appdefinable_function(), appdefinable_function_fun()}. -type index_op() :: add | remove. -type index_value() :: integer() | binary(). --type head() :: binary()|tuple()|head_metadata(). +-type head() :: binary() | tuple() | head_metadata(). --export_type([object_tag/0, - headonly_tag/0, - head/0, - object_metadata/0, - appdefinable_function_tuple/0]). +-export_type([ + object_tag/0, + headonly_tag/0, + head/0, + object_metadata/0, + appdefinable_function_tuple/0 +]). 
%%%============================================================================ %%% Mutable External Functions @@ -115,39 +126,41 @@ %% Convert a key to a binary in a consistent way for the tag. The binary will %% then be used to create the hash key_to_canonicalbinary( - {?RIAK_TAG, Bucket, Key, null}) - when is_binary(Bucket), is_binary(Key) -> + {?RIAK_TAG, Bucket, Key, null} +) when + is_binary(Bucket), is_binary(Key) +-> <>; key_to_canonicalbinary( - {?RIAK_TAG, {BucketType, Bucket}, Key, SubKey}) - when is_binary(BucketType), is_binary(Bucket) -> - key_to_canonicalbinary({?RIAK_TAG, - <>, - Key, - SubKey}); + {?RIAK_TAG, {BucketType, Bucket}, Key, SubKey} +) when + is_binary(BucketType), is_binary(Bucket) +-> + key_to_canonicalbinary( + {?RIAK_TAG, <>, Key, SubKey} + ); key_to_canonicalbinary(Key) when element(1, Key) == ?STD_TAG -> default_key_to_canonicalbinary(Key); key_to_canonicalbinary(Key) -> OverrideFun = get_appdefined_function( - key_to_canonicalbinary, + key_to_canonicalbinary, fun default_key_to_canonicalbinary/1, 1 ), OverrideFun(Key). - + default_key_to_canonicalbinary(Key) -> leveled_util:t2b(Key). - -spec build_head (object_tag(), object_metadata()) -> head(); - (headonly_tag(), head_metadata()) -> head() . + (headonly_tag(), head_metadata()) -> head(). %% @doc %% Return the object metadata as a binary to be the "head" of the object build_head(?HEAD_TAG, Value) -> % Metadata is not extracted with head objects, the head response is - % just the unfiltered value that was input. + % just the unfiltered value that was input. default_build_head(?HEAD_TAG, Value); build_head(?RIAK_TAG, Metadata) -> {SibData, Vclock, _Hash, _Size} = Metadata, @@ -156,17 +169,18 @@ build_head(?STD_TAG, Metadata) -> default_build_head(?STD_TAG, Metadata); build_head(Tag, Metadata) -> OverrideFun = - get_appdefined_function(build_head, - fun default_build_head/2, - 2), + get_appdefined_function( + build_head, + fun default_build_head/2, + 2 + ), OverrideFun(Tag, Metadata). default_build_head(_Tag, Metadata) -> Metadata. - --spec extract_metadata(object_tag(), non_neg_integer(), binary()) - -> {object_metadata(), list(erlang:timestamp())}. +-spec extract_metadata(object_tag(), non_neg_integer(), binary()) -> + {object_metadata(), list(erlang:timestamp())}. %% @doc %% Take the inbound object and extract from it the metadata to be stored within %% the ledger (and ultimately returned from a leveled_boookie:book_head/4 @@ -187,19 +201,22 @@ extract_metadata(?STD_TAG, SizeAsStoredInJournal, Obj) -> default_extract_metadata(?STD_TAG, SizeAsStoredInJournal, Obj); extract_metadata(Tag, SizeAsStoredInJournal, Obj) -> OverrideFun = - get_appdefined_function(extract_metadata, - fun default_extract_metadata/3, - 3), + get_appdefined_function( + extract_metadata, + fun default_extract_metadata/3, + 3 + ), OverrideFun(Tag, SizeAsStoredInJournal, Obj). default_extract_metadata(_Tag, SizeAsStoredInJournal, Obj) -> {{standard_hash(Obj), SizeAsStoredInJournal, undefined}, []}. - --spec diff_indexspecs(object_tag(), - object_metadata(), - object_metadata()|not_present) - -> leveled_codec:index_specs(). +-spec diff_indexspecs( + object_tag(), + object_metadata(), + object_metadata() | not_present +) -> + leveled_codec:index_specs(). 
%% @doc %% Take an object metadata part from within the journal, and an object metadata %% part from the ledger (which should have a lower SQN), and generate index @@ -223,9 +240,11 @@ diff_indexspecs(?STD_TAG, UpdatedMetadata, CurrentMetadata) -> default_diff_indexspecs(?STD_TAG, UpdatedMetadata, CurrentMetadata); diff_indexspecs(Tag, UpdatedMetadata, CurrentMetadata) -> OverrideFun = - get_appdefined_function(diff_indexspecs, - fun default_diff_indexspecs/3, - 3), + get_appdefined_function( + diff_indexspecs, + fun default_diff_indexspecs/3, + 3 + ), OverrideFun(Tag, UpdatedMetadata, CurrentMetadata). default_diff_indexspecs(_Tag, _UpdatedMetadata, _CurrentMetadata) -> @@ -241,28 +260,29 @@ default_diff_indexspecs(_Tag, _UpdatedMetadata, _CurrentMetadata) -> defined_objecttags() -> [?STD_TAG, ?RIAK_TAG]. - -spec default_reload_strategy( - object_tag()) -> {object_tag(), leveled_codec:compaction_method()}. + object_tag() +) -> {object_tag(), leveled_codec:compaction_method()}. %% @doc %% State the compaction_method to be used when reloading the Ledger from the -%% journal for each object tag. Note, no compaction strategy required for +%% journal for each object tag. Note, no compaction strategy required for %% head_only tag default_reload_strategy(Tag) -> {Tag, retain}. -spec get_size( - object_tag(), object_metadata()) -> non_neg_integer(). + object_tag(), object_metadata() +) -> non_neg_integer(). %% @doc %% Fetch the size from the metadata get_size(?RIAK_TAG, {_, _, _, Size}) -> Size; -get_size(Tag, {_, Size, _}) when Tag =/= ?HEAD_TAG-> +get_size(Tag, {_, Size, _}) when Tag =/= ?HEAD_TAG -> Size. - -spec get_hash( - object_tag(), object_metadata()) -> non_neg_integer()|null. + object_tag(), object_metadata() +) -> non_neg_integer() | null. %% @doc %% Fetch the hash from the metadata get_hash(?RIAK_TAG, {_, _, Hash, _}) -> @@ -276,7 +296,6 @@ get_hash(Tag, {Hash, _, _}) when Tag =/= ?HEAD_TAG -> standard_hash(Obj) -> erlang:phash2(term_to_binary(Obj)). - %%%============================================================================ %%% Handling Override Functions %%%============================================================================ @@ -291,7 +310,7 @@ standard_hash(Obj) -> (diff_indexspecs, appdefinable_indexspecsfun(), 3) -> appdefinable_indexspecsfun(). %% @doc -%% If a keylist of [{function_name, fun()}] has been set as an environment +%% If a keylist of [{function_name, fun()}] has been set as an environment %% variable for a tag, then this FunctionName can be used instead of the %% default get_appdefined_function(FunctionName, DefaultFun, RequiredArity) -> @@ -306,11 +325,11 @@ get_appdefined_function(FunctionName, DefaultFun, RequiredArity) -> %%% Tag-specific Internal Functions %%%============================================================================ - -spec riak_extract_metadata( - binary()|delete, non_neg_integer()) -> {riak_metadata(), list()}. + binary() | delete, non_neg_integer() +) -> {riak_metadata(), list()}. 
%% @doc -%% Riak extract metadata should extract a metadata object which is a +%% Riak extract metadata should extract a metadata object which is a %% five-tuple of: %% - Binary of sibling Metadata %% - Binary of vector clock metadata @@ -318,99 +337,104 @@ get_appdefined_function(FunctionName, DefaultFun, RequiredArity) -> %% - The largest last modified date of the object %% - Size of the object %% -%% The metadata object should be returned with the full list of last +%% The metadata object should be returned with the full list of last %% modified dates (which will be used for recent anti-entropy index creation) riak_extract_metadata(delete, Size) -> {{delete, null, null, Size}, []}; riak_extract_metadata(ObjBin, Size) -> {VclockBin, SibBin, LastMods} = riak_metadata_from_binary(ObjBin), - {{binary:copy(SibBin), - binary:copy(VclockBin), - erlang:phash2(lists:sort(binary_to_term(VclockBin))), - Size}, - LastMods}. + { + { + binary:copy(SibBin), + binary:copy(VclockBin), + erlang:phash2(lists:sort(binary_to_term(VclockBin))), + Size + }, + LastMods + }. %% <>. riak_metadata_to_binary(VclockBin, SibMetaBin) -> VclockLen = byte_size(VclockBin), - <>. - -riak_metadata_from_binary(V1Binary) -> <> = V1Binary, + VclockBin/binary, SibMetaBin/binary>>. + +riak_metadata_from_binary(V1Binary) -> + <> = + V1Binary, <> = Rest, {SibMetaBin, LastMods} = case SibCount of SC when is_integer(SC) -> - get_metadata_from_siblings(SibsBin, - SibCount, - <>, - []) + get_metadata_from_siblings( + SibsBin, + SibCount, + <>, + [] + ) end, {VclockBin, SibMetaBin, LastMods}. get_metadata_from_siblings(<<>>, 0, SibMetaBin, LastMods) -> {SibMetaBin, LastMods}; -get_metadata_from_siblings(<>, - SibCount, - SibMetaBin, - LastMods) -> +get_metadata_from_siblings( + <>, + SibCount, + SibMetaBin, + LastMods +) -> <<_ValBin:ValLen/binary, MetaLen:32/integer, Rest1/binary>> = Rest0, <> = Rest1, LastMod = - case MetaBin of - <> -> - {MegaSec, Sec, MicroSec}; + {MegaSec, Sec, MicroSec}; _ -> {0, 0, 0} end, - get_metadata_from_siblings(Rest2, - SibCount - 1, - <>, - [LastMod|LastMods]). - - -get_indexes_from_siblingmetabin(<<0:32/integer, - MetaLen:32/integer, - MetaBin:MetaLen/binary, - RestBin/binary>>, - Indexes) -> + get_metadata_from_siblings( + Rest2, + SibCount - 1, + <>, + [LastMod | LastMods] + ). + +get_indexes_from_siblingmetabin( + <<0:32/integer, MetaLen:32/integer, MetaBin:MetaLen/binary, + RestBin/binary>>, + Indexes +) -> UpdIndexes = lists:umerge(get_indexes_frommetabin(MetaBin), Indexes), get_indexes_from_siblingmetabin(RestBin, UpdIndexes); -get_indexes_from_siblingmetabin(<>, - Indexes) when SibCount > 0 -> +get_indexes_from_siblingmetabin( + <>, + Indexes +) when SibCount > 0 -> get_indexes_from_siblingmetabin(RestBin, Indexes); get_indexes_from_siblingmetabin(_, Indexes) -> Indexes. - %% @doc %% Parse the metabinary for an individual sibling and return a list of index %% entries. -get_indexes_frommetabin(<<_LMD1:32/integer, _LMD2:32/integer, _LMD3:32/integer, - VTagLen:8/integer, _VTag:VTagLen/binary, - Deleted:1/binary-unit:8, - MetaRestBin/binary>>) when Deleted /= <<1>> -> +get_indexes_frommetabin( + <<_LMD1:32/integer, _LMD2:32/integer, _LMD3:32/integer, VTagLen:8/integer, + _VTag:VTagLen/binary, Deleted:1/binary-unit:8, MetaRestBin/binary>> +) when Deleted /= <<1>> -> lists:usort(indexes_of_metabinary(MetaRestBin)); get_indexes_frommetabin(_) -> []. 
- indexes_of_metabinary(<<>>) -> []; -indexes_of_metabinary(<>) -> +indexes_of_metabinary( + <> +) -> Key = decode_maybe_binary(KeyBin), case Key of <<"index">> -> @@ -420,7 +444,6 @@ indexes_of_metabinary(<>) -> Bin; decode_maybe_binary(<<0, Bin/binary>>) -> @@ -429,14 +452,14 @@ decode_maybe_binary(<<_Other:8, Bin/binary>>) -> Bin. -spec diff_index_data( - [{binary(), index_value()}], [{binary(), index_value()}]) -> - [{index_op(), binary(), index_value()}]. + [{binary(), index_value()}], [{binary(), index_value()}] +) -> + [{index_op(), binary(), index_value()}]. diff_index_data(OldIndexes, AllIndexes) -> OldIndexSet = ordsets:from_list(OldIndexes), AllIndexSet = ordsets:from_list(AllIndexes), diff_specs_core(AllIndexSet, OldIndexSet). - diff_specs_core(AllIndexSet, OldIndexSet) -> NewIndexSet = ordsets:subtract(AllIndexSet, OldIndexSet), RemoveIndexSet = @@ -454,12 +477,12 @@ diff_specs_core(AllIndexSet, OldIndexSet) -> %% form of triplets of the form %% {IndexOperation, IndexField, IndexValue}. -spec assemble_index_specs( - [{binary(), binary()}], index_op()) -> - [{index_op(), binary(), binary()}]. + [{binary(), binary()}], index_op() +) -> + [{index_op(), binary(), binary()}]. assemble_index_specs(Indexes, IndexOp) -> [{IndexOp, Index, Value} || {Index, Value} <- Indexes]. - %%%============================================================================ %%% Test %%%============================================================================ @@ -470,66 +493,92 @@ assemble_index_specs(Indexes, IndexOp) -> index_extract_test() -> SibMetaBin = - <<0,0,0,1,0,0,0,0,0,0,0,221,0,0,6,48,0,4,130,247,0,1,250,134, - 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, - 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, - 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0, - 0,6,1,105,110,100,101,120,0,0,0,79,0,131,108,0,0,0,2,104,2, - 107,0,8,105,100,120,49,95,98,105,110,107,0,20,50,49,53,50, - 49,49,48,55,50,51,49,55,51,48,83,111,112,104,105,97,104,2, - 107,0,8,105,100,120,49,95,98,105,110,107,0,19,50,49,56,50, - 48,53,49,48,49,51,48,49,52,54,65,118,101,114,121,106,0,0,0, - 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, - 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, - 50,45,53,54,98,51,98,97,57,57,99,55,56,50>>, + <<0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 221, 0, 0, 6, 48, 0, 4, 130, 247, 0, + 1, 250, 134, 1, 101, 0, 0, 0, 0, 4, 1, 77, 68, 75, 0, 0, 0, 44, 0, + 131, 107, 0, 39, 77, 68, 86, 101, 49, 55, 52, 55, 48, 50, 55, 45, + 54, 50, 99, 49, 45, 52, 48, 57, 55, 45, 97, 53, 102, 50, 45, 53, 54, + 98, 51, 98, 97, 57, 57, 99, 55, 56, 50, 0, 0, 0, 6, 1, 105, 110, + 100, 101, 120, 0, 0, 0, 79, 0, 131, 108, 0, 0, 0, 2, 104, 2, 107, 0, + 8, 105, 100, 120, 49, 95, 98, 105, 110, 107, 0, 20, 50, 49, 53, 50, + 49, 49, 48, 55, 50, 51, 49, 55, 51, 48, 83, 111, 112, 104, 105, 97, + 104, 2, 107, 0, 8, 105, 100, 120, 49, 95, 98, 105, 110, 107, 0, 19, + 50, 49, 56, 50, 48, 53, 49, 48, 49, 51, 48, 49, 52, 54, 65, 118, + 101, 114, 121, 106, 0, 0, 0, 5, 1, 77, 68, 75, 50, 0, 0, 0, 44, 0, + 131, 107, 0, 39, 77, 68, 86, 101, 49, 55, 52, 55, 48, 50, 55, 45, + 54, 50, 99, 49, 45, 52, 48, 57, 55, 45, 97, 53, 102, 50, 45, 53, 54, + 98, 51, 98, 97, 57, 57, 99, 55, 56, 50>>, Indexes = get_indexes_from_siblingmetabin(SibMetaBin, []), - ExpIndexes = [{"idx1_bin","21521107231730Sophia"}, - {"idx1_bin","21820510130146Avery"}], + ExpIndexes = [ + {"idx1_bin", "21521107231730Sophia"}, + {"idx1_bin", "21820510130146Avery"} + ], ?assertMatch(ExpIndexes, Indexes), SibMetaBinNoIdx = - 
<<0,0,0,1,0,0,0,0,0,0,0,128,0,0,6,48,0,4,130,247,0,1,250,134, - 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, - 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, - 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0,0, - 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, - 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, - 50,45,53,54,98,51,98,97,57,57,99,55,56,50>>, + <<0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 6, 48, 0, 4, 130, 247, 0, + 1, 250, 134, 1, 101, 0, 0, 0, 0, 4, 1, 77, 68, 75, 0, 0, 0, 44, 0, + 131, 107, 0, 39, 77, 68, 86, 101, 49, 55, 52, 55, 48, 50, 55, 45, + 54, 50, 99, 49, 45, 52, 48, 57, 55, 45, 97, 53, 102, 50, 45, 53, 54, + 98, 51, 98, 97, 57, 57, 99, 55, 56, 50, 0, 0, 0, 5, 1, 77, 68, 75, + 50, 0, 0, 0, 44, 0, 131, 107, 0, 39, 77, 68, 86, 101, 49, 55, 52, + 55, 48, 50, 55, 45, 54, 50, 99, 49, 45, 52, 48, 57, 55, 45, 97, 53, + 102, 50, 45, 53, 54, 98, 51, 98, 97, 57, 57, 99, 55, 56, 50>>, ?assertMatch([], get_indexes_from_siblingmetabin(SibMetaBinNoIdx, [])), SibMetaBinOverhang = - <<0,0,0,1,0,0,0,0,0,0,0,221,0,0,6,48,0,4,130,247,0,1,250,134, - 1,101,0,0,0,0,4,1,77,68,75,0,0,0,44,0,131,107,0,39,77,68, - 86,101,49,55,52,55,48,50,55,45,54,50,99,49,45,52,48,57,55, - 45,97,53,102,50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0, - 0,6,1,105,110,100,101,120,0,0,0,79,0,131,108,0,0,0,2,104,2, - 107,0,8,105,100,120,49,95,98,105,110,107,0,20,50,49,53,50, - 49,49,48,55,50,51,49,55,51,48,83,111,112,104,105,97,104,2, - 107,0,8,105,100,120,49,95,98,105,110,107,0,19,50,49,56,50, - 48,53,49,48,49,51,48,49,52,54,65,118,101,114,121,106,0,0,0, - 5,1,77,68,75,50,0,0,0,44,0,131,107,0,39,77,68,86,101,49,55, - 52,55,48,50,55,45,54,50,99,49,45,52,48,57,55,45,97,53,102, - 50,45,53,54,98,51,98,97,57,57,99,55,56,50,0,0,0,0,0,0,0,4, - 0,0,0,0>>, - ?assertMatch(ExpIndexes, - get_indexes_from_siblingmetabin(SibMetaBinOverhang, [])). + <<0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 221, 0, 0, 6, 48, 0, 4, 130, 247, 0, + 1, 250, 134, 1, 101, 0, 0, 0, 0, 4, 1, 77, 68, 75, 0, 0, 0, 44, 0, + 131, 107, 0, 39, 77, 68, 86, 101, 49, 55, 52, 55, 48, 50, 55, 45, + 54, 50, 99, 49, 45, 52, 48, 57, 55, 45, 97, 53, 102, 50, 45, 53, 54, + 98, 51, 98, 97, 57, 57, 99, 55, 56, 50, 0, 0, 0, 6, 1, 105, 110, + 100, 101, 120, 0, 0, 0, 79, 0, 131, 108, 0, 0, 0, 2, 104, 2, 107, 0, + 8, 105, 100, 120, 49, 95, 98, 105, 110, 107, 0, 20, 50, 49, 53, 50, + 49, 49, 48, 55, 50, 51, 49, 55, 51, 48, 83, 111, 112, 104, 105, 97, + 104, 2, 107, 0, 8, 105, 100, 120, 49, 95, 98, 105, 110, 107, 0, 19, + 50, 49, 56, 50, 48, 53, 49, 48, 49, 51, 48, 49, 52, 54, 65, 118, + 101, 114, 121, 106, 0, 0, 0, 5, 1, 77, 68, 75, 50, 0, 0, 0, 44, 0, + 131, 107, 0, 39, 77, 68, 86, 101, 49, 55, 52, 55, 48, 50, 55, 45, + 54, 50, 99, 49, 45, 52, 48, 57, 55, 45, 97, 53, 102, 50, 45, 53, 54, + 98, 51, 98, 97, 57, 57, 99, 55, 56, 50, 0, 0, 0, 0, 0, 0, 0, 4, 0, + 0, 0, 0>>, + ?assertMatch( + ExpIndexes, + get_indexes_from_siblingmetabin(SibMetaBinOverhang, []) + ). 
diff_index_test() -> UpdIndexes = - [{<<"idx1_bin">>,<<"20840930001702Zoe">>}, - {<<"idx1_bin">>,<<"20931011172606Emily">>}], + [ + {<<"idx1_bin">>, <<"20840930001702Zoe">>}, + {<<"idx1_bin">>, <<"20931011172606Emily">>} + ], OldIndexes = - [{<<"idx1_bin">>,<<"20231126131808Madison">>}, - {<<"idx1_bin">>,<<"20931011172606Emily">>}], + [ + {<<"idx1_bin">>, <<"20231126131808Madison">>}, + {<<"idx1_bin">>, <<"20931011172606Emily">>} + ], IdxSpecs = diff_index_data(OldIndexes, UpdIndexes), - ?assertMatch([{add, <<"idx1_bin">>, <<"20840930001702Zoe">>}, - {remove, <<"idx1_bin">>,<<"20231126131808Madison">>}], IdxSpecs). + ?assertMatch( + [ + {add, <<"idx1_bin">>, <<"20840930001702Zoe">>}, + {remove, <<"idx1_bin">>, <<"20231126131808Madison">>} + ], + IdxSpecs + ). decode_test() -> Bin = <<"999">>, BinTerm = term_to_binary("999"), - ?assertMatch("999", binary_to_list( - decode_maybe_binary(<<1:8/integer, Bin/binary>>))), + ?assertMatch( + "999", + binary_to_list( + decode_maybe_binary(<<1:8/integer, Bin/binary>>) + ) + ), ?assertMatch("999", decode_maybe_binary(<<0:8/integer, BinTerm/binary>>)), - ?assertMatch("999", binary_to_list( - decode_maybe_binary(<<2:8/integer, Bin/binary>>))). - --endif. \ No newline at end of file + ?assertMatch( + "999", + binary_to_list( + decode_maybe_binary(<<2:8/integer, Bin/binary>>) + ) + ). + +-endif. diff --git a/src/leveled_iclerk.erl b/src/leveled_iclerk.erl index a2f9d329..f891531d 100644 --- a/src/leveled_iclerk.erl +++ b/src/leveled_iclerk.erl @@ -25,7 +25,7 @@ %% those deltas will be resolved through external anti-entropy (e.g. read %% repair or AAE) - or alternatively the risk of loss of persisted data from %% the ledger is accepted for this data type -%% +%% %% During the compaction process for the Journal, the file chosen for %% compaction is scanned in SQN order, and a FilterFun is passed (which will %% normally perform a check against a snapshot of the persisted part of the @@ -74,21 +74,25 @@ -include("leveled.hrl"). --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). - --export([clerk_new/1, - clerk_compact/6, - clerk_hashtablecalc/3, - clerk_trim/3, - clerk_stop/1, - clerk_loglevel/2, - clerk_addlogs/2, - clerk_removelogs/2]). +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). + +-export([ + clerk_new/1, + clerk_compact/6, + clerk_hashtablecalc/3, + clerk_trim/3, + clerk_stop/1, + clerk_loglevel/2, + clerk_addlogs/2, + clerk_removelogs/2 +]). -export([schedule_compaction/3]). @@ -101,54 +105,70 @@ -define(SINGLEFILE_COMPACTION_TARGET, 50.0). -define(MAXRUNLENGTH_COMPACTION_TARGET, 75.0). --record(state, {inker :: pid() | undefined, - max_run_length :: integer() | undefined, - cdb_options = #cdb_options{} :: #cdb_options{}, - waste_retention_period :: integer() | undefined, - waste_path :: string() | undefined, - reload_strategy = ?DEFAULT_RELOAD_STRATEGY :: list(), - singlefile_compactionperc = ?SINGLEFILE_COMPACTION_TARGET :: float(), - maxrunlength_compactionperc = ?MAXRUNLENGTH_COMPACTION_TARGET ::float(), - compression_method = native :: lz4|native|none, - scored_files = [] :: list(candidate()), - scoring_state :: scoring_state()|undefined, - score_onein = 1 :: pos_integer()}). - --record(candidate, {low_sqn :: integer(), - filename :: string(), - journal :: pid(), - compaction_perc :: float()}). 
- --record(scoring_state, {filter_fun :: leveled_inker:filterfun(), - filter_server :: leveled_inker:filterserver(), - max_sqn :: non_neg_integer(), - close_fun :: leveled_inker:filterclosefun(), - start_time :: erlang:timestamp()}). +-record(state, { + inker :: pid() | undefined, + max_run_length :: integer() | undefined, + cdb_options = #cdb_options{} :: #cdb_options{}, + waste_retention_period :: integer() | undefined, + waste_path :: string() | undefined, + reload_strategy = ?DEFAULT_RELOAD_STRATEGY :: list(), + singlefile_compactionperc = ?SINGLEFILE_COMPACTION_TARGET :: float(), + maxrunlength_compactionperc = ?MAXRUNLENGTH_COMPACTION_TARGET :: float(), + compression_method = native :: lz4 | native | none, + scored_files = [] :: list(candidate()), + scoring_state :: scoring_state() | undefined, + score_onein = 1 :: pos_integer() +}). + +-record(candidate, { + low_sqn :: integer(), + filename :: string(), + journal :: pid(), + compaction_perc :: float() +}). + +-record(scoring_state, { + filter_fun :: leveled_inker:filterfun(), + filter_server :: leveled_inker:filterserver(), + max_sqn :: non_neg_integer(), + close_fun :: leveled_inker:filterclosefun(), + start_time :: erlang:timestamp() +}). -type iclerk_options() :: #iclerk_options{}. -type candidate() :: #candidate{}. -type scoring_state() :: #scoring_state{}. -type score_parameters() :: {integer(), float(), float()}. - % Score parameters are a tuple - % - of maximum run length; how long a run of consecutive files can be for - % one compaction run - % - maximum run compaction target; percentage space which should be - % released from a compaction run of the maximum length to make it a run - % worthwhile of compaction (released space is 100.0 - target e.g. 70.0 - % means that 30.0% should be released) - % - single_file compaction target; percentage space which should be - % released from a compaction run of a single file to make it a run - % worthwhile of compaction (released space is 100.0 - target e.g. 70.0 - % means that 30.0% should be released) +% Score parameters are a tuple +% - of maximum run length; how long a run of consecutive files can be for +% one compaction run +% - maximum run compaction target; percentage space which should be +% released from a compaction run of the maximum length to make it a run +% worthwhile of compaction (released space is 100.0 - target e.g. 70.0 +% means that 30.0% should be released) +% - single_file compaction target; percentage space which should be +% released from a compaction run of a single file to make it a run +% worthwhile of compaction (released space is 100.0 - target e.g. 70.0 +% means that 30.0% should be released) -type key_size() :: - {{non_neg_integer(), - leveled_codec:journal_key_tag(), - leveled_codec:ledger_key()}, non_neg_integer()}. + { + { + non_neg_integer(), + leveled_codec:journal_key_tag(), + leveled_codec:ledger_key() + }, + non_neg_integer() + }. -type corrupted_test_key_size() :: - {{non_neg_integer(), - leveled_codec:journal_key_tag(), - leveled_codec:ledger_key(), - null}, non_neg_integer()}. + { + { + non_neg_integer(), + leveled_codec:journal_key_tag(), + leveled_codec:ledger_key(), + null + }, + non_neg_integer() + }. %%%============================================================================ %%% API @@ -166,23 +186,22 @@ clerk_new(InkerClerkOpts) -> ), {ok, Clerk}. --spec clerk_compact(pid(), - pid(), - leveled_inker:filterinitfun(), - leveled_inker:filterclosefun(), - leveled_inker:filterfun(), - list()) -> ok. 
+-spec clerk_compact( + pid(), + pid(), + leveled_inker:filterinitfun(), + leveled_inker:filterclosefun(), + leveled_inker:filterfun(), + list() +) -> ok. %% @doc -%% Trigger a compaction for this clerk if the threshold of data recovery has +%% Trigger a compaction for this clerk if the threshold of data recovery has %% been met clerk_compact(Pid, Checker, InitiateFun, CloseFun, FilterFun, Manifest) -> - gen_server:cast(Pid, - {compact, - Checker, - InitiateFun, - CloseFun, - FilterFun, - Manifest}). + gen_server:cast( + Pid, + {compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest} + ). -spec clerk_trim(pid(), integer(), list()) -> ok. %% @doc @@ -196,8 +215,14 @@ clerk_trim(Pid, PersistedSQN, ManifestAsList) -> %% of the hastable in the CDB file - so that the file is not blocked during %% this calculation clerk_hashtablecalc(HashTree, StartPos, CDBpid) -> - {ok, Clerk} = gen_server:start_link(?MODULE, [leveled_log:get_opts(), - #iclerk_options{}], []), + {ok, Clerk} = gen_server:start_link( + ?MODULE, + [ + leveled_log:get_opts(), + #iclerk_options{} + ], + [] + ), gen_server:cast(Clerk, {hashtable_calc, HashTree, StartPos, CDBpid}). -spec clerk_stop(pid()) -> ok. @@ -224,7 +249,6 @@ clerk_addlogs(Pid, ForcedLogs) -> clerk_removelogs(Pid, ForcedLogs) -> gen_server:cast(Pid, {remove_logs, ForcedLogs}). - -spec clerk_scorefilelist(pid(), list(candidate())) -> ok. %% @doc %% Score the file at the head of the list and then send the tail of the list to @@ -234,7 +258,6 @@ clerk_scorefilelist(Pid, []) -> clerk_scorefilelist(Pid, CandidateList) -> gen_server:cast(Pid, {score_filelist, CandidateList}). - %%%============================================================================ %%% gen_server callbacks %%%============================================================================ @@ -245,15 +268,15 @@ init([LogOpts, IClerkOpts]) -> CDBopts = IClerkOpts#iclerk_options.cdb_options, WP = CDBopts#cdb_options.waste_path, WRP = IClerkOpts#iclerk_options.waste_retention_period, - - MRL = + + MRL = case IClerkOpts#iclerk_options.max_run_length of undefined -> ?MAX_COMPACTION_RUN; MRL0 -> MRL0 end, - + SFL_CompPerc = case IClerkOpts#iclerk_options.singlefile_compactionperc of undefined -> @@ -268,20 +291,21 @@ init([LogOpts, IClerkOpts]) -> MRLCP when is_float(MRLCP) -> MRLCP end, - - {ok, #state{max_run_length = MRL, - inker = IClerkOpts#iclerk_options.inker, - cdb_options = CDBopts, - reload_strategy = ReloadStrategy, - waste_path = WP, - waste_retention_period = WRP, - singlefile_compactionperc = SFL_CompPerc, - maxrunlength_compactionperc = MRL_CompPerc, - compression_method = - IClerkOpts#iclerk_options.compression_method, - score_onein = - IClerkOpts#iclerk_options.score_onein - }}. + + {ok, #state{ + max_run_length = MRL, + inker = IClerkOpts#iclerk_options.inker, + cdb_options = CDBopts, + reload_strategy = ReloadStrategy, + waste_path = WP, + waste_retention_period = WRP, + singlefile_compactionperc = SFL_CompPerc, + maxrunlength_compactionperc = MRL_CompPerc, + compression_method = + IClerkOpts#iclerk_options.compression_method, + score_onein = + IClerkOpts#iclerk_options.score_onein + }}. 
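%% Editor's note - referring back to the score_parameters() type documented
%% above, a concrete parameter tuple (using the values exercised by
%% find_bestrun_test/0 further down, not necessarily the module defaults)
%% would be:
    %% {MaxRunLength, MaxRunLengthCompactionTarget, SingleFileCompactionTarget}
    %% With these values a full four-file run must be expected to retain no
    %% more than 60% of its space (i.e. release at least 40%), while a lone
    %% file must be expected to retain no more than 40% (release at least 60%)
    %% for the run to be worth compacting.
    ScoreParams = {4, 60.0, 40.0}.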
handle_call(stop, _From, State) -> case State#state.scoring_state of @@ -290,25 +314,29 @@ handle_call(stop, _From, State) -> ScoringState -> % Closed when scoring files, and so need to shutdown FilterServer % to close down neatly - CloseFun = ScoringState#scoring_state.close_fun, - FilterServer = ScoringState#scoring_state.filter_server, - CloseFun(FilterServer) + CloseFun = ScoringState#scoring_state.close_fun, + FilterServer = ScoringState#scoring_state.filter_server, + CloseFun(FilterServer) end, {stop, normal, ok, State}. -handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest0}, - State) -> - leveled_log:log(ic014, [State#state.reload_strategy, - State#state.max_run_length]), +handle_cast( + {compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest0}, + State +) -> + leveled_log:log(ic014, [ + State#state.reload_strategy, + State#state.max_run_length + ]), % Empty the waste folder clear_waste(State), - SW = os:timestamp(), - % Clock to record the time it takes to calculate the potential for - % compaction - + SW = os:timestamp(), + % Clock to record the time it takes to calculate the potential for + % compaction + % Need to fetch manifest at start rather than have it be passed in % Don't want to process a queued call waiting on an old manifest - [_Active|Manifest] = Manifest0, + [_Active | Manifest] = Manifest0, {FilterServer, MaxSQN} = InitiateFun(Checker), NotRollingFun = fun({_LowSQN, _FN, Pid, _LK}) -> @@ -325,17 +353,24 @@ handle_cast({compact, Checker, InitiateFun, CloseFun, FilterFun, Manifest0}, }, {noreply, State#state{scored_files = [], scoring_state = ScoringState}}; handle_cast( - {score_filelist, [Entry|Tail]}, - State = #state{scoring_state = ScoringState}) - when ?IS_DEF(ScoringState) -> + {score_filelist, [Entry | Tail]}, + State = #state{scoring_state = ScoringState} +) when + ?IS_DEF(ScoringState) +-> Candidates = State#state.scored_files, {LowSQN, FN, JournalP, _LK} = Entry, CpctPerc = - case {leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()), + case + { + leveled_cdb:cdb_getcachedscore(JournalP, os:timestamp()), rand:uniform(State#state.score_onein) == 1, - State#state.score_onein} of - {CachedScore, _UseNewScore, ScoreOneIn} - when CachedScore == undefined; ScoreOneIn == 1 -> + State#state.score_onein + } + of + {CachedScore, _UseNewScore, ScoreOneIn} when + CachedScore == undefined; ScoreOneIn == 1 + -> % If caches are not used, always use the current score check_single_file( JournalP, @@ -351,7 +386,7 @@ handle_cast( % Expectation is that this will reduce instances of individual % files being compacted when a run is missed due to cached % scores being used in surrounding journals - NewScore = + NewScore = check_single_file( JournalP, ScoringState#scoring_state.filter_fun, @@ -374,10 +409,12 @@ handle_cast( compaction_perc = CpctPerc }, ok = clerk_scorefilelist(self(), Tail), - {noreply, State#state{scored_files = [Candidate|Candidates]}}; + {noreply, State#state{scored_files = [Candidate | Candidates]}}; handle_cast( - scoring_complete, State = #state{scoring_state = ScoringState}) - when ?IS_DEF(ScoringState) -> + scoring_complete, State = #state{scoring_state = ScoringState} +) when + ?IS_DEF(ScoringState) +-> MaxRunLength = State#state.max_run_length, CDBopts = State#state.cdb_options, Candidates = lists:reverse(State#state.scored_files), @@ -387,8 +424,7 @@ handle_cast( CloseFun = ScoringState#scoring_state.close_fun, SW = ScoringState#scoring_state.start_time, ScoreParams = - {MaxRunLength, - 
State#state.maxrunlength_compactionperc, + {MaxRunLength, State#state.maxrunlength_compactionperc, State#state.singlefile_compactionperc}, {BestRun0, Score} = assess_candidates(Candidates, ScoreParams), leveled_log:log_timer(ic003, [Score, length(BestRun0)], SW), @@ -409,7 +445,7 @@ handle_cast( FilesToDelete = lists:map( fun(C) -> - { + { C#candidate.low_sqn, C#candidate.filename, C#candidate.journal, @@ -422,16 +458,19 @@ handle_cast( ok = CloseFun(FilterServer), ok = leveled_inker:ink_clerkcomplete( - State#state.inker, ManifestSlice, FilesToDelete); + State#state.inker, ManifestSlice, FilesToDelete + ); false -> ok = CloseFun(FilterServer), ok = leveled_inker:ink_clerkcomplete(State#state.inker, [], []) end, {noreply, State#state{scoring_state = undefined}, hibernate}; handle_cast( - {trim, PersistedSQN, ManifestAsList}, State = #state{inker = Ink}) - when ?IS_DEF(Ink) -> - FilesToDelete = + {trim, PersistedSQN, ManifestAsList}, State = #state{inker = Ink} +) when + ?IS_DEF(Ink) +-> + FilesToDelete = leveled_imanifest:find_persistedentries(PersistedSQN, ManifestAsList), leveled_log:log(ic007, []), ok = leveled_inker:ink_clerkcomplete(Ink, [], FilesToDelete), @@ -467,14 +506,14 @@ terminate(Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - %%%============================================================================ %%% External functions %%%============================================================================ -spec schedule_compaction( - list(integer()), integer(), {integer(), integer(), integer()}) -> - integer(). + list(integer()), integer(), {integer(), integer(), integer()} +) -> + integer(). %% @doc %% Schedule the next compaction event for this store. Chooses a random %% interval, and then a random start time within the first third @@ -497,52 +536,52 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> % n intervals at random, but then only chose the next one. After each % event is occurred the random process is repeated to determine the next % event to schedule i.e. the unused schedule is discarded. - + IntervalLength = 60 div ?INTERVALS_PER_HOUR, TotalHours = length(CompactionHours), - + LocalTime = calendar:now_to_local_time(CurrentTS), - {{NowY, NowMon, NowD}, - {NowH, NowMin, _NowS}} = LocalTime, + {{NowY, NowMon, NowD}, {NowH, NowMin, _NowS}} = LocalTime, CurrentInterval = {NowH, NowMin div IntervalLength + 1}, - + % Randomly select an hour and an interval for each of the runs expected % today. RandSelect = fun(_X) -> - {lists:nth(rand:uniform(TotalHours), CompactionHours), - rand:uniform(?INTERVALS_PER_HOUR)} + { + lists:nth(rand:uniform(TotalHours), CompactionHours), + rand:uniform(?INTERVALS_PER_HOUR) + } end, RandIntervals = lists:sort(lists:map(RandSelect, lists:seq(1, RunsPerDay))), - + % Pick the next interval from the list. 
The intervals before current time % are considered as intervals tomorrow, so will only be next if there are % no other today CheckNotBefore = fun(A) -> A =< CurrentInterval end, {TooEarly, MaybeOK} = lists:splitwith(CheckNotBefore, RandIntervals), - {NextDate, {NextH, NextI}} = + {NextDate, {NextH, NextI}} = case MaybeOK of [] -> % Use first interval picked tomorrow if none of selected run times % are today Tmrw = calendar:date_to_gregorian_days(NowY, NowMon, NowD) + 1, - {calendar:gregorian_days_to_date(Tmrw), - lists:nth(1, TooEarly)}; + {calendar:gregorian_days_to_date(Tmrw), lists:nth(1, TooEarly)}; _ -> {{NowY, NowMon, NowD}, lists:nth(1, MaybeOK)} end, - + % Calculate the offset in seconds to this next interval - NextS0 = NextI * (IntervalLength * 60) - - rand:uniform(IntervalLength * 60), + NextS0 = + NextI * (IntervalLength * 60) - + rand:uniform(IntervalLength * 60), NextM = NextS0 div 60, NextS = NextS0 rem 60, TimeDiff = calendar:time_difference(LocalTime, {NextDate, {NextH, NextM, NextS}}), {Days, {Hours, Mins, Secs}} = TimeDiff, Days * 86400 + Hours * 3600 + Mins * 60 + Secs. - %%%============================================================================ %%% Internal functions @@ -553,33 +592,43 @@ schedule_compaction(CompactionHours, RunsPerDay, CurrentTS) -> leveled_inker:filterfun(), leveled_inker:filterserver(), leveled_codec:sqn(), - non_neg_integer(), non_neg_integer(), - leveled_codec:compaction_strategy()) -> - float(). + non_neg_integer(), + non_neg_integer(), + leveled_codec:compaction_strategy() +) -> + float(). %% @doc -%% Get a score for a single CDB file in the journal. This will pull out a bunch +%% Get a score for a single CDB file in the journal. This will pull out a bunch %% of keys and sizes at random in an efficient way (by scanning the hashtable %% then just picking the key and size information of disk). -%% -%% The score should represent a percentage which is the size of the file by -%% comparison to the original file if compaction was to be run. So if a file +%% +%% The score should represent a percentage which is the size of the file by +%% comparison to the original file if compaction was to be run. So if a file %% can be reduced in size by 30% the score will be 70%. -%% -%% The score is based on a random sample - so will not be consistent between +%% +%% The score is based on a random sample - so will not be consistent between %% calls. -check_single_file(CDB, FilterFun, FilterServer, MaxSQN, - SampleSize, BatchSize, - ReloadStrategy) -> +check_single_file( + CDB, + FilterFun, + FilterServer, + MaxSQN, + SampleSize, + BatchSize, + ReloadStrategy +) -> FN = leveled_cdb:cdb_filename(CDB), SW = os:timestamp(), PositionList = leveled_cdb:cdb_getpositions(CDB, SampleSize), KeySizeList = fetch_inbatches(PositionList, BatchSize, CDB, []), - Score = - size_comparison_score(KeySizeList, - FilterFun, - FilterServer, - MaxSQN, - ReloadStrategy), + Score = + size_comparison_score( + KeySizeList, + FilterFun, + FilterServer, + MaxSQN, + ReloadStrategy + ), safely_log_filescore(PositionList, FN, Score, SW), Score. @@ -587,29 +636,37 @@ safely_log_filescore([], FN, Score, SW) -> leveled_log:log_timer(ic004, [Score, empty, FN], SW); safely_log_filescore(PositionList, FN, Score, SW) -> AvgJump = - (lists:last(PositionList) - lists:nth(1, PositionList)) - div length(PositionList), + (lists:last(PositionList) - lists:nth(1, PositionList)) div + length(PositionList), leveled_log:log_timer(ic004, [Score, AvgJump, FN], SW). 
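%% Editor's note - a worked illustration of the score produced by
%% check_single_file/7 above and size_comparison_score/5 which follows. The
%% figures are invented for the example; the real calculation also nets off
%% the per-entry CRC size and works from a random sample of keys, not the
%% whole file.
    ActiveSize = 60000,     % sampled bytes still current in the ledger
    ReplacedSize = 40000,   % sampled bytes belonging to superseded objects
    Score = 100 * ActiveSize / (ActiveSize + ReplacedSize).
    %% Score = 60.0, so compaction would be expected to reclaim roughly 40%
    %% of the file's space.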
--spec size_comparison_score(list(key_size() | corrupted_test_key_size()), - leveled_inker:filterfun(), - leveled_inker:filterserver(), - leveled_codec:sqn(), - leveled_codec:compaction_strategy()) -> - float(). -size_comparison_score(KeySizeList, - FilterFun, FilterServer, MaxSQN, - ReloadStrategy) -> +-spec size_comparison_score( + list(key_size() | corrupted_test_key_size()), + leveled_inker:filterfun(), + leveled_inker:filterserver(), + leveled_codec:sqn(), + leveled_codec:compaction_strategy() +) -> + float(). +size_comparison_score( + KeySizeList, + FilterFun, + FilterServer, + MaxSQN, + ReloadStrategy +) -> FoldFunForSizeCompare = fun(KS, {ActSize, RplSize}) -> case KS of {{SQN, Type, PK}, Size} -> ToRetain = - to_retain({SQN, Type, PK}, - FilterFun, - FilterServer, - MaxSQN, - ReloadStrategy), + to_retain( + {SQN, Type, PK}, + FilterFun, + FilterServer, + MaxSQN, + ReloadStrategy + ), case ToRetain of true -> {ActSize + Size - ?CRC_SIZE, RplSize}; @@ -621,12 +678,12 @@ size_comparison_score(KeySizeList, _ -> % There is a key which is not in expected format % Not that the key-size list has been filtered for - % errors by leveled_cdb - but this doesn't know the + % errors by leveled_cdb - but this doesn't know the % expected format of the key {ActSize, RplSize} end end, - + R0 = lists:foldl(FoldFunForSizeCompare, {0, 0}, KeySizeList), {ActiveSize, ReplacedSize} = R0, case ActiveSize + ReplacedSize of @@ -636,12 +693,11 @@ size_comparison_score(KeySizeList, 100 * ActiveSize / (ActiveSize + ReplacedSize) end. - fetch_inbatches([], _BatchSize, CDB, CheckedList) -> ok = leveled_cdb:cdb_clerkcomplete(CDB), CheckedList; fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) -> - {Batch, Tail} = + {Batch, Tail} = if length(PositionList) >= BatchSize -> lists:split(BatchSize, PositionList); @@ -651,27 +707,27 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) -> KL_List = leveled_cdb:cdb_directfetch(CDB, Batch, key_size), fetch_inbatches(Tail, BatchSize, CDB, CheckedList ++ KL_List). - -spec assess_candidates( - list(candidate()), score_parameters()) -> {list(candidate()), float()}. + list(candidate()), score_parameters() +) -> {list(candidate()), float()}. %% @doc %% For each run length we need to assess all the possible runs of candidates, %% to determine which is the best score - to be put forward as the best %% candidate run for compaction. -%% +%% %% Although this requires many loops over the list of the candidate, as the %% file scores have already been calculated the cost per loop should not be %% a high burden. Reducing the maximum run length, will reduce the cost of %% this exercise should be a problem. %% %% The score parameters are used to produce the score of the compaction run, -%% with a higher score being better. The parameters are the maximum run +%% with a higher score being better. The parameters are the maximum run %% length and the compaction targets (for max run length and single file). %% The score of an individual file is the approximate percentage of the space -%% that would be retained after compaction (e.g. 100 less the percentage of -%% space wasted by historic objects). +%% that would be retained after compaction (e.g. 100 less the percentage of +%% space wasted by historic objects). %% -%% So a file score of 60% indicates that 40% of the space would be +%% So a file score of 60% indicates that 40% of the space would be %% reclaimed following compaction. A single file target of 50% would not be %% met for this file. 
However, if there are 4 consecutive files scoring 60%, %% and the maximum run length is 4, and the maximum run length compaction @@ -680,22 +736,27 @@ fetch_inbatches(PositionList, BatchSize, CDB, CheckedList) -> assess_candidates(AllCandidates, Params) -> MaxRunLength = min(element(1, Params), length(AllCandidates)), NaiveBestRun = lists:sublist(AllCandidates, MaxRunLength), - % This will end up being scored twice, but lets take a guess at - % the best scoring run to take into the loop + % This will end up being scored twice, but lets take a guess at + % the best scoring run to take into the loop FoldFun = fun(RunLength, Best) -> assess_for_runlength(RunLength, AllCandidates, Params, Best) end, % Check all run lengths to find the best candidate. Reverse the list of % run lengths, so that longer runs win on equality of score - lists:foldl(FoldFun, - {NaiveBestRun, score_run(NaiveBestRun, Params)}, - lists:reverse(lists:seq(1, MaxRunLength))). - + lists:foldl( + FoldFun, + {NaiveBestRun, score_run(NaiveBestRun, Params)}, + lists:reverse(lists:seq(1, MaxRunLength)) + ). --spec assess_for_runlength(integer(), list(candidate()), score_parameters(), - {list(candidate()), float()}) - -> {list(candidate()), float()}. +-spec assess_for_runlength( + integer(), + list(candidate()), + score_parameters(), + {list(candidate()), float()} +) -> + {list(candidate()), float()}. %% @doc %% For a given run length, calculate the scores for all consecutive runs of %% files, comparing the score with the best run which has beens een so far. @@ -714,24 +775,23 @@ assess_for_runlength(RunLength, AllCandidates, Params, Best) -> end, lists:foldl(FoldFun, Best, lists:seq(1, NumberOfRuns)). - -spec score_run(list(candidate()), score_parameters()) -> float(). %% @doc -%% Score a run. Caluclate the avergae score across all the files in the run, +%% Score a run. Caluclate the avergae score across all the files in the run, %% and deduct that from a target score. Good candidate runs for comapction -%% have larger (positive) scores. Bad candidate runs for compaction have +%% have larger (positive) scores. Bad candidate runs for compaction have %% negative scores. score_run([], _Params) -> 0.0; score_run(Run, {MaxRunLength, MR_CT, SF_CT}) -> - TargetIncr = + TargetIncr = case MaxRunLength of 1 -> 0.0; MaxRunSize -> (MR_CT - SF_CT) / (MaxRunSize - 1) end, - Target = SF_CT + TargetIncr * (length(Run) - 1), + Target = SF_CT + TargetIncr * (length(Run) - 1), RunTotal = lists:foldl( fun(Cand, Acc) -> Acc + Cand#candidate.compaction_perc end, @@ -740,45 +800,81 @@ score_run(Run, {MaxRunLength, MR_CT, SF_CT}) -> ), Target - RunTotal / length(Run). - print_compaction_run(BestRun, ScoreParams) -> leveled_log:log( - ic005, [length(BestRun), score_run(BestRun, ScoreParams)]), + ic005, [length(BestRun), score_run(BestRun, ScoreParams)] + ), lists:foreach( fun(File) -> leveled_log:log(ic006, [File#candidate.filename]) end, - BestRun). + BestRun + ). sort_run(RunOfFiles) -> CompareFun = fun(Cand1, Cand2) -> - Cand1#candidate.low_sqn =< Cand2#candidate.low_sqn end, + Cand1#candidate.low_sqn =< Cand2#candidate.low_sqn + end, lists:sort(CompareFun, RunOfFiles). -compact_files(BestRun, CDBopts, FilterFun, FilterServer, - MaxSQN, RStrategy, PressMethod) -> +compact_files( + BestRun, + CDBopts, + FilterFun, + FilterServer, + MaxSQN, + RStrategy, + PressMethod +) -> BatchesOfPositions = get_all_positions(BestRun, []), - compact_files(BatchesOfPositions, - CDBopts, - null, - FilterFun, - FilterServer, - MaxSQN, - RStrategy, - PressMethod, - []). 
- - -compact_files([], _CDBopts, null, _FilterFun, _FilterServer, _MaxSQN, - _RStrategy, _PressMethod, ManSlice0) -> + compact_files( + BatchesOfPositions, + CDBopts, + null, + FilterFun, + FilterServer, + MaxSQN, + RStrategy, + PressMethod, + [] + ). + +compact_files( + [], + _CDBopts, + null, + _FilterFun, + _FilterServer, + _MaxSQN, + _RStrategy, + _PressMethod, + ManSlice0 +) -> ManSlice0; -compact_files([], _CDBopts, ActiveJournal0, _FilterFun, _FilterServer, _MaxSQN, - _RStrategy, _PressMethod, ManSlice0) -> +compact_files( + [], + _CDBopts, + ActiveJournal0, + _FilterFun, + _FilterServer, + _MaxSQN, + _RStrategy, + _PressMethod, + ManSlice0 +) -> ManSlice1 = ManSlice0 ++ leveled_imanifest:generate_entry(ActiveJournal0), ManSlice1; -compact_files([Batch|T], CDBopts, ActiveJournal0, - FilterFun, FilterServer, MaxSQN, - RStrategy, PressMethod, ManSlice0) -> +compact_files( + [Batch | T], + CDBopts, + ActiveJournal0, + FilterFun, + FilterServer, + MaxSQN, + RStrategy, + PressMethod, + ManSlice0 +) -> {SrcJournal, PositionList} = Batch, KVCs0 = leveled_cdb:cdb_directfetch(SrcJournal, PositionList, key_value_check), @@ -786,7 +882,8 @@ compact_files([Batch|T], CDBopts, ActiveJournal0, filter_output(KVCs0, FilterFun, FilterServer, MaxSQN, RStrategy), {ActiveJournal1, ManSlice1} = write_values( - KVCs1, CDBopts, ActiveJournal0, ManSlice0, PressMethod), + KVCs1, CDBopts, ActiveJournal0, ManSlice0, PressMethod + ), % The inker's clerk will no longer need these (potentially large) binaries, % so force garbage collection at this point. This will mean when we roll % each CDB file there will be no remaining references to the binaries that @@ -806,7 +903,7 @@ compact_files([Batch|T], CDBopts, ActiveJournal0, get_all_positions([], PositionBatches) -> PositionBatches; -get_all_positions([HeadRef|RestOfBest], PositionBatches) -> +get_all_positions([HeadRef | RestOfBest], PositionBatches) -> SrcJournal = HeadRef#candidate.journal, Positions = leveled_cdb:cdb_getpositions(SrcJournal, all), leveled_log:log(ic008, [HeadRef#candidate.filename, length(Positions)]), @@ -830,13 +927,12 @@ split_positions_into_batches(Positions, Journal, Batches) -> Tail, Journal, Batches ++ [{Journal, ThisBatch}] ). - %% @doc %% For the Keys and values taken from the Journal file, which are required %% in the compacted journal file. To be required, they must still be active %% (i.e. be the current SQN for that LedgerKey in the Ledger). However, if %% it is not active, we still need to retain some information if for this -%% object tag we want to be able to rebuild the KeyStore by relaoding the +%% object tag we want to be able to rebuild the KeyStore by relaoding the %% KeyDeltas (the retain reload strategy) %% %% If the reload strategy is recalc, we assume that we can reload by @@ -853,7 +949,6 @@ filter_output(KVCs, FilterFun, FilterServer, MaxSQN, Strategy) -> filter_output_fun(FilterFun, FilterServer, MaxSQN, Strategy), lists:reverse(lists:foldl(FoldFun, [], KVCs)). - filter_output_fun(FilterFun, FilterServer, MaxSQN, Strategy) -> fun(KVC0, Acc) -> case KVC0 of @@ -865,22 +960,24 @@ filter_output_fun(FilterFun, FilterServer, MaxSQN, Strategy) -> to_retain(JK, FilterFun, FilterServer, MaxSQN, Strategy), case ToRetain of true -> - [KVC0|Acc]; + [KVC0 | Acc]; convert -> {JK0, JV0} = leveled_codec:revert_to_keydeltas(JK, JV), - [{JK0, JV0, null}|Acc]; + [{JK0, JV0, null} | Acc]; false -> Acc end end end. 
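%% Editor's note - a sketch only of the three outcomes filter_output_fun/4
%% acts on; the decision itself is made by to_retain/5 below and also depends
%% on MaxSQN and the per-tag reload strategy. The atoms on the right are
%% illustrative, not values used by the module.
    case to_retain(JournalKey, FilterFun, FilterServer, MaxSQN, Strategy) of
        true ->
            %% Still the current SQN for this ledger key - keep the full
            %% key and value in the compacted journal
            keep;
        convert ->
            %% Superseded, but a retain reload strategy applies - keep only
            %% the key deltas so the ledger can be rebuilt from the journal
            keep_keydeltas;
        false ->
            %% Superseded and safe to drop from the compacted file
            drop
    end.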
--spec to_retain(leveled_codec:journal_key(), - leveled_inker:filterfun(), - leveled_inker:filterserver(), - leveled_codec:sqn(), - leveled_codec:compaction_strategy()) -> boolean()|convert. +-spec to_retain( + leveled_codec:journal_key(), + leveled_inker:filterfun(), + leveled_inker:filterserver(), + leveled_codec:sqn(), + leveled_codec:compaction_strategy() +) -> boolean() | convert. to_retain(JournalKey, FilterFun, FilterServer, MaxSQN, ReloadStrategy) -> {SQN, LK} = leveled_codec:from_journalkey(JournalKey), @@ -912,17 +1009,18 @@ to_retain(JournalKey, FilterFun, FilterServer, MaxSQN, ReloadStrategy) -> end end. - write_values([], _CDBopts, Journal0, ManSlice0, _PressMethod) -> {Journal0, ManSlice0}; write_values(KVCList, CDBopts, Journal0, ManSlice0, PressMethod) -> - KVList = - lists:map(fun({K, V, _C}) -> - % Compress the value as part of compaction - {K, leveled_codec:maybe_compress(V, PressMethod)} - end, - KVCList), - {ok, Journal1} = + KVList = + lists:map( + fun({K, V, _C}) -> + % Compress the value as part of compaction + {K, leveled_codec:maybe_compress(V, PressMethod)} + end, + KVCList + ), + {ok, Journal1} = case Journal0 of null -> {TK, _TV} = lists:nth(1, KVList), @@ -942,7 +1040,7 @@ write_values(KVCList, CDBopts, Journal0, ManSlice0, PressMethod) -> ManSlice1 = ManSlice0 ++ leveled_imanifest:generate_entry(Journal1), write_values(KVCList, CDBopts, null, ManSlice1, PressMethod) end. - + clear_waste(State) -> case State#state.waste_path of undefined -> @@ -953,7 +1051,7 @@ clear_waste(State) -> N = calendar:datetime_to_gregorian_seconds(calendar:local_time()), DeleteJournalFun = fun(DelJ) -> - LMD = {_,_} = filelib:last_modified(WP ++ DelJ), + LMD = {_, _} = filelib:last_modified(WP ++ DelJ), case N - calendar:datetime_to_gregorian_seconds(LMD) of LMD_Delta when LMD_Delta >= WRP -> ok = file:delete(WP ++ DelJ), @@ -980,15 +1078,18 @@ schedule_test() -> schedule_test_bycount(4). schedule_test_bycount(N) -> - LocalTimeAsDateTime = {{2017,3,30},{15,27,0}}, - CurrentTS= local_time_to_now(LocalTimeAsDateTime), + LocalTimeAsDateTime = {{2017, 3, 30}, {15, 27, 0}}, + CurrentTS = local_time_to_now(LocalTimeAsDateTime), SecondsToCompaction0 = schedule_compaction([16], N, CurrentTS), io:format("Seconds to compaction ~w~n", [SecondsToCompaction0]), ?assertMatch(true, SecondsToCompaction0 > 1800), ?assertMatch(true, SecondsToCompaction0 < 5700), - SecondsToCompaction1 = schedule_compaction([14], N, CurrentTS), % tomorrow! - io:format("Seconds to compaction ~w for count ~w~n", - [SecondsToCompaction1, N]), + % tomorrow! + SecondsToCompaction1 = schedule_compaction([14], N, CurrentTS), + io:format( + "Seconds to compaction ~w for count ~w~n", + [SecondsToCompaction1, N] + ), ?assertMatch(true, SecondsToCompaction1 >= 81180), ?assertMatch(true, SecondsToCompaction1 =< 84780). 
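%% Editor's note - a minimal usage sketch for schedule_compaction/3, as
%% exercised by the schedule tests above. The message name and timer wiring
%% are hypothetical, not part of the module.
    %% Allow compaction runs only within the 02:00-05:59 local-time hours,
    %% aiming for 4 runs per day; the result is the wait in seconds until the
    %% randomly chosen slot.
    SecondsToWait =
        leveled_iclerk:schedule_compaction([2, 3, 4, 5], 4, os:timestamp()),
    erlang:send_after(SecondsToWait * 1000, self(), prompt_compaction).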
@@ -1001,12 +1102,17 @@ local_time_to_now(DateTime) -> simple_score_test() -> DummyC = #candidate{ - low_sqn = 1, filename="dummy", journal=self(), compaction_perc = 0 + low_sqn = 1, + filename = "dummy", + journal = self(), + compaction_perc = 0 }, - Run1 = [DummyC#candidate{compaction_perc = 75.0}, - DummyC#candidate{compaction_perc = 75.0}, - DummyC#candidate{compaction_perc = 76.0}, - DummyC#candidate{compaction_perc = 70.0}], + Run1 = [ + DummyC#candidate{compaction_perc = 75.0}, + DummyC#candidate{compaction_perc = 75.0}, + DummyC#candidate{compaction_perc = 76.0}, + DummyC#candidate{compaction_perc = 70.0} + ], ?assertMatch(-4.0, score_run(Run1, {4, 70.0, 40.0})), Run2 = [DummyC#candidate{compaction_perc = 75.0}], ?assertMatch(-35.0, score_run(Run2, {4, 70.0, 40.0})), @@ -1017,7 +1123,7 @@ simple_score_test() -> file_gc_test() -> State = #state{ - waste_path="test/test_area/waste/", waste_retention_period = 1 + waste_path = "test/test_area/waste/", waste_retention_period = 1 }, ok = filelib:ensure_dir(State#state.waste_path), file:write_file( @@ -1037,21 +1143,23 @@ file_gc_test() -> {ok, ClearedJournals2} = file:list_dir(State#state.waste_path), ?assertMatch([], ClearedJournals2). - check_bestrun(CandidateList, Params) -> {BestRun, _Score} = assess_candidates(CandidateList, Params), lists:map(fun(C) -> C#candidate.filename end, BestRun). find_bestrun_test() -> -%% Tests dependent on these defaults -%% -define(MAX_COMPACTION_RUN, 4). -%% -define(SINGLEFILE_COMPACTION_TARGET, 40.0). -%% -define(MAXRUNLENGTH_COMPACTION_TARGET, 60.0). -%% Tested first with blocks significant as no back-tracking + %% Tests dependent on these defaults + %% -define(MAX_COMPACTION_RUN, 4). + %% -define(SINGLEFILE_COMPACTION_TARGET, 40.0). + %% -define(MAXRUNLENGTH_COMPACTION_TARGET, 60.0). + %% Tested first with blocks significant as no back-tracking Params = {4, 60.0, 40.0}, DummyC = #candidate{ - low_sqn = 1, filename="dummy", journal=self(), compaction_perc = 0 + low_sqn = 1, + filename = "dummy", + journal = self(), + compaction_perc = 0 }, Block1 = [ @@ -1089,12 +1197,12 @@ find_bestrun_test() -> CList0 = Block1 ++ Block2 ++ Block3 ++ Block4 ++ Block5, ?assertMatch(["b", "c", "d", "e"], check_bestrun(CList0, Params)), CList1 = - CList0 ++ [DummyC#candidate{compaction_perc = 20.0, filename="s"}], + CList0 ++ [DummyC#candidate{compaction_perc = 20.0, filename = "s"}], ?assertMatch(["s"], check_bestrun(CList1, Params)), CList2 = Block4 ++ Block3 ++ Block2 ++ Block1 ++ Block5, ?assertMatch(["h", "a", "b", "c"], check_bestrun(CList2, Params)), CList3 = Block5 ++ Block1 ++ Block2 ++ Block3 ++ Block4, - ?assertMatch(["b", "c", "d", "e"],check_bestrun(CList3, Params)). + ?assertMatch(["b", "c", "d", "e"], check_bestrun(CList3, Params)). handle_emptycandidatelist_test() -> {A, B} = assess_candidates([], {4, 60.0, 40.0}), @@ -1105,14 +1213,21 @@ test_ledgerkey(Key) -> {o, "Bucket", Key, null}. test_inkerkv(SQN, Key, V, IdxSpecs) -> - leveled_codec:to_inkerkv(test_ledgerkey(Key), SQN, V, IdxSpecs, - native, false). + leveled_codec:to_inkerkv( + test_ledgerkey(Key), + SQN, + V, + IdxSpecs, + native, + false + ). 
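%% Editor's note - to make the -4.0 and -35.0 assertions in simple_score_test/0
%% above concrete, the score_run/2 arithmetic for parameters {4, 70.0, 40.0}
%% works out as follows:
    TargetIncr = (70.0 - 40.0) / (4 - 1),        % 10.0 added per extra file
    Target4 = 40.0 + TargetIncr * (4 - 1),       % 70.0 target for a 4-file run
    AvgPerc4 = (75.0 + 75.0 + 76.0 + 70.0) / 4,  % 74.0 average space retained
    ScoreRun1 = Target4 - AvgPerc4,              % -4.0, so not worth compacting
    ScoreRun2 = 40.0 - 75.0.                     % -35.0 for the single-file run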
fetch_testcdb(RP) -> FN1 = leveled_inker:filepath(RP, 1, new_journal), - {ok, - CDB1} = leveled_cdb:cdb_open_writer(FN1, - #cdb_options{binary_mode=true}), + {ok, CDB1} = leveled_cdb:cdb_open_writer( + FN1, + #cdb_options{binary_mode = true} + ), {K1, V1} = test_inkerkv(1, "Key1", "Value1", {[], infinity}), {K2, V2} = test_inkerkv(2, "Key2", "Value2", {[], infinity}), {K3, V3} = test_inkerkv(3, "Key3", "Value3", {[], infinity}), @@ -1130,23 +1245,26 @@ fetch_testcdb(RP) -> ok = leveled_cdb:cdb_put(CDB1, K7, V7), ok = leveled_cdb:cdb_put(CDB1, K8, V8), {ok, FN2} = leveled_cdb:cdb_complete(CDB1), - leveled_cdb:cdb_open_reader(FN2, #cdb_options{binary_mode=true}). + leveled_cdb:cdb_open_reader(FN2, #cdb_options{binary_mode = true}). check_single_file_test() -> RP = "test/test_area/", RS = leveled_codec:inker_reload_strategy([]), ok = filelib:ensure_dir(leveled_inker:filepath(RP, journal_dir)), {ok, CDB} = fetch_testcdb(RP), - LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, - {2, {o, "Bucket", "Key2", null}}, - {3, {o, "Bucket", "Key3", null}}], + LedgerSrv1 = [ + {8, {o, "Bucket", "Key1", null}}, + {2, {o, "Bucket", "Key2", null}}, + {3, {o, "Bucket", "Key3", null}} + ], LedgerFun1 = fun(Srv, Key, ObjSQN) -> - case lists:keyfind(ObjSQN, 1, Srv) of - {ObjSQN, Key} -> - current; - _ -> - replaced - end end, + case lists:keyfind(ObjSQN, 1, Srv) of + {ObjSQN, Key} -> + current; + _ -> + replaced + end + end, Score1 = check_single_file(CDB, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), ?assertMatch(37.5, Score1), LedgerFun2 = fun(_Srv, _Key, _ObjSQN) -> current end, @@ -1159,108 +1277,127 @@ check_single_file_test() -> ok = leveled_cdb:cdb_deletepending(CDB), ok = leveled_cdb:cdb_destroy(CDB). - compact_single_file_setup() -> RP = "test/test_area/", ok = filelib:ensure_dir(leveled_inker:filepath(RP, journal_dir)), {ok, CDB} = fetch_testcdb(RP), - Candidate = #candidate{journal = CDB, - low_sqn = 1, - filename = "test", - compaction_perc = 37.5}, - LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, - {2, {o, "Bucket", "Key2", null}}, - {3, {o, "Bucket", "Key3", null}}], + Candidate = #candidate{ + journal = CDB, + low_sqn = 1, + filename = "test", + compaction_perc = 37.5 + }, + LedgerSrv1 = [ + {8, {o, "Bucket", "Key1", null}}, + {2, {o, "Bucket", "Key2", null}}, + {3, {o, "Bucket", "Key3", null}} + ], LedgerFun1 = fun(Srv, Key, ObjSQN) -> - case lists:keyfind(ObjSQN, 1, Srv) of - {ObjSQN, Key} -> - current; - _ -> - replaced - end end, + case lists:keyfind(ObjSQN, 1, Srv) of + {ObjSQN, Key} -> + current; + _ -> + replaced + end + end, CompactFP = leveled_inker:filepath(RP, journal_compact_dir), ok = filelib:ensure_dir(CompactFP), {Candidate, LedgerSrv1, LedgerFun1, CompactFP, CDB}. 
compact_single_file_recovr_test() -> - {Candidate, - LedgerSrv1, - LedgerFun1, - CompactFP, - CDB} = compact_single_file_setup(), - CDBOpts = #cdb_options{binary_mode=true}, + {Candidate, LedgerSrv1, LedgerFun1, CompactFP, CDB} = compact_single_file_setup(), + CDBOpts = #cdb_options{binary_mode = true}, [{LowSQN, FN, _PidOldR, LastKey}] = - compact_files([Candidate], - CDBOpts#cdb_options{file_path=CompactFP}, - LedgerFun1, - LedgerSrv1, - 9, - [{?STD_TAG, recovr}], - native), + compact_files( + [Candidate], + CDBOpts#cdb_options{file_path = CompactFP}, + LedgerFun1, + LedgerSrv1, + 9, + [{?STD_TAG, recovr}], + native + ), io:format("FN of ~s~n", [FN]), ?assertMatch(2, LowSQN), {ok, PidR} = leveled_cdb:cdb_reopen_reader(FN, LastKey, CDBOpts), - ?assertMatch(probably, - leveled_cdb:cdb_keycheck(PidR, - {8, - stnd, - test_ledgerkey("Key1")})), - ?assertMatch(missing, leveled_cdb:cdb_get(PidR, - {7, - stnd, - test_ledgerkey("Key1")})), - ?assertMatch(missing, leveled_cdb:cdb_get(PidR, - {1, - stnd, - test_ledgerkey("Key1")})), - RKV1 = leveled_cdb:cdb_get(PidR, - {2, - stnd, - test_ledgerkey("Key2")}), - ?assertMatch({{_, _}, {"Value2", {[], infinity}}}, - leveled_codec:from_inkerkv(RKV1)), + ?assertMatch( + probably, + leveled_cdb:cdb_keycheck( + PidR, + {8, stnd, test_ledgerkey("Key1")} + ) + ), + ?assertMatch( + missing, + leveled_cdb:cdb_get( + PidR, + {7, stnd, test_ledgerkey("Key1")} + ) + ), + ?assertMatch( + missing, + leveled_cdb:cdb_get( + PidR, + {1, stnd, test_ledgerkey("Key1")} + ) + ), + RKV1 = leveled_cdb:cdb_get( + PidR, + {2, stnd, test_ledgerkey("Key2")} + ), + ?assertMatch( + {{_, _}, {"Value2", {[], infinity}}}, + leveled_codec:from_inkerkv(RKV1) + ), ok = leveled_cdb:cdb_close(PidR), ok = leveled_cdb:cdb_deletepending(CDB), ok = leveled_cdb:cdb_destroy(CDB). 
- compact_single_file_retain_test() -> - {Candidate, - LedgerSrv1, - LedgerFun1, - CompactFP, - CDB} = compact_single_file_setup(), - CDBOpts = #cdb_options{binary_mode=true}, + {Candidate, LedgerSrv1, LedgerFun1, CompactFP, CDB} = compact_single_file_setup(), + CDBOpts = #cdb_options{binary_mode = true}, [{LowSQN, FN, _PidOldR, LastKey}] = - compact_files([Candidate], - CDBOpts#cdb_options{file_path=CompactFP}, - LedgerFun1, - LedgerSrv1, - 9, - [{?STD_TAG, retain}], - native), + compact_files( + [Candidate], + CDBOpts#cdb_options{file_path = CompactFP}, + LedgerFun1, + LedgerSrv1, + 9, + [{?STD_TAG, retain}], + native + ), io:format("FN of ~s~n", [FN]), ?assertMatch(1, LowSQN), {ok, PidR} = leveled_cdb:cdb_reopen_reader(FN, LastKey, CDBOpts), - ?assertMatch(probably, - leveled_cdb:cdb_keycheck(PidR, - {8, - stnd, - test_ledgerkey("Key1")})), - ?assertMatch(missing, leveled_cdb:cdb_get(PidR, - {7, - stnd, - test_ledgerkey("Key1")})), - ?assertMatch(missing, leveled_cdb:cdb_get(PidR, - {1, - stnd, - test_ledgerkey("Key1")})), - RKV1 = leveled_cdb:cdb_get(PidR, - {2, - stnd, - test_ledgerkey("Key2")}), - ?assertMatch({{_, _}, {"Value2", {[], infinity}}}, - leveled_codec:from_inkerkv(RKV1)), + ?assertMatch( + probably, + leveled_cdb:cdb_keycheck( + PidR, + {8, stnd, test_ledgerkey("Key1")} + ) + ), + ?assertMatch( + missing, + leveled_cdb:cdb_get( + PidR, + {7, stnd, test_ledgerkey("Key1")} + ) + ), + ?assertMatch( + missing, + leveled_cdb:cdb_get( + PidR, + {1, stnd, test_ledgerkey("Key1")} + ) + ), + RKV1 = leveled_cdb:cdb_get( + PidR, + {2, stnd, test_ledgerkey("Key2")} + ), + ?assertMatch( + {{_, _}, {"Value2", {[], infinity}}}, + leveled_codec:from_inkerkv(RKV1) + ), ok = leveled_cdb:cdb_close(PidR), ok = leveled_cdb:cdb_deletepending(CDB), ok = leveled_cdb:cdb_destroy(CDB). @@ -1270,13 +1407,15 @@ compact_empty_file_test() -> ok = filelib:ensure_dir(leveled_inker:filepath(RP, journal_dir)), FN1 = leveled_inker:filepath(RP, 1, new_journal), RS = leveled_codec:inker_reload_strategy([]), - CDBopts = #cdb_options{binary_mode=true}, + CDBopts = #cdb_options{binary_mode = true}, {ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBopts), {ok, FN2} = leveled_cdb:cdb_complete(CDB1), {ok, CDB2} = leveled_cdb:cdb_open_reader(FN2), - LedgerSrv1 = [{8, {o, "Bucket", "Key1", null}}, - {2, {o, "Bucket", "Key2", null}}, - {3, {o, "Bucket", "Key3", null}}], + LedgerSrv1 = [ + {8, {o, "Bucket", "Key1", null}}, + {2, {o, "Bucket", "Key2", null}}, + {3, {o, "Bucket", "Key3", null}} + ], LedgerFun1 = fun(_Srv, _Key, _ObjSQN) -> replaced end, Score1 = check_single_file(CDB2, LedgerFun1, LedgerSrv1, 9, 8, 4, RS), ?assert((+0.0 =:= Score1) orelse (-0.0 =:= Score1)), @@ -1286,16 +1425,19 @@ compact_empty_file_test() -> compare_candidate_test() -> DummyC = #candidate{ - low_sqn = 1, filename="dummy", journal=self(), compaction_perc = 0 + low_sqn = 1, + filename = "dummy", + journal = self(), + compaction_perc = 0 }, - Candidate1 = DummyC#candidate{low_sqn=1}, - Candidate2 = DummyC#candidate{low_sqn=2}, - Candidate3 = DummyC#candidate{low_sqn=3}, - Candidate4 = DummyC#candidate{low_sqn=4}, + Candidate1 = DummyC#candidate{low_sqn = 1}, + Candidate2 = DummyC#candidate{low_sqn = 2}, + Candidate3 = DummyC#candidate{low_sqn = 3}, + Candidate4 = DummyC#candidate{low_sqn = 4}, ?assertMatch( [Candidate1, Candidate2, Candidate3, Candidate4], sort_run([Candidate3, Candidate2, Candidate4, Candidate1]) - ). + ). compact_singlefile_totwosmallfiles_test_() -> {timeout, 60, fun compact_singlefile_totwosmallfiles_testto/0}. 
@@ -1305,41 +1447,46 @@ compact_singlefile_totwosmallfiles_testto() -> CP = "test/test_area/journal/journal_file/post_compact/", ok = filelib:ensure_dir(CP), FN1 = leveled_inker:filepath(RP, 1, new_journal), - CDBoptsLarge = #cdb_options{binary_mode=true, max_size=30000000}, + CDBoptsLarge = #cdb_options{binary_mode = true, max_size = 30000000}, {ok, CDB1} = leveled_cdb:cdb_open_writer(FN1, CDBoptsLarge), lists:foreach( fun(X) -> LK = test_ledgerkey("Key" ++ integer_to_list(X)), Value = crypto:strong_rand_bytes(1024), - {IK, IV} = - leveled_codec:to_inkerkv(LK, X, Value, - {[], infinity}, - native, true), + {IK, IV} = + leveled_codec:to_inkerkv( + LK, + X, + Value, + {[], infinity}, + native, + true + ), ok = leveled_cdb:cdb_put(CDB1, IK, IV) end, lists:seq(1, 1000) ), {ok, NewName} = leveled_cdb:cdb_complete(CDB1), {ok, CDBr} = leveled_cdb:cdb_open_reader(NewName), - CDBoptsSmall = - #cdb_options{binary_mode=true, max_size=400000, file_path=CP}, + CDBoptsSmall = + #cdb_options{binary_mode = true, max_size = 400000, file_path = CP}, BestRun1 = [ #candidate{ - low_sqn=1, - filename=leveled_cdb:cdb_filename(CDBr), - journal=CDBr, - compaction_perc=50.0 + low_sqn = 1, + filename = leveled_cdb:cdb_filename(CDBr), + journal = CDBr, + compaction_perc = 50.0 } ], FakeFilterFun = - fun(_FS, _LK, SQN) -> + fun(_FS, _LK, SQN) -> case SQN rem 2 of 0 -> current; _ -> replaced end end, - + ManifestSlice = compact_files( BestRun1, @@ -1362,23 +1509,28 @@ compact_singlefile_totwosmallfiles_testto() -> ok = leveled_cdb:cdb_destroy(CDBr). size_score_test() -> - KeySizeList = - [{{1, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key1">>, null}}, 104}, + KeySizeList = + [ + {{1, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key1">>, null}}, 104}, {{2, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key2">>, null}}, 124}, {{3, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key3">>, null}}, 144}, {{4, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key4">>, null}}, 154}, - {{5, - ?INKT_STND, - {?STD_TAG, <<"B">>, <<"Key5">>, <<"Subk1">>}, null}, - 164}, + { + {5, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key5">>, <<"Subk1">>}, + null}, + 164 + }, {{6, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key6">>, null}}, 174}, - {{7, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key7">>, null}}, 184}], + {{7, ?INKT_STND, {?STD_TAG, <<"B">>, <<"Key7">>, null}}, 184} + ], MaxSQN = 6, CurrentList = - [{?STD_TAG, <<"B">>, <<"Key1">>, null}, - {?STD_TAG, <<"B">>, <<"Key4">>, null}, - {?STD_TAG, <<"B">>, <<"Key5">>, <<"Subk1">>}, - {?STD_TAG, <<"B">>, <<"Key6">>, null}], + [ + {?STD_TAG, <<"B">>, <<"Key1">>, null}, + {?STD_TAG, <<"B">>, <<"Key4">>, null}, + {?STD_TAG, <<"B">>, <<"Key5">>, <<"Subk1">>}, + {?STD_TAG, <<"B">>, <<"Key6">>, null} + ], FilterFun = fun(L, K, _SQN) -> case lists:member(K, L) of @@ -1401,6 +1553,6 @@ size_score_test() -> coverage_cheat_test() -> {noreply, _State0} = handle_info(timeout, #state{}), {ok, _State1} = code_change(null, #state{}, null), - terminate(error, #state{}). + terminate(error, #state{}). -endif. diff --git a/src/leveled_imanifest.erl b/src/leveled_imanifest.erl index 919c240c..278ade38 100644 --- a/src/leveled_imanifest.erl +++ b/src/leveled_imanifest.erl @@ -1,24 +1,24 @@ %% -------- Inker Manifest --------- -%% +%% -module(leveled_imanifest). -export([ - generate_entry/1, - add_entry/3, - append_lastkey/3, - remove_entry/2, - find_entry/2, - find_persistedentries/2, - head_entry/1, - to_list/1, - from_list/1, - reader/2, - writer/3, - printer/1, - complete_filex/0, - get_cdbpids/1 - ]). 
+ generate_entry/1, + add_entry/3, + append_lastkey/3, + remove_entry/2, + find_entry/2, + find_persistedentries/2, + head_entry/1, + to_list/1, + from_list/1, + reader/2, + writer/3, + printer/1, + complete_filex/0, + get_cdbpids/1 +]). -define(MANIFEST_FILEX, "man"). -define(PENDING_FILEX, "pnd"). @@ -68,23 +68,24 @@ add_entry(Manifest, Entry, ToEnd) -> true -> prepend_entry({SQN, StrippedName, PidR, LastKey}, Manifest); false -> - Man0 = [{SQN, StrippedName, PidR, LastKey}|to_list(Manifest)], + Man0 = [{SQN, StrippedName, PidR, LastKey} | to_list(Manifest)], Man1 = lists:reverse(lists:sort(Man0)), from_list(Man1) end. -spec append_lastkey( - manifest(), pid(), leveled_codec:journal_key()) -> manifest(). + manifest(), pid(), leveled_codec:journal_key() +) -> manifest(). %% @doc %% On discovery of the last key in the last journal entry, the manifest can %% be updated through this function to have the last key append_lastkey(Manifest, Pid, LastKey) -> - [{SQNMarker, SQNL}|ManifestTail] = Manifest, - [{E_SQN, E_FN, E_P, E_LK}|SQNL_Tail] = SQNL, - case {E_P, E_LK} of + [{SQNMarker, SQNL} | ManifestTail] = Manifest, + [{E_SQN, E_FN, E_P, E_LK} | SQNL_Tail] = SQNL, + case {E_P, E_LK} of {Pid, empty} -> UpdEntry = {E_SQN, E_FN, E_P, LastKey}, - [{SQNMarker, [UpdEntry|SQNL_Tail]}|ManifestTail]; + [{SQNMarker, [UpdEntry | SQNL_Tail]} | ManifestTail]; _ -> Manifest end. @@ -102,9 +103,9 @@ remove_entry(Manifest, Entry) -> %% @doc %% Given a SQN find the relevant manifest_entry, returning just the pid() of %% the journal file (which may be a string() in unit tests) -find_entry(SQN, [{SQNMarker, SubL}|_Tail]) when SQN >= SQNMarker -> +find_entry(SQN, [{SQNMarker, SubL} | _Tail]) when SQN >= SQNMarker -> find_subentry(SQN, SubL); -find_entry(SQN, [_TopEntry|Tail]) -> +find_entry(SQN, [_TopEntry | Tail]) -> find_entry(SQN, Tail). -spec find_persistedentries(integer(), list()) -> list(manifest_entry()). @@ -112,13 +113,13 @@ find_entry(SQN, [_TopEntry|Tail]) -> %% Find the entries in the manifest where all items are < than the persisted %% SQN in the ledger find_persistedentries(SQN, ManifestAsList) -> - DropFun = + DropFun = fun({ME_SQN, _FN, _ME_P, _LK}) -> ME_SQN > SQN end, Entries = lists:dropwhile(DropFun, ManifestAsList), - case Entries of - [_Head|Tail] -> + case Entries of + [_Head | Tail] -> Tail; [] -> [] @@ -128,10 +129,10 @@ find_persistedentries(SQN, ManifestAsList) -> %% @doc %% Return the head manifest entry (the most recent journal) head_entry(Manifest) -> - [{_SQNMarker, SQNL}|_Tail] = Manifest, - [HeadEntry|_SQNL_Tail] = SQNL, + [{_SQNMarker, SQNL} | _Tail] = Manifest, + [HeadEntry | _SQNL_Tail] = SQNL, HeadEntry. - + -spec to_list(manifest()) -> list(). %% @doc %% Convert the manifest to a flat list @@ -152,23 +153,31 @@ to_list(Manifest) -> reader(SQN, RootPath) -> ManifestPath = leveled_inker:filepath(RootPath, manifest_dir), leveled_log:log(i0015, [ManifestPath, SQN]), - {ok, MBin} = file:read_file(filename:join(ManifestPath, - integer_to_list(SQN) - ++ ".man")), + {ok, MBin} = file:read_file( + filename:join( + ManifestPath, + integer_to_list(SQN) ++ + ".man" + ) + ), from_list(lists:reverse(lists:sort(binary_to_term(MBin)))). - + -spec writer(manifest(), integer(), string()) -> ok. 
%% @doc %% Given a manifest and a manifest SQN and a file path, save the manifest to %% disk writer(Manifest, ManSQN, RootPath) -> ManPath = leveled_inker:filepath(RootPath, manifest_dir), - ok = filelib:ensure_dir(ManPath), - % When writing during backups, may not have been generated - NewFN = filename:join(ManPath, - integer_to_list(ManSQN) ++ "." ++ ?MANIFEST_FILEX), - TmpFN = filename:join(ManPath, - integer_to_list(ManSQN) ++ "." ++ ?PENDING_FILEX), + ok = filelib:ensure_dir(ManPath), + % When writing during backups, may not have been generated + NewFN = filename:join( + ManPath, + integer_to_list(ManSQN) ++ "." ++ ?MANIFEST_FILEX + ), + TmpFN = filename:join( + ManPath, + integer_to_list(ManSQN) ++ "." ++ ?PENDING_FILEX + ), %% TODO: This should support a CRC check, but issues with making the CRC %% check backwards compatible (so that the reader can read manifests both %% with and without a CRC check) @@ -176,9 +185,11 @@ writer(Manifest, ManSQN, RootPath) -> leveled_log:log(i0016, [ManSQN]), ok = leveled_util:safe_rename(TmpFN, NewFN, MBin, true), GC_SQN = ManSQN - ?MANIFESTS_TO_RETAIN, - GC_Man = filename:join(ManPath, - integer_to_list(GC_SQN) ++ "." ++ ?MANIFEST_FILEX), - ok = + GC_Man = filename:join( + ManPath, + integer_to_list(GC_SQN) ++ "." ++ ?MANIFEST_FILEX + ), + ok = case filelib:is_file(GC_Man) of true -> file:delete(GC_Man); @@ -194,7 +205,8 @@ printer(Manifest) -> fun({SQN, FN, _PID, _LK}) -> leveled_log:log(i0017, [SQN, FN]) end, - to_list(Manifest)). + to_list(Manifest) + ). -spec complete_filex() -> string(). %% @doc @@ -202,10 +214,9 @@ printer(Manifest) -> complete_filex() -> ?MANIFEST_FILEX. - -spec from_list(list()) -> manifest(). %% @doc -%% Convert from a flat list into a manifest with lookup jumps. +%% Convert from a flat list into a manifest with lookup jumps. %% The opposite of to_list/1 from_list(Manifest) -> % Manifest should already be sorted with the highest SQN at the head @@ -228,21 +239,20 @@ prepend_entry(Entry, AccL) -> case AccL of [] -> [{SQN, [Entry]}]; - [{SQNMarker, SubL}|Tail] -> + [{SQNMarker, SubL} | Tail] -> case length(SubL) < ?SKIP_WIDTH of true -> - [{SQNMarker, [Entry|SubL]}|Tail]; + [{SQNMarker, [Entry | SubL]} | Tail]; false -> - [{SQN, [Entry]}|AccL] + [{SQN, [Entry]} | AccL] end end. -find_subentry(SQN, [{ME_SQN, _FN, ME_P, _LK}|_Tail]) when SQN >= ME_SQN -> +find_subentry(SQN, [{ME_SQN, _FN, ME_P, _LK} | _Tail]) when SQN >= ME_SQN -> ME_P; -find_subentry(SQN, [_TopEntry|Tail]) -> +find_subentry(SQN, [_TopEntry | Tail]) -> find_subentry(SQN, Tail). - - + %%%============================================================================ %%% Test %%%============================================================================ @@ -315,30 +325,32 @@ buildrandomfashion_test() -> RandMapFun = fun(X) -> {rand:uniform(), X} - end, + end, ManL1 = lists:map(RandMapFun, ManL0), ManL2 = lists:sort(ManL1), - + FoldFun = fun({_R, E}, Man) -> add_entry(Man, E, false) end, Man0 = lists:foldl(FoldFun, [], ManL2), - + test_testmanifest(Man0), ?assertMatch(ManL0, to_list(Man0)), - + RandomEntry = lists:nth(rand:uniform(50), ManL0), Man1 = remove_entry(Man0, RandomEntry), Man2 = add_entry(Man1, RandomEntry, false), - + test_testmanifest(Man2), ?assertMatch(ManL0, to_list(Man2)). 
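%% Editor's note - for orientation on the structure built by from_list/1 and
%% prepend_entry/2 above: each top-level element pairs a marker SQN (the
%% lowest SQN in its sub-list) with a short sub-list of entries, highest SQN
%% first, so find_entry/2 can skip whole sub-lists before scanning one. With
%% a skip width of 2 (an illustrative value only; the real ?SKIP_WIDTH may
%% differ), four journal files starting at SQNs 13, 9, 5 and 1 would be held
%% roughly as below; find_entry(10, Manifest) commits to the first sub-list
%% (10 >= 9) and returns the pid stored against SQN 9.
    P = self(),  % in unit tests this slot may hold a string rather than a pid
    Manifest =
        [{9, [{13, "journal13", P, lastkey13}, {9, "journal9", P, lastkey9}]},
         {1, [{5, "journal5", P, lastkey5}, {1, "journal1", P, lastkey1}]}].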
empty_active_journal_test() -> Path = "test/test_area/journal/journal_files/", ok = filelib:ensure_dir(Path), - {ok, ActJ} = leveled_cdb:cdb_open_writer(Path ++ "test_emptyactive_file.pnd"), + {ok, ActJ} = leveled_cdb:cdb_open_writer( + Path ++ "test_emptyactive_file.pnd" + ), ?assertMatch([], generate_entry(ActJ)), ?assertMatch(ok, file:delete(Path ++ "test_emptyactive_file.cdb")). diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 8d129c02..156c19b5 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -1,6 +1,6 @@ %% -------- Inker --------- -%% -%% The Inker is responsible for managing access and updates to the Journal. +%% +%% The Inker is responsible for managing access and updates to the Journal. %% %% The Inker maintains a manifest of what files make up the Journal, and which %% file is the current append-only nursery log to accept new PUTs into the @@ -24,9 +24,9 @@ %% %% The current Journal is made up of a set of files referenced in the manifest. %% No PUTs are made to files which are not in the manifest. -%% +%% %% The Journal is ordered by sequence number from front to back both within -%% and across files. +%% and across files. %% %% On startup the Inker should open the manifest with the highest sequence %% number, and this will contain the list of filenames that make up the @@ -58,7 +58,7 @@ %% - An object (an Erlang term) which should be null for tomb types, and %% maybe null for keyd types %% - A set of Key Deltas associated with the change (which may be an -%% empty list ) +%% empty list ) %% %% Note that only the Inker key type of stnd is directly fetchable, other %% key types are to be found only in scans and so can be added without being @@ -86,41 +86,42 @@ -include("leveled.hrl"). --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3, - ink_start/1, - ink_snapstart/1, - ink_put/5, - ink_mput/3, - ink_get/3, - ink_fetch/3, - ink_keycheck/3, - ink_fold/4, - ink_loadpcl/5, - ink_registersnapshot/2, - ink_confirmdelete/3, - ink_compactjournal/3, - ink_clerkcomplete/3, - ink_compactionpending/1, - ink_trim/2, - ink_getmanifest/1, - ink_printmanifest/1, - ink_close/1, - ink_doom/1, - ink_roll/1, - ink_backup/2, - ink_checksqn/2, - ink_loglevel/2, - ink_addlogs/2, - ink_removelogs/2, - ink_getjournalsqn/1, - ink_getcdbpids/1, - ink_getclerkpid/1 - ]). +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3, + ink_start/1, + ink_snapstart/1, + ink_put/5, + ink_mput/3, + ink_get/3, + ink_fetch/3, + ink_keycheck/3, + ink_fold/4, + ink_loadpcl/5, + ink_registersnapshot/2, + ink_confirmdelete/3, + ink_compactjournal/3, + ink_clerkcomplete/3, + ink_compactionpending/1, + ink_trim/2, + ink_getmanifest/1, + ink_printmanifest/1, + ink_close/1, + ink_doom/1, + ink_roll/1, + ink_backup/2, + ink_checksqn/2, + ink_loglevel/2, + ink_addlogs/2, + ink_removelogs/2, + ink_getjournalsqn/1, + ink_getcdbpids/1, + ink_getclerkpid/1 +]). -export([filepath/2, filepath/3]). @@ -130,7 +131,6 @@ ). -endif. - -define(MANIFEST_FP, "journal_manifest"). -define(FILES_FP, "journal_files"). -define(COMPACT_FP, "post_compact"). @@ -140,39 +140,42 @@ -define(TEST_KC, {[], infinity}). -define(SHUTDOWN_LOOPS, 10). -define(SHUTDOWN_PAUSE, 10000). 
- % How long to wait for snapshots to be released on shutdown - % before forcing closure of snapshots - % 10s may not be long enough for all snapshots, but avoids crashes of - % short-lived queries racing with the shutdown - --record(state, {manifest = [] :: list(), - manifest_sqn = 0 :: integer(), - journal_sqn = 0 :: non_neg_integer(), - active_journaldb :: pid() | undefined, - pending_removals = [] :: list(), - registered_snapshots = [] :: list(registered_snapshot()), - root_path :: string() | undefined, - cdb_options :: #cdb_options{} | undefined, - clerk :: pid() | undefined, - compaction_pending = false :: boolean(), - bookie_monref :: reference() | undefined, - is_snapshot = false :: boolean(), - compression_method = native :: lz4|native|none, - compress_on_receipt = false :: boolean(), - snap_timeout = 0 :: non_neg_integer(), - % in seconds, 0 for snapshots - % (only relevant for primary Inker) - source_inker :: pid() | undefined, - shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer()}). - +% How long to wait for snapshots to be released on shutdown +% before forcing closure of snapshots +% 10s may not be long enough for all snapshots, but avoids crashes of +% short-lived queries racing with the shutdown + +-record(state, { + manifest = [] :: list(), + manifest_sqn = 0 :: integer(), + journal_sqn = 0 :: non_neg_integer(), + active_journaldb :: pid() | undefined, + pending_removals = [] :: list(), + registered_snapshots = [] :: list(registered_snapshot()), + root_path :: string() | undefined, + cdb_options :: #cdb_options{} | undefined, + clerk :: pid() | undefined, + compaction_pending = false :: boolean(), + bookie_monref :: reference() | undefined, + is_snapshot = false :: boolean(), + compression_method = native :: lz4 | native | none, + compress_on_receipt = false :: boolean(), + snap_timeout = 0 :: non_neg_integer(), + % in seconds, 0 for snapshots + % (only relevant for primary Inker) + source_inker :: pid() | undefined, + shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer() +}). -type inker_options() :: #inker_options{}. -type ink_state() :: #state{}. -type registered_snapshot() :: {pid(), erlang:timestamp(), integer()}. --type filterserver() :: pid()|list(tuple()). +-type filterserver() :: pid() | list(tuple()). +%% erlfmt:ignore - issues with editors when function definitions are split -type filterfun() :: - fun((filterserver(), leveled_codec:ledger_key(), leveled_codec:sqn()) -> - current|replaced|missing). + fun((filterserver(), leveled_codec:ledger_key(), leveled_codec:sqn()) + -> current | replaced | missing + ). -type filterclosefun() :: fun((filterserver()) -> ok). -type filterinitfun() :: fun((pid()) -> {filterserver(), leveled_codec:sqn()}). @@ -183,7 +186,7 @@ %%%============================================================================ -spec ink_start(inker_options()) -> {ok, pid()}. -%% @doc +%% @doc %% Startup an inker process - passing in options. %% %% The first options are root_path and start_snapshot - if the inker is to be a @@ -200,7 +203,8 @@ ink_start(InkerOpts) -> {ok, Inker} = gen_server:start_link( - ?MODULE, [leveled_log:get_opts(), InkerOpts], []), + ?MODULE, [leveled_log:get_opts(), InkerOpts], [] + ), {ok, Inker}. -spec ink_snapstart(inker_options()) -> {ok, pid()}. @@ -209,51 +213,59 @@ ink_start(InkerOpts) -> ink_snapstart(InkerOpts) -> {ok, Inker} = gen_server:start( - ?MODULE, [leveled_log:get_opts(), InkerOpts], []), + ?MODULE, [leveled_log:get_opts(), InkerOpts], [] + ), {ok, Inker}. 
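%% Editor's note - a minimal sketch to make the filterfun() contract above
%% concrete. The list-based filter server mirrors what the leveled_iclerk
%% unit tests use; in production the filter wraps a sequence-number check
%% against a penciller snapshot.
    %% FilterServer is a list of {SQN, LedgerKey} pairs standing in for the
    %% penciller snapshot; a journal entry is current only if the ledger
    %% still holds this key at this SQN.
    FilterFun =
        fun(FilterServer, LedgerKey, ObjSQN) ->
            case lists:keyfind(ObjSQN, 1, FilterServer) of
                {ObjSQN, LedgerKey} -> current;
                _ -> replaced
            end
        end.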
--spec ink_put(pid(), - leveled_codec:ledger_key(), - any(), - leveled_codec:journal_keychanges(), - boolean()) -> - {ok, non_neg_integer(), pos_integer()}. +-spec ink_put( + pid(), + leveled_codec:ledger_key(), + any(), + leveled_codec:journal_keychanges(), + boolean() +) -> + {ok, non_neg_integer(), pos_integer()}. %% @doc %% PUT an object into the journal, returning the sequence number for the PUT %% as well as the size of the object (information required by the ledger). %% %% KeyChanges is a tuple of {KeyChanges, TTL} where the TTL is an %% expiry time (or infinity). A sync option can be passed, to override a -%% sync_strategy of none for this particular PUT. +%% sync_strategy of none for this particular PUT. ink_put(Pid, PrimaryKey, Object, KeyChanges, DataSync) -> - gen_server:call(Pid, - {put, PrimaryKey, Object, KeyChanges, DataSync}, - infinity). - + gen_server:call( + Pid, + {put, PrimaryKey, Object, KeyChanges, DataSync}, + infinity + ). --spec ink_mput(pid(), any(), {list(), integer()|infinity}) -> {ok, integer()}. +-spec ink_mput(pid(), any(), {list(), integer() | infinity}) -> {ok, integer()}. %% @doc -%% MPUT as series of object specifications, which will be converted into -%% objects in the Ledger. This should only be used when the Bookie is +%% MPUT as series of object specifications, which will be converted into +%% objects in the Ledger. This should only be used when the Bookie is %% running in head_only mode. The journal entries arekept only for handling %% consistency on startup ink_mput(Pid, PrimaryKey, ObjectChanges) -> gen_server:call(Pid, {mput, PrimaryKey, ObjectChanges}, infinity). --spec ink_get(pid(), - leveled_codec:ledger_key(), - integer()) -> - {{integer(), any()}, {any(), any()}}. +-spec ink_get( + pid(), + leveled_codec:ledger_key(), + integer() +) -> + {{integer(), any()}, {any(), any()}}. %% @doc %% Fetch the object as stored in the Journal. Will not mask errors, should be %% used only in tests ink_get(Pid, PrimaryKey, SQN) -> gen_server:call(Pid, {get, PrimaryKey, SQN}, infinity). --spec ink_fetch(pid(), - {atom(), any(), any(), any()}|string(), - integer()) -> - any(). +-spec ink_fetch( + pid(), + {atom(), any(), any(), any()} | string(), + integer() +) -> + any(). %% @doc %% Fetch the value that was stored for a given Key at a particular SQN (i.e. %% this must be a SQN of the write for this key). the full object is returned @@ -262,10 +274,12 @@ ink_get(Pid, PrimaryKey, SQN) -> ink_fetch(Pid, PrimaryKey, SQN) -> gen_server:call(Pid, {fetch, PrimaryKey, SQN}, infinity). --spec ink_keycheck(pid(), - leveled_codec:ledger_key(), - integer()) -> - probably|missing. +-spec ink_keycheck( + pid(), + leveled_codec:ledger_key(), + integer() +) -> + probably | missing. %% @doc %% Quick check to determine if key is probably present. Positive results have %% a very small false positive rate, as can be triggered through a hash @@ -320,16 +334,20 @@ ink_snapclose(Pid) -> ink_doom(Pid) -> gen_server:call(Pid, doom, infinity). --spec ink_fold(pid(), - integer(), - {leveled_cdb:filter_fun(), - fun((string(), leveled_codec:sqn()) -> term()), - fun((term(), term()) -> term())}, - term()) -> fun(() -> term()). +-spec ink_fold( + pid(), + integer(), + { + leveled_cdb:filter_fun(), + fun((string(), leveled_codec:sqn()) -> term()), + fun((term(), term()) -> term()) + }, + term() +) -> fun(() -> term()). 
%% @doc -%% Fold over the journal from a starting sequence number (MinSQN), passing +%% Fold over the journal from a starting sequence number (MinSQN), passing %% in three functions and a snapshot of the penciller. The Fold functions -%% should be +%% should be %% - a FilterFun to accumulate the objects and decided when to stop or loop %% - a InitAccFun to re-initialise for the fold over the accumulator %% - a FoldFun to actually perform the fold @@ -339,7 +357,7 @@ ink_doom(Pid) -> %% actual desired outcome by being applied on the batch. %% %% The FilterFun should be a five arity function which takes as inputs: -%% KeyInJournal +%% KeyInJournal %% ValueInJournal %% Position - the actual position within the CDB file of the object %% Acc - the bathc accumulator @@ -352,8 +370,8 @@ ink_doom(Pid) -> %% The FilterFun is required to call stop when MaxSQN is reached %% %% The InitAccFun should return an initial batch accumulator for each subfold. -%% It is a 2-arity function that takes a filename and a MinSQN as an input -%% potentially to be used in logging +%% It is a 2-arity function that takes a filename and a MinSQN as an input +%% potentially to be used in logging %% %% The BatchFun is a two arity function that should take as inputs: %% An overall accumulator @@ -363,18 +381,20 @@ ink_doom(Pid) -> %% type of the output of the function when called will depend on the type of %% the accumulator ink_fold(Pid, MinSQN, FoldFuns, Acc) -> - gen_server:call(Pid, - {fold, MinSQN, FoldFuns, Acc, by_runner}, - infinity). + gen_server:call( + Pid, + {fold, MinSQN, FoldFuns, Acc, by_runner}, + infinity + ). -spec ink_loadpcl( pid(), integer(), leveled_bookie:initial_loadfun(), fun((string(), non_neg_integer()) -> any()), - fun((any(), leveled_bookie:ledger_cache()) - -> leveled_bookie:ledger_cache())) - -> leveled_bookie:ledger_cache(). + fun((any(), leveled_bookie:ledger_cache()) -> leveled_bookie:ledger_cache()) +) -> + leveled_bookie:ledger_cache(). %% %% Function to prompt load of the Ledger at startup. The Penciller should %% have determined the lowest SQN not present in the Ledger, and the inker @@ -384,15 +404,14 @@ ink_fold(Pid, MinSQN, FoldFuns, Acc) -> %% The load fun should be a five arity function like: %% load_fun(KeyInJournal, ValueInJournal, _Position, Acc0, ExtractFun) ink_loadpcl(Pid, MinSQN, LoadFun, InitAccFun, BatchFun) -> - gen_server:call(Pid, - {fold, - MinSQN, - {LoadFun, InitAccFun, BatchFun}, - leveled_bookie:empty_ledgercache(), - as_ink}, - infinity). - --spec ink_compactjournal(pid(), pid(), integer()) -> {ok|busy, pid()}. + gen_server:call( + Pid, + {fold, MinSQN, {LoadFun, InitAccFun, BatchFun}, + leveled_bookie:empty_ledgercache(), as_ink}, + infinity + ). + +-spec ink_compactjournal(pid(), pid(), integer()) -> {ok | busy, pid()}. %% @doc %% Trigger a compaction event. the compaction event will use a sqn check %% against the Ledger to see if a value can be compacted - if the penciller @@ -408,13 +427,12 @@ ink_compactjournal(Pid, Bookie, _Timeout) -> CheckerCloseFun = fun leveled_penciller:pcl_close/1, CheckerFilterFun = wrap_checkfilterfun(fun leveled_penciller:pcl_checksequencenumber/3), - gen_server:call(Pid, - {compact, - Bookie, - CheckerInitiateFun, - CheckerCloseFun, - CheckerFilterFun}, - infinity). + gen_server:call( + Pid, + {compact, Bookie, CheckerInitiateFun, CheckerCloseFun, + CheckerFilterFun}, + infinity + ). -spec ink_clerkcomplete(pid(), list(), list()) -> ok. 
%% @doc @@ -432,7 +450,7 @@ ink_compactionpending(Pid) -> -spec ink_trim(pid(), integer()) -> ok. %% @doc -%% Trim the Journal to just those files that contain entries since the +%% Trim the Journal to just those files that contain entries since the %% Penciller's persisted SQN ink_trim(Pid, PersistedSQN) -> gen_server:call(Pid, {trim, PersistedSQN}, infinity). @@ -457,7 +475,7 @@ ink_getmanifest(Pid) -> gen_server:call(Pid, get_manifest, infinity). -spec ink_printmanifest(pid()) -> ok. -%% @doc +%% @doc %% Used in tests to print out the manifest ink_printmanifest(Pid) -> gen_server:call(Pid, print_manifest, infinity). @@ -505,43 +523,55 @@ ink_getcdbpids(Pid) -> ink_getclerkpid(Pid) -> gen_server:call(Pid, get_clerkpid). - %%%============================================================================ %%% gen_server callbacks %%%============================================================================ init([LogOpts, InkerOpts]) -> leveled_log:save(LogOpts), - case {InkerOpts#inker_options.root_path, + case + { + InkerOpts#inker_options.root_path, InkerOpts#inker_options.start_snapshot, - InkerOpts#inker_options.source_inker} of + InkerOpts#inker_options.source_inker + } + of {undefined, true, SrcInker} when ?IS_DEF(SrcInker) -> %% monitor the bookie, and close the snapshot when bookie %% exits - BookieMonitor = erlang:monitor(process, InkerOpts#inker_options.bookies_pid), - {Manifest, - ActiveJournalDB, - JournalSQN} = ink_registersnapshot(SrcInker, self()), - {ok, #state{manifest = Manifest, - active_journaldb = ActiveJournalDB, - source_inker = SrcInker, - journal_sqn = JournalSQN, - bookie_monref = BookieMonitor, - is_snapshot = true}}; - %% Need to do something about timeout + BookieMonitor = erlang:monitor( + process, InkerOpts#inker_options.bookies_pid + ), + {Manifest, ActiveJournalDB, JournalSQN} = ink_registersnapshot( + SrcInker, self() + ), + {ok, #state{ + manifest = Manifest, + active_journaldb = ActiveJournalDB, + source_inker = SrcInker, + journal_sqn = JournalSQN, + bookie_monref = BookieMonitor, + is_snapshot = true + }}; + %% Need to do something about timeout {_RootPath, false, _SrcInker} -> start_from_file(InkerOpts) end. 
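%% Illustrative sketch of driving the fold clause below through ink_fold/4,
%% which replies with a zero-arity runner fun (the by_runner path). The
%% InitAccFun and FoldFun shapes follow the ink_fold documentation above;
%% the FilterFun is supplied by the caller, as its shape is dictated by
%% leveled_cdb:cdb_scan/4.
run_fold_sketch(Inker, MinSQN, FilterFun) ->
    InitAccFun = fun(_FileName, _LowSQN) -> [] end,
    FoldFun = fun(Batch, Acc) -> lists:append(Acc, Batch) end,
    Runner = ink_fold(Inker, MinSQN, {FilterFun, InitAccFun, FoldFun}, []),
    Runner().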
- -handle_call({put, Key, Object, KeyChanges, DataSync}, _From, - State=#state{is_snapshot=Snap}) when Snap == false -> +handle_call( + {put, Key, Object, KeyChanges, DataSync}, + _From, + State = #state{is_snapshot = Snap} +) when Snap == false -> case put_object(Key, Object, KeyChanges, DataSync, State) of {_, UpdState, ObjSize} -> {reply, {ok, UpdState#state.journal_sqn, ObjSize}, UpdState} end; -handle_call({mput, Key, ObjChanges}, _From, - State=#state{is_snapshot=Snap}) when Snap == false -> +handle_call( + {mput, Key, ObjChanges}, + _From, + State = #state{is_snapshot = Snap} +) when Snap == false -> case put_object(Key, head_only, ObjChanges, false, State) of {_, UpdState, _ObjSize} -> {reply, {ok, UpdState#state.journal_sqn}, UpdState} @@ -558,17 +588,19 @@ handle_call({get, Key, SQN}, _From, State) -> {reply, get_object(Key, SQN, State#state.manifest), State}; handle_call({key_check, Key, SQN}, _From, State) -> {reply, key_check(Key, SQN, State#state.manifest), State}; -handle_call({fold, - StartSQN, {FilterFun, InitAccFun, FoldFun}, Acc, By}, - _From, State) -> +handle_call( + {fold, StartSQN, {FilterFun, InitAccFun, FoldFun}, Acc, By}, + _From, + State +) -> Manifest = lists:reverse(leveled_imanifest:to_list(State#state.manifest)), - Folder = + Folder = fun() -> fold_from_sequence( StartSQN, State#state.journal_sqn, - {FilterFun, InitAccFun, FoldFun}, - Acc, + {FilterFun, InitAccFun, FoldFun}, + Acc, Manifest ) end, @@ -578,16 +610,23 @@ handle_call({fold, by_runner -> {reply, Folder, State} end; -handle_call({register_snapshot, Requestor}, - _From , State=#state{is_snapshot=Snap}) when Snap == false -> - Rs = [{Requestor, - os:timestamp(), - State#state.manifest_sqn}|State#state.registered_snapshots], +handle_call( + {register_snapshot, Requestor}, + _From, + State = #state{is_snapshot = Snap} +) when Snap == false -> + Rs = [ + {Requestor, os:timestamp(), State#state.manifest_sqn} + | State#state.registered_snapshots + ], leveled_log:log(i0002, [Requestor, State#state.manifest_sqn]), - {reply, {State#state.manifest, - State#state.active_journaldb, - State#state.journal_sqn}, - State#state{registered_snapshots=Rs}}; + {reply, + { + State#state.manifest, + State#state.active_journaldb, + State#state.journal_sqn + }, + State#state{registered_snapshots = Rs}}; handle_call(get_manifest, _From, State) -> {reply, leveled_imanifest:to_list(State#state.manifest), State}; handle_call(print_manifest, _From, State) -> @@ -596,44 +635,57 @@ handle_call(print_manifest, _From, State) -> handle_call( {compact, Checker, InitiateFun, CloseFun, FilterFun}, _From, - State=#state{is_snapshot=Snap}) - when Snap == false -> + State = #state{is_snapshot = Snap} +) when + Snap == false +-> Clerk = State#state.clerk, Manifest = leveled_imanifest:to_list(State#state.manifest), leveled_iclerk:clerk_compact( - Clerk, Checker, InitiateFun, CloseFun, FilterFun, Manifest), - {reply, {ok, Clerk}, State#state{compaction_pending=true}}; + Clerk, Checker, InitiateFun, CloseFun, FilterFun, Manifest + ), + {reply, {ok, Clerk}, State#state{compaction_pending = true}}; handle_call(compaction_pending, _From, State) -> {reply, State#state.compaction_pending, State}; handle_call( - {trim, PersistedSQN}, _From, State=#state{is_snapshot=Snap}) - when Snap == false -> + {trim, PersistedSQN}, _From, State = #state{is_snapshot = Snap} +) when + Snap == false +-> Manifest = leveled_imanifest:to_list(State#state.manifest), ok = leveled_iclerk:clerk_trim(State#state.clerk, PersistedSQN, Manifest), {reply, ok, State}; 
-handle_call(roll, _From, State=#state{is_snapshot=Snap}) when Snap == false -> +handle_call(roll, _From, State = #state{is_snapshot = Snap}) when + Snap == false +-> case leveled_cdb:cdb_lastkey(State#state.active_journaldb) of empty -> {reply, ok, State}; _ -> NewSQN = State#state.journal_sqn + 1, SWroll = os:timestamp(), - {NewJournalP, Manifest1, NewManSQN} = - roll_active(State#state.active_journaldb, - State#state.manifest, - NewSQN, - State#state.cdb_options, - State#state.root_path, - State#state.manifest_sqn), + {NewJournalP, Manifest1, NewManSQN} = + roll_active( + State#state.active_journaldb, + State#state.manifest, + NewSQN, + State#state.cdb_options, + State#state.root_path, + State#state.manifest_sqn + ), leveled_log:log_timer(i0024, [NewSQN], SWroll), - {reply, ok, State#state{journal_sqn = NewSQN, - manifest = Manifest1, - manifest_sqn = NewManSQN, - active_journaldb = NewJournalP}} + {reply, ok, State#state{ + journal_sqn = NewSQN, + manifest = Manifest1, + manifest_sqn = NewManSQN, + active_journaldb = NewJournalP + }} end; handle_call( - {backup, BackupPath}, _from, State) - when State#state.is_snapshot == true -> + {backup, BackupPath}, _from, State +) when + State#state.is_snapshot == true +-> SW = os:timestamp(), BackupJFP = filepath(filename:join(BackupPath, ?JOURNAL_FP), journal_dir), ok = filelib:ensure_dir(BackupJFP), @@ -647,32 +699,38 @@ handle_call( ExtendedBaseFN = BaseFN ++ "." ++ ?JOURNAL_FILEX, BackupName = filename:join(BackupJFP, BaseFN), true = leveled_cdb:finished_rolling(PidR), - case file:make_link(FN ++ "." ++ ?JOURNAL_FILEX, - BackupName ++ "." ++ ?JOURNAL_FILEX) of + case + file:make_link( + FN ++ "." ++ ?JOURNAL_FILEX, + BackupName ++ "." ++ ?JOURNAL_FILEX + ) + of ok -> ok; {error, eexist} -> ok end, - {[{SQN, BackupName, PidR, LastKey}|ManAcc], - [ExtendedBaseFN|FTRAcc]}; + {[{SQN, BackupName, PidR, LastKey} | ManAcc], [ + ExtendedBaseFN | FTRAcc + ]}; false -> leveled_log:log(i0021, [FN, SQN, State#state.journal_sqn]), {ManAcc, FTRAcc} end end, - {BackupManifest, FilesToRetain} = - lists:foldr(BackupFun, - {[], []}, - leveled_imanifest:to_list(State#state.manifest)), - + {BackupManifest, FilesToRetain} = + lists:foldr( + BackupFun, + {[], []}, + leveled_imanifest:to_list(State#state.manifest) + ), + FilesToRemove = lists:subtract(CurrentFNs, FilesToRetain), - RemoveFun = - fun(RFN) -> + RemoveFun = + fun(RFN) -> leveled_log:log(i0022, [RFN]), RemoveFile = filename:join(BackupJFP, RFN), - case filelib:is_file(RemoveFile) - andalso not filelib:is_dir(RemoveFile) of + case filelib:is_regular(RemoveFile) of true -> ok = file:delete(RemoveFile); false -> @@ -680,13 +738,16 @@ handle_call( end end, lists:foreach(RemoveFun, FilesToRemove), - leveled_imanifest:writer(leveled_imanifest:from_list(BackupManifest), - State#state.manifest_sqn, - filename:join(BackupPath, ?JOURNAL_FP)), + leveled_imanifest:writer( + leveled_imanifest:from_list(BackupManifest), + State#state.manifest_sqn, + filename:join(BackupPath, ?JOURNAL_FP) + ), leveled_log:log_timer( i0020, - [filename:join(BackupPath, ?JOURNAL_FP), length(BackupManifest)], - SW), + [filename:join(BackupPath, ?JOURNAL_FP), length(BackupManifest)], + SW + ), {reply, ok, State}; handle_call({check_sqn, LedgerSQN}, _From, State) -> case State#state.journal_sqn of @@ -700,15 +761,19 @@ handle_call(get_journalsqn, _From, State) -> {reply, {ok, State#state.journal_sqn}, State}; handle_call(get_cdbpids, _From, State) -> CDBPids = leveled_imanifest:get_cdbpids(State#state.manifest), - {reply, 
[State#state.active_journaldb|CDBPids], State}; + {reply, [State#state.active_journaldb | CDBPids], State}; handle_call(get_clerkpid, _From, State) -> {reply, State#state.clerk, State}; -handle_call(close, _From, State=#state{is_snapshot=Snap}) when Snap == true -> +handle_call(close, _From, State = #state{is_snapshot = Snap}) when + Snap == true +-> ok = ink_releasesnapshot(State#state.source_inker, self()), {stop, normal, ok, State}; handle_call( - ShutdownType, From, State = #state{clerk = Clerk}) - when ?IS_DEF(Clerk) -> + ShutdownType, From, State = #state{clerk = Clerk} +) when + ?IS_DEF(Clerk) +-> case ShutdownType of doom -> leveled_log:log(i0018, []); @@ -717,25 +782,28 @@ handle_call( end, leveled_log:log(i0005, [ShutdownType]), leveled_log:log( - i0006, [State#state.journal_sqn, State#state.manifest_sqn]), + i0006, [State#state.journal_sqn, State#state.manifest_sqn] + ), ok = leveled_iclerk:clerk_stop(State#state.clerk), gen_server:cast(self(), {maybe_defer_shutdown, ShutdownType, From}), {noreply, State}. handle_cast( {clerk_complete, ManifestSnippet, FilesToDelete}, - State = #state{cdb_options = CDBOpts}) - when ?IS_DEF(CDBOpts) -> + State = #state{cdb_options = CDBOpts} +) when + ?IS_DEF(CDBOpts) +-> DropFun = fun(E, Acc) -> leveled_imanifest:remove_entry(Acc, E) end, - Man0 = lists:foldl(DropFun, State#state.manifest, FilesToDelete), + Man0 = lists:foldl(DropFun, State#state.manifest, FilesToDelete), AddFun = fun(ManEntry, Acc) -> {LowSQN, FN, _, LK_RO} = ManEntry, - % At this stage the FN has a .cdb extension, which will be - % stripped during add_entry - so need to add the .cdb here + % At this stage the FN has a .cdb extension, which will be + % stripped during add_entry - so need to add the .cdb here {ok, Pid} = leveled_cdb:cdb_reopen_reader(FN, LK_RO, CDBOpts), UpdEntry = {LowSQN, FN, Pid, LK_RO}, leveled_imanifest:add_entry(Acc, UpdEntry, false) @@ -750,19 +818,21 @@ handle_cast( end, FilesToDelete ), - {noreply, State#state{manifest=Man1, - manifest_sqn=NewManifestSQN, - pending_removals=FilesToDelete, - compaction_pending=false}}; + {noreply, State#state{ + manifest = Man1, + manifest_sqn = NewManifestSQN, + pending_removals = FilesToDelete, + compaction_pending = false + }}; handle_cast({confirm_delete, ManSQN, CDB}, State) -> % Check there are no snapshots that may be aware of the file process that % is waiting to delete itself. 
- CheckSQNFun = + CheckSQNFun = fun({_R, _TS, SnapSQN}, Bool) -> % If the Snapshot SQN was at the same point the file was set to - % delete (or after), then the snapshot would not have been told - % of the file, and the snapshot should not hold up its deletion - (SnapSQN >= ManSQN) and Bool + % delete (or after), then the snapshot would not have been told of + % the file, and the snapshot should not hold up its deletion + SnapSQN >= ManSQN andalso Bool end, CheckSnapshotExpiryFun = fun({_R, TS, _SnapSQN}) -> @@ -785,10 +855,10 @@ handle_cast({release_snapshot, Snapshot}, State) -> leveled_log:log(i0003, [Snapshot]), case lists:keydelete(Snapshot, 1, State#state.registered_snapshots) of [] -> - {noreply, State#state{registered_snapshots=[]}}; + {noreply, State#state{registered_snapshots = []}}; Rs -> leveled_log:log(i0004, [length(Rs)]), - {noreply, State#state{registered_snapshots=Rs}} + {noreply, State#state{registered_snapshots = Rs}} end; handle_cast({log_level, LogLevel}, State) -> case State#state.clerk of @@ -796,7 +866,7 @@ handle_cast({log_level, LogLevel}, State) -> ok; INC -> leveled_iclerk:clerk_loglevel(INC, LogLevel) - end, + end, ok = leveled_log:set_loglevel(LogLevel), CDBopts0 = update_cdb_logoptions(State#state.cdb_options), {noreply, State#state{cdb_options = CDBopts0}}; @@ -816,7 +886,7 @@ handle_cast({remove_logs, ForcedLogs}, State) -> ok; INC -> leveled_iclerk:clerk_removelogs(INC, ForcedLogs) - end, + end, ok = leveled_log:remove_forcedlogs(ForcedLogs), CDBopts0 = update_cdb_logoptions(State#state.cdb_options), {noreply, State#state{cdb_options = CDBopts0}}; @@ -834,28 +904,34 @@ handle_cast({maybe_defer_shutdown, ShutdownType, From}, State) -> leveled_log:log(i0026, [N]), timer:sleep(?SHUTDOWN_PAUSE div ?SHUTDOWN_LOOPS), gen_server:cast( - self(), {maybe_defer_shutdown, ShutdownType, From}), + self(), {maybe_defer_shutdown, ShutdownType, From} + ), {noreply, State#state{shutdown_loops = LoopCount - 1}}; 0 -> gen_server:cast( - self(), {complete_shutdown, ShutdownType, From}), + self(), {complete_shutdown, ShutdownType, From} + ), {noreply, State} end - end; + end; handle_cast({complete_shutdown, ShutdownType, From}, State) -> lists:foreach( fun(SnapPid) -> ok = ink_snapclose(SnapPid) end, lists:map( - fun(Snapshot) -> element(1, Snapshot) end, - State#state.registered_snapshots)), + fun(Snapshot) -> element(1, Snapshot) end, + State#state.registered_snapshots + ) + ), shutdown_manifest(State#state.manifest), case ShutdownType of doom -> FPs = - [filepath(State#state.root_path, journal_dir), + [ + filepath(State#state.root_path, journal_dir), filepath(State#state.root_path, manifest_dir), filepath(State#state.root_path, journal_compact_dir), - filepath(State#state.root_path, journal_waste_dir)], + filepath(State#state.root_path, journal_waste_dir) + ], gen_server:reply(From, {ok, FPs}); close -> gen_server:reply(From, ok) @@ -865,15 +941,17 @@ handle_cast({complete_shutdown, ShutdownType, From}, State) -> %% handle the bookie stopping and stop this snapshot handle_info( {'DOWN', BookieMonRef, process, _BookiePid, _Info}, - State=#state{bookie_monref = BookieMonRef, source_inker = SrcInker}) - when ?IS_DEF(SrcInker) -> + State = #state{bookie_monref = BookieMonRef, source_inker = SrcInker} +) when + ?IS_DEF(SrcInker) +-> %% Monitor only registered on snapshots ok = ink_releasesnapshot(State#state.source_inker, self()), {stop, normal, State}; handle_info(_Info, State) -> {noreply, State}. 
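%% Illustrative sketch of the confirm_delete check handled above, assuming
%% Snapshots is a list of registered_snapshot() tuples: deletion of a file
%% is confirmed only when no registered snapshot pre-dates the manifest SQN
%% at which that file was set to delete.
deletion_confirmed_sketch(Snapshots, ManSQN) ->
    lists:foldl(
        fun({_Pid, _TS, SnapSQN}, Bool) ->
            (SnapSQN >= ManSQN) andalso Bool
        end,
        true,
        Snapshots
    ).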
-terminate(Reason, _State=#state{is_snapshot=Snap}) when Snap == true -> +terminate(Reason, _State = #state{is_snapshot = Snap}) when Snap == true -> leveled_log:log(i0027, [Reason]); terminate(Reason, _State) -> leveled_log:log(i0028, [Reason]). @@ -881,24 +959,24 @@ terminate(Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - %%%============================================================================ %%% Internal functions %%%============================================================================ - -spec start_from_file(inker_options()) -> {ok, ink_state()}. %% @doc -%% Start an Inker from the state on disk (i.e. not a snapshot). +%% Start an Inker from the state on disk (i.e. not a snapshot). start_from_file( InkOpts = - #inker_options{root_path = RootPath, snaptimeout_long = SnapTimeout}) - when ?IS_DEF(RootPath), ?IS_DEF(SnapTimeout) -> + #inker_options{root_path = RootPath, snaptimeout_long = SnapTimeout} +) when + ?IS_DEF(RootPath), ?IS_DEF(SnapTimeout) +-> % Setting the correct CDB options is important when starting the inker, in - % particular for waste retention which is determined by the CDB options + % particular for waste retention which is determined by the CDB options % with which the file was last opened CDBopts = get_cdbopts(InkOpts), - + % Determine filepaths RootPath = InkOpts#inker_options.root_path, JournalFP = filepath(RootPath, journal_dir), @@ -910,7 +988,7 @@ start_from_file( % The IClerk must start files with the compaction file path so that they % will be stored correctly in this folder IClerkCDBOpts = CDBopts#cdb_options{file_path = CompactFP}, - + WRP = InkOpts#inker_options.waste_retention_period, ReloadStrategy = InkOpts#inker_options.reload_strategy, MRL = InkOpts#inker_options.max_run_length, @@ -921,38 +999,38 @@ start_from_file( SnapTimeout = InkOpts#inker_options.snaptimeout_long, ScoreOneIn = InkOpts#inker_options.score_onein, - IClerkOpts = - #iclerk_options{inker = self(), - cdb_options=IClerkCDBOpts, - waste_retention_period = WRP, - reload_strategy = ReloadStrategy, - compression_method = PressMethod, - max_run_length = MRL, - singlefile_compactionperc = SFL_CompactPerc, - maxrunlength_compactionperc = MRL_CompactPerc, - score_onein = ScoreOneIn}, - + IClerkOpts = + #iclerk_options{ + inker = self(), + cdb_options = IClerkCDBOpts, + waste_retention_period = WRP, + reload_strategy = ReloadStrategy, + compression_method = PressMethod, + max_run_length = MRL, + singlefile_compactionperc = SFL_CompactPerc, + maxrunlength_compactionperc = MRL_CompactPerc, + score_onein = ScoreOneIn + }, + {ok, Clerk} = leveled_iclerk:clerk_new(IClerkOpts), - - % The building of the manifest will load all the CDB files, starting a + + % The building of the manifest will load all the CDB files, starting a % new leveled_cdb process for each file {ok, ManifestFilenames} = file:list_dir(ManifestFP), - {Manifest, - ManifestSQN, - JournalSQN, - ActiveJournal} = - build_manifest(ManifestFilenames, RootPath, CDBopts), - {ok, #state{manifest = Manifest, - manifest_sqn = ManifestSQN, - journal_sqn = JournalSQN, - active_journaldb = ActiveJournal, - root_path = RootPath, - cdb_options = CDBopts, - compression_method = PressMethod, - compress_on_receipt = PressOnReceipt, - snap_timeout = SnapTimeout, - clerk = Clerk}}. 
- + {Manifest, ManifestSQN, JournalSQN, ActiveJournal} = + build_manifest(ManifestFilenames, RootPath, CDBopts), + {ok, #state{ + manifest = Manifest, + manifest_sqn = ManifestSQN, + journal_sqn = JournalSQN, + active_journaldb = ActiveJournal, + root_path = RootPath, + cdb_options = CDBopts, + compression_method = PressMethod, + compress_on_receipt = PressOnReceipt, + snap_timeout = SnapTimeout, + clerk = Clerk + }}. -spec shutdown_manifest(leveled_imanifest:manifest()) -> ok. %% @doc @@ -966,35 +1044,37 @@ shutdown_manifest(Manifest) -> -spec get_cdbopts(inker_options()) -> #cdb_options{}. %% @doc %% Extract the options for the indibvidal Journal files from the Inker options -get_cdbopts(InkOpts)-> +get_cdbopts(InkOpts) -> CDBopts = InkOpts#inker_options.cdb_options, - WasteFP = - case InkOpts#inker_options.waste_retention_period of + WasteFP = + case InkOpts#inker_options.waste_retention_period of undefined -> % If the waste retention period is undefined, there will % be no retention of waste. This is triggered by making % the waste path undefined undefined; _WRP -> - WFP = filepath(InkOpts#inker_options.root_path, - journal_waste_dir), + WFP = filepath( + InkOpts#inker_options.root_path, + journal_waste_dir + ), filelib:ensure_dir(WFP), WFP end, CDBopts#cdb_options{waste_path = WasteFP}. - -spec put_object( - leveled_codec:primary_key(), - any(), + leveled_codec:primary_key(), + any(), leveled_codec:journal_keychanges(), boolean(), - ink_state()) - -> {ok|rolling, ink_state(), integer()}. + ink_state() +) -> + {ok | rolling, ink_state(), integer()}. %% @doc -%% Add the object to the current journal if it fits. If it doesn't fit, a new +%% Add the object to the current journal if it fits. If it doesn't fit, a new %% journal must be started, and the old journal is set to "roll" into a read -%% only Journal. +%% only Journal. %% The reply contains the byte_size of the object, using the size calculated %% to store the object. put_object( @@ -1007,10 +1087,12 @@ put_object( active_journaldb = ActiveJournal, cdb_options = CDBOpts, root_path = RP - }) - when ?IS_DEF(ActiveJournal), ?IS_DEF(CDBOpts), ?IS_DEF(RP) -> + } +) when + ?IS_DEF(ActiveJournal), ?IS_DEF(CDBOpts), ?IS_DEF(RP) +-> NewSQN = State#state.journal_sqn + 1, - {JournalKey, JournalBin} = + {JournalKey, JournalBin} = leveled_codec:to_inkerkv( LedgerKey, NewSQN, @@ -1022,13 +1104,13 @@ put_object( PutR = leveled_cdb:cdb_put(ActiveJournal, JournalKey, JournalBin, Sync), case PutR of ok -> - {ok, State#state{journal_sqn=NewSQN}, byte_size(JournalBin)}; + {ok, State#state{journal_sqn = NewSQN}, byte_size(JournalBin)}; roll -> SWroll = os:timestamp(), - {NewJournalP, Manifest1, NewManSQN} = + {NewJournalP, Manifest1, NewManSQN} = roll_active( - ActiveJournal, - State#state.manifest, + ActiveJournal, + State#state.manifest, NewSQN, State#state.cdb_options, State#state.root_path, @@ -1037,23 +1119,25 @@ put_object( leveled_log:log_timer(i0008, [], SWroll), ok = leveled_cdb:cdb_put( - NewJournalP, JournalKey, JournalBin), + NewJournalP, JournalKey, JournalBin + ), {rolling, State#state{ - journal_sqn=NewSQN, - manifest=Manifest1, + journal_sqn = NewSQN, + manifest = Manifest1, manifest_sqn = NewManSQN, - active_journaldb=NewJournalP}, + active_journaldb = NewJournalP + }, byte_size(JournalBin)} end. - -spec get_object( - leveled_codec:ledger_key(), - integer(), - leveled_imanifest:manifest()) -> any(). + leveled_codec:ledger_key(), + integer(), + leveled_imanifest:manifest() +) -> any(). 
%% @doc -%% Find the SQN in the manifest and then fetch the object from the Journal, +%% Find the SQN in the manifest and then fetch the object from the Journal, %% in the manifest. If the fetch is in response to a user GET request then %% the KeyChanges are irrelevant, so no need to process them. In this case %% the KeyChanges are processed (as ToIgnoreKeyChanges will be set to false). @@ -1066,41 +1150,44 @@ get_object(LedgerKey, SQN, Manifest, ToIgnoreKeyChanges) -> Obj = leveled_cdb:cdb_get(JournalP, InkerKey), leveled_codec:from_inkerkv(Obj, ToIgnoreKeyChanges). - -spec roll_active( pid(), - leveled_imanifest:manifest(), + leveled_imanifest:manifest(), integer(), #cdb_options{}, string(), - integer()) -> {pid(), leveled_imanifest:manifest(), integer()}. + integer() +) -> {pid(), leveled_imanifest:manifest(), integer()}. %% @doc -%% Roll the active journal, and start a new active journal, updating the +%% Roll the active journal, and start a new active journal, updating the %% manifest roll_active(ActiveJournal, Manifest, NewSQN, CDBopts, RootPath, ManifestSQN) -> case leveled_cdb:cdb_lastkey(ActiveJournal) of LastKey when LastKey =/= empty -> ok = leveled_cdb:cdb_roll(ActiveJournal), - Manifest0 = - leveled_imanifest:append_lastkey(Manifest, ActiveJournal, LastKey), - ManEntry = + Manifest0 = + leveled_imanifest:append_lastkey( + Manifest, ActiveJournal, LastKey + ), + ManEntry = start_new_activejournal(NewSQN, RootPath, CDBopts), {_, _, NewJournalP, _} = ManEntry, Manifest1 = leveled_imanifest:add_entry(Manifest0, ManEntry, true), ok = leveled_imanifest:writer(Manifest1, ManifestSQN + 1, RootPath), - + {NewJournalP, Manifest1, ManifestSQN + 1} end. -spec key_check( - leveled_codec:primary_key(), - integer(), - leveled_imanifest:manifest()) -> missing|probably. + leveled_codec:primary_key(), + integer(), + leveled_imanifest:manifest() +) -> missing | probably. %% @doc -%% Checks for the presence of the key at that SQN withing the journal, +%% Checks for the presence of the key at that SQN withing the journal, %% avoiding the cost of actually reading the object from disk. -%% a KeyCheck is not absolute proof of the existence of the object - there +%% a KeyCheck is not absolute proof of the existence of the object - there %% could be a hash collision, or the on-disk object could be corrupted. So %% the positive answer is 'probably' not 'true' key_check(LedgerKey, SQN, Manifest) -> @@ -1108,11 +1195,10 @@ key_check(LedgerKey, SQN, Manifest) -> InkerKey = leveled_codec:to_inkerkey(LedgerKey, SQN), leveled_cdb:cdb_keycheck(JournalP, InkerKey). - --spec build_manifest(list(), list(), #cdb_options{}) -> - {leveled_imanifest:manifest(), integer(), integer(), pid()}. +-spec build_manifest(list(), list(), #cdb_options{}) -> + {leveled_imanifest:manifest(), integer(), integer(), pid()}. %% @doc -%% Selects the correct manifest to open, and then starts a process for each +%% Selects the correct manifest to open, and then starts a process for each %% file in the manifest, storing the PID for that process within the manifest. %% Opens an active journal if one is not present. build_manifest(ManifestFilenames, RootPath, CDBopts) -> @@ -1121,7 +1207,8 @@ build_manifest(ManifestFilenames, RootPath, CDBopts) -> ManifestRegex = "(?[0-9]+)\\." 
++ leveled_imanifest:complete_filex(), ValidManSQNs = sequencenumbers_fromfilenames( - ManifestFilenames, ManifestRegex, 'MSQN'), + ManifestFilenames, ManifestRegex, 'MSQN' + ), {Manifest, ManifestSQN} = case length(ValidManSQNs) of 0 -> @@ -1131,11 +1218,11 @@ build_manifest(ManifestFilenames, RootPath, CDBopts) -> M1 = leveled_imanifest:reader(PersistedManSQN, RootPath), {M1, PersistedManSQN} end, - + % Open the manifest files, completing if necessary and ensure there is % a valid active journal at the head of the manifest OpenManifest = open_all_manifest(Manifest, RootPath, CDBopts), - + {ActiveLowSQN, _FN, ActiveJournal, _LK} = leveled_imanifest:head_entry(OpenManifest), JournalSQN = @@ -1145,12 +1232,12 @@ build_manifest(ManifestFilenames, RootPath, CDBopts) -> {JSQN, _Type, _LastKey} -> JSQN end, - - % Update the manifest if it has been changed by the process of loading + + % Update the manifest if it has been changed by the process of loading % the manifest (must also increment the manifest SQN). UpdManifestSQN = if - length(OpenManifest) > length(Manifest) -> + length(OpenManifest) > length(Manifest) -> leveled_log:log(i0009, []), leveled_imanifest:printer(OpenManifest), NextSQN = ManifestSQN + 1, @@ -1163,33 +1250,34 @@ build_manifest(ManifestFilenames, RootPath, CDBopts) -> end, {OpenManifest, UpdManifestSQN, JournalSQN, ActiveJournal}. - -spec close_allmanifest(list()) -> ok. %% @doc %% Close every file in the manifest. Will cause deletion of any delete_pending %% files. close_allmanifest([]) -> ok; -close_allmanifest([H|ManifestT]) -> +close_allmanifest([H | ManifestT]) -> {_, _, Pid, _} = H, ok = leveled_cdb:cdb_close(Pid), close_allmanifest(ManifestT). - -spec open_all_manifest( - leveled_imanifest:manifest(), list(), #cdb_options{}) - -> leveled_imanifest:manifest(). + leveled_imanifest:manifest(), list(), #cdb_options{} +) -> + leveled_imanifest:manifest(). %% @doc %% Open all the files in the manifets, and updating the manifest with the PIDs %% of the opened files open_all_manifest([], RootPath, CDBOpts) -> leveled_log:log(i0011, []), - leveled_imanifest:add_entry([], - start_new_activejournal(0, RootPath, CDBOpts), - true); + leveled_imanifest:add_entry( + [], + start_new_activejournal(0, RootPath, CDBOpts), + true + ); open_all_manifest(Man0, RootPath, CDBOpts) -> Man1 = leveled_imanifest:to_list(Man0), - [{HeadSQN, HeadFN, _IgnorePid, HeadLK}|ManifestTail] = Man1, + [{HeadSQN, HeadFN, _IgnorePid, HeadLK} | ManifestTail] = Man1, OpenJournalFun = fun(ManEntry) -> {LowSQN, FN, _, LK_RO} = ManEntry, @@ -1197,7 +1285,7 @@ open_all_manifest(Man0, RootPath, CDBOpts) -> PFN = FN ++ "." ++ ?PENDING_FILEX, case filelib:is_file(CFN) of true -> - {ok, Pid} = + {ok, Pid} = leveled_cdb:cdb_reopen_reader(CFN, LK_RO, CDBOpts), {LowSQN, FN, Pid, LK_RO}; false -> @@ -1230,41 +1318,42 @@ open_all_manifest(Man0, RootPath, CDBOpts) -> {ok, HeadW} = leveled_cdb:cdb_open_writer(PendingHeadFN, CDBOpts), leveled_imanifest:add_entry( - OpenedTail, {HeadSQN, HeadFN, HeadW, HeadLK}, true) + OpenedTail, {HeadSQN, HeadFN, HeadW, HeadLK}, true + ) end. - start_new_activejournal(SQN, RootPath, CDBOpts) -> Filename = filepath(RootPath, SQN, new_journal), {ok, PidW} = leveled_cdb:cdb_open_writer(Filename, CDBOpts), {SQN, Filename, PidW, empty}. - - -spec fold_from_sequence( non_neg_integer(), pos_integer(), {fun(), fun(), fun()}, any(), - list()) -> any(). + list() +) -> any(). %% @doc %% %% Scan from the starting sequence number to the end of the Journal. 
Apply %% the FilterFun as it scans over the CDB file to build up a Batch of relevant -%% objects - and then apply the FoldFun to the batch once the batch is +%% objects - and then apply the FoldFun to the batch once the batch is %% complete %% %% Inputs - MinSQN, JournalSQN, FoldFuns, OverallAccumulator, Inker's Manifest %% %% The fold loops over all the CDB files in the Manifest. Each file is looped %% over in batches using foldfile_between_sequence/7. The batch is a range of -%% sequence numbers (so the batch size may be << ?LOADING_BATCH) in compacted +%% sequence numbers (so the batch size may be << ?LOADING_BATCH) in compacted %% files fold_from_sequence(_MinSQN, _JournalSQN, _FoldFuns, Acc, []) -> Acc; fold_from_sequence( - MinSQN, JournalSQN, FoldFuns, Acc, [{LowSQN, FN, Pid, _LK}|Rest]) - when LowSQN >= MinSQN -> + MinSQN, JournalSQN, FoldFuns, Acc, [{LowSQN, FN, Pid, _LK} | Rest] +) when + LowSQN >= MinSQN +-> {NextMinSQN, Acc0} = foldfile_between_sequence( MinSQN, @@ -1278,10 +1367,11 @@ fold_from_sequence( ), fold_from_sequence(NextMinSQN, JournalSQN, FoldFuns, Acc0, Rest); fold_from_sequence( - MinSQN, JournalSQN, FoldFuns, Acc, [{_LowSQN, FN, Pid, _LK}|Rest]) -> - % If this file has a LowSQN less than the minimum, we can skip it if the + MinSQN, JournalSQN, FoldFuns, Acc, [{_LowSQN, FN, Pid, _LK} | Rest] +) -> + % If this file has a LowSQN less than the minimum, we can skip it if the % next file also has a LowSQN below the minimum - {NextMinSQN, Acc0} = + {NextMinSQN, Acc0} = case Rest of [] -> foldfile_between_sequence( @@ -1294,7 +1384,9 @@ fold_from_sequence( undefined, FN ); - [{NextSQN, _NxtFN, _NxtPid, _NxtLK}|_Rest] when NextSQN > MinSQN -> + [{NextSQN, _NxtFN, _NxtPid, _NxtLK} | _Rest] when + NextSQN > MinSQN + -> foldfile_between_sequence( MinSQN, MinSQN + ?LOADING_BATCH, @@ -1306,20 +1398,22 @@ fold_from_sequence( FN ); _ -> - {MinSQN, Acc} + {MinSQN, Acc} end, fold_from_sequence(NextMinSQN, JournalSQN, FoldFuns, Acc0, Rest). foldfile_between_sequence( - MinSQN, MaxSQN, JournalSQN, FoldFuns, Acc, CDBpid, StartPos, FN) -> + MinSQN, MaxSQN, JournalSQN, FoldFuns, Acc, CDBpid, StartPos, FN +) -> {FilterFun, InitAccFun, FoldFun} = FoldFuns, InitBatchAcc = {MinSQN, MaxSQN, InitAccFun(FN, MinSQN)}, - + case leveled_cdb:cdb_scan(CDBpid, FilterFun, InitBatchAcc, StartPos) of {eof, {AccMinSQN, _AccMaxSQN, BatchAcc}} -> {AccMinSQN, FoldFun(BatchAcc, Acc)}; - {_LastPosition, {AccMinSQN, _AccMaxSQN, BatchAcc}} - when AccMinSQN >= JournalSQN -> + {_LastPosition, {AccMinSQN, _AccMaxSQN, BatchAcc}} when + AccMinSQN >= JournalSQN + -> {AccMinSQN, FoldFun(BatchAcc, Acc)}; {LastPosition, {_AccMinSQN, _AccMaxSQN, BatchAcc}} -> UpdAcc = FoldFun(BatchAcc, Acc), @@ -1335,13 +1429,17 @@ foldfile_between_sequence( FN ) end. - + sequencenumbers_fromfilenames(Filenames, Regex, IntName) -> lists:foldl( fun(FN, Acc) -> - case re:run(FN, - Regex, - [{capture, [IntName], list}]) of + case + re:run( + FN, + Regex, + [{capture, [IntName], list}] + ) + of nomatch -> Acc; {match, [Int]} when is_list(Int) -> @@ -1349,7 +1447,8 @@ sequencenumbers_fromfilenames(Filenames, Regex, IntName) -> end end, [], - Filenames). + Filenames + ). filepath(RootPath, journal_dir) -> RootPath ++ "/" ++ ?FILES_FP ++ "/"; @@ -1361,22 +1460,24 @@ filepath(RootPath, journal_waste_dir) -> filepath(RootPath, journal_dir) ++ "/" ++ ?WASTE_FP ++ "/". filepath(RootPath, NewSQN, new_journal) -> - filename:join(filepath(RootPath, journal_dir), - integer_to_list(NewSQN) ++ "_" - ++ leveled_util:generate_uuid() - ++ "." 
++ ?PENDING_FILEX); + filename:join( + filepath(RootPath, journal_dir), + integer_to_list(NewSQN) ++ "_" ++ + leveled_util:generate_uuid() ++ + "." ++ ?PENDING_FILEX + ); filepath(CompactFilePath, NewSQN, compact_journal) -> - filename:join(CompactFilePath, - integer_to_list(NewSQN) ++ "_" - ++ leveled_util:generate_uuid() - ++ "." ++ ?PENDING_FILEX). - + filename:join( + CompactFilePath, + integer_to_list(NewSQN) ++ "_" ++ + leveled_util:generate_uuid() ++ + "." ++ ?PENDING_FILEX + ). initiate_penciller_snapshot(LedgerSnap) -> MaxSQN = leveled_penciller:pcl_getstartupsequencenumber(LedgerSnap), {LedgerSnap, MaxSQN}. - -spec wrap_checkfilterfun(fun()) -> fun(). %% @doc %% Make a check of the validity of the key being passed into the CheckFilterFun @@ -1390,9 +1491,9 @@ wrap_checkfilterfun(CheckFilterFun) -> end end. - -spec update_cdb_logoptions( - #cdb_options{}|undefined) -> #cdb_options{}|undefined. + #cdb_options{} | undefined +) -> #cdb_options{} | undefined. update_cdb_logoptions(undefined) -> undefined; update_cdb_logoptions(CDBopts) -> @@ -1410,7 +1511,8 @@ update_cdb_logoptions(CDBopts) -> %% penciller ink_compactjournal(Pid, Checker, InitiateFun, CloseFun, FilterFun, _Timeout) -> gen_server:call( - Pid, {compact, Checker, InitiateFun, CloseFun, FilterFun}, infinity). + Pid, {compact, Checker, InitiateFun, CloseFun, FilterFun}, infinity + ). create_value_for_journal(Obj, Comp) -> leveled_codec:create_value_for_journal(Obj, Comp, native). @@ -1433,51 +1535,69 @@ build_dummy_journal(KeyConvertF) -> {ok, J1} = leveled_cdb:cdb_open_writer(F1), {K1, V1} = {KeyConvertF("Key1"), "TestValue1"}, {K2, V2} = {KeyConvertF("Key2"), "TestValue2"}, - ok = - leveled_cdb:cdb_put(J1, - {1, stnd, K1}, - create_value_for_journal({V1, ?TEST_KC}, false)), - ok = - leveled_cdb:cdb_put(J1, - {2, stnd, K2}, - create_value_for_journal({V2, ?TEST_KC}, false)), + ok = + leveled_cdb:cdb_put( + J1, + {1, stnd, K1}, + create_value_for_journal({V1, ?TEST_KC}, false) + ), + ok = + leveled_cdb:cdb_put( + J1, + {2, stnd, K2}, + create_value_for_journal({V2, ?TEST_KC}, false) + ), ok = leveled_cdb:cdb_roll(J1), LK1 = leveled_cdb:cdb_lastkey(J1), - lists:foldl(fun(X, Closed) -> - case Closed of - true -> true; - false -> - case leveled_cdb:cdb_checkhashtable(J1) of - true -> leveled_cdb:cdb_close(J1), true; - false -> timer:sleep(X), false - end - end - end, - false, - lists:seq(1, 5)), + lists:foldl( + fun(X, Closed) -> + case Closed of + true -> + true; + false -> + case leveled_cdb:cdb_checkhashtable(J1) of + true -> + leveled_cdb:cdb_close(J1), + true; + false -> + timer:sleep(X), + false + end + end + end, + false, + lists:seq(1, 5) + ), F2 = filename:join(JournalFP, "nursery_3.pnd"), {ok, J2} = leveled_cdb:cdb_open_writer(F2), {K1, V3} = {KeyConvertF("Key1"), "TestValue3"}, {K4, V4} = {KeyConvertF("Key4"), "TestValue4"}, - ok = - leveled_cdb:cdb_put(J2, - {3, stnd, K1}, - create_value_for_journal({V3, ?TEST_KC}, false)), - ok = - leveled_cdb:cdb_put(J2, - {4, stnd, K4}, - create_value_for_journal({V4, ?TEST_KC}, false)), + ok = + leveled_cdb:cdb_put( + J2, + {3, stnd, K1}, + create_value_for_journal({V3, ?TEST_KC}, false) + ), + ok = + leveled_cdb:cdb_put( + J2, + {4, stnd, K4}, + create_value_for_journal({V4, ?TEST_KC}, false) + ), LK2 = leveled_cdb:cdb_lastkey(J2), ok = leveled_cdb:cdb_close(J2), - Manifest = [{1, "test/test_area/journal/journal_files/nursery_1", "pid1", LK1}, - {3, "test/test_area/journal/journal_files/nursery_3", "pid2", LK2}], + Manifest = [ + {1, 
"test/test_area/journal/journal_files/nursery_1", "pid1", LK1}, + {3, "test/test_area/journal/journal_files/nursery_3", "pid2", LK2} + ], ManifestBin = term_to_binary(Manifest), - {ok, MF1} = file:open(filename:join(ManifestFP, "1.man"), - [binary, raw, read, write]), + {ok, MF1} = file:open( + filename:join(ManifestFP, "1.man"), + [binary, raw, read, write] + ), ok = file:write(MF1, ManifestBin), ok = file:close(MF1). - clean_testdir(RootPath) -> clean_subdir(filepath(RootPath, journal_dir)), clean_subdir(filepath(RootPath, journal_compact_dir)), @@ -1487,23 +1607,27 @@ clean_testdir(RootPath) -> clean_subdir(DirPath) -> ok = filelib:ensure_dir(DirPath), {ok, Files} = file:list_dir(DirPath), - lists:foreach(fun(FN) -> - File = filename:join(DirPath, FN), - case file:delete(File) of - ok -> io:format("Success deleting ~s~n", [File]); - _ -> io:format("Error deleting ~s~n", [File]) - end - end, - Files). + lists:foreach( + fun(FN) -> + File = filename:join(DirPath, FN), + case file:delete(File) of + ok -> io:format("Success deleting ~s~n", [File]); + _ -> io:format("Error deleting ~s~n", [File]) + end + end, + Files + ). simple_inker_test() -> RootPath = "test/test_area/journal", build_dummy_journal(), - CDBopts = #cdb_options{max_size=300000, binary_mode=true}, - {ok, Ink1} = ink_start(#inker_options{root_path=RootPath, - cdb_options=CDBopts, - compression_method=native, - compress_on_receipt=true}), + CDBopts = #cdb_options{max_size = 300000, binary_mode = true}, + {ok, Ink1} = ink_start(#inker_options{ + root_path = RootPath, + cdb_options = CDBopts, + compression_method = native, + compress_on_receipt = true + }), Obj1 = ink_get(Ink1, key_converter("Key1"), 1), ?assertMatch(Obj1, {{1, key_converter("Key1")}, {"TestValue1", ?TEST_KC}}), Obj3 = ink_get(Ink1, key_converter("Key1"), 3), @@ -1516,7 +1640,7 @@ simple_inker_test() -> simple_inker_completeactivejournal_test() -> RootPath = "test/test_area/journal", build_dummy_journal(), - CDBopts = #cdb_options{max_size=300000, binary_mode=true}, + CDBopts = #cdb_options{max_size = 300000, binary_mode = true}, JournalFP = filepath(RootPath, journal_dir), F2 = filename:join(JournalFP, "nursery_3.pnd"), {ok, PidW} = leveled_cdb:cdb_open_writer(F2), @@ -1524,17 +1648,19 @@ simple_inker_completeactivejournal_test() -> F1 = filename:join(JournalFP, "nursery_1.cdb"), F1r = filename:join(JournalFP, "nursery_1.pnd"), ok = file:rename(F1, F1r), - {ok, Ink1} = ink_start(#inker_options{root_path=RootPath, - cdb_options=CDBopts, - compression_method=native, - compress_on_receipt=true}), + {ok, Ink1} = ink_start(#inker_options{ + root_path = RootPath, + cdb_options = CDBopts, + compression_method = native, + compress_on_receipt = true + }), Obj1 = ink_get(Ink1, key_converter("Key1"), 1), ?assertMatch(Obj1, {{1, key_converter("Key1")}, {"TestValue1", ?TEST_KC}}), Obj2 = ink_get(Ink1, key_converter("Key4"), 4), ?assertMatch(Obj2, {{4, key_converter("Key4")}, {"TestValue4", ?TEST_KC}}), ink_close(Ink1), clean_testdir(RootPath). - + test_ledgerkey(Key) -> {o, "Bucket", Key, null}. 
@@ -1546,84 +1672,101 @@ compact_journal_wastediscarded_test_() -> compact_journal_testto(WRP, ExpectedFiles) -> RootPath = "test/test_area/journal", - CDBopts = #cdb_options{max_size=300000, sync_strategy=none}, + CDBopts = #cdb_options{max_size = 300000, sync_strategy = none}, RStrategy = [{?STD_TAG, recovr}], - InkOpts = #inker_options{root_path=RootPath, - cdb_options=CDBopts, - reload_strategy=RStrategy, - waste_retention_period=WRP, - singlefile_compactionperc=40.0, - maxrunlength_compactionperc=70.0, - compression_method=native, - compress_on_receipt=false}, - + InkOpts = #inker_options{ + root_path = RootPath, + cdb_options = CDBopts, + reload_strategy = RStrategy, + waste_retention_period = WRP, + singlefile_compactionperc = 40.0, + maxrunlength_compactionperc = 70.0, + compression_method = native, + compress_on_receipt = false + }, + build_dummy_journal(fun test_ledgerkey/1), {ok, Ink1} = ink_start(InkOpts), - - {ok, NewSQN1, ObjSize} = ink_put(Ink1, - test_ledgerkey("KeyAA"), - "TestValueAA", - {[], infinity}, - true), + + {ok, NewSQN1, ObjSize} = ink_put( + Ink1, + test_ledgerkey("KeyAA"), + "TestValueAA", + {[], infinity}, + true + ), ?assertMatch(NewSQN1, 5), ok = ink_printmanifest(Ink1), R0 = ink_get(Ink1, test_ledgerkey("KeyAA"), 5), - ?assertMatch(R0, - {{5, test_ledgerkey("KeyAA")}, - {"TestValueAA", {[], infinity}}}), + ?assertMatch( + R0, + {{5, test_ledgerkey("KeyAA")}, {"TestValueAA", {[], infinity}}} + ), FunnyLoop = lists:seq(1, 48), - Checker = lists:map(fun(X) -> - PK = "KeyZ" ++ integer_to_list(X), - {ok, SQN, _} = ink_put(Ink1, - test_ledgerkey(PK), - crypto:strong_rand_bytes(10000), - {[], infinity}, - false), - {SQN, test_ledgerkey(PK)} - end, - FunnyLoop), - {ok, NewSQN2, ObjSize} = ink_put(Ink1, - test_ledgerkey("KeyBB"), - "TestValueBB", - {[], infinity}, - true), + Checker = lists:map( + fun(X) -> + PK = "KeyZ" ++ integer_to_list(X), + {ok, SQN, _} = ink_put( + Ink1, + test_ledgerkey(PK), + crypto:strong_rand_bytes(10000), + {[], infinity}, + false + ), + {SQN, test_ledgerkey(PK)} + end, + FunnyLoop + ), + {ok, NewSQN2, ObjSize} = ink_put( + Ink1, + test_ledgerkey("KeyBB"), + "TestValueBB", + {[], infinity}, + true + ), ?assertMatch(NewSQN2, 54), ActualManifest = ink_getmanifest(Ink1), ok = ink_printmanifest(Ink1), ?assertMatch(3, length(ActualManifest)), - {ok, _ICL1} = ink_compactjournal(Ink1, - Checker, - fun(X) -> {X, 55} end, - fun(_F) -> ok end, - fun(L, K, SQN) -> - case lists:member({SQN, K}, L) of - true -> current; - false -> replaced - end - end, - 5000), + {ok, _ICL1} = ink_compactjournal( + Ink1, + Checker, + fun(X) -> {X, 55} end, + fun(_F) -> ok end, + fun(L, K, SQN) -> + case lists:member({SQN, K}, L) of + true -> current; + false -> replaced + end + end, + 5000 + ), timer:sleep(1000), CompactedManifest1 = ink_getmanifest(Ink1), ?assertMatch(2, length(CompactedManifest1)), Checker2 = lists:sublist(Checker, 16), - {ok, _ICL2} = ink_compactjournal(Ink1, - Checker2, - fun(X) -> {X, 55} end, - fun(_F) -> ok end, - fun(L, K, SQN) -> - case lists:member({SQN, K}, L) of - true -> current; - false -> replaced - end - end, - 5000), + {ok, _ICL2} = ink_compactjournal( + Ink1, + Checker2, + fun(X) -> {X, 55} end, + fun(_F) -> ok end, + fun(L, K, SQN) -> + case lists:member({SQN, K}, L) of + true -> current; + false -> replaced + end + end, + 5000 + ), timer:sleep(1000), CompactedManifest2 = ink_getmanifest(Ink1), {ok, PrefixTest} = re:compile(?COMPACT_FP), - lists:foreach(fun({_SQN, FN, _P, _LK}) -> - nomatch = re:run(FN, PrefixTest) - end, - 
CompactedManifest2), + lists:foreach( + fun({_SQN, FN, _P, _LK}) -> + nomatch = re:run(FN, PrefixTest) + end, + CompactedManifest2 + ), ?assertMatch(2, length(CompactedManifest2)), ink_close(Ink1), % Need to wait for delete_pending files to timeout @@ -1636,44 +1779,50 @@ compact_journal_testto(WRP, ExpectedFiles) -> empty_manifest_test() -> RootPath = "test/test_area/journal", clean_testdir(RootPath), - CDBopts = #cdb_options{max_size=300000}, - {ok, Ink1} = ink_start(#inker_options{root_path=RootPath, - cdb_options=CDBopts, - compression_method=native, - compress_on_receipt=true}), + CDBopts = #cdb_options{max_size = 300000}, + {ok, Ink1} = ink_start(#inker_options{ + root_path = RootPath, + cdb_options = CDBopts, + compression_method = native, + compress_on_receipt = true + }), ?assertMatch(not_present, ink_fetch(Ink1, key_converter("Key1"), 1)), - + CheckFun = fun(L, K, SQN) -> lists:member({SQN, key_converter(K)}, L) end, ?assertMatch(false, CheckFun([], "key", 1)), - {ok, _ICL1} = ink_compactjournal(Ink1, - [], - fun(X) -> {X, 55} end, - fun(_F) -> ok end, - CheckFun, - 5000), + {ok, _ICL1} = ink_compactjournal( + Ink1, + [], + fun(X) -> {X, 55} end, + fun(_F) -> ok end, + CheckFun, + 5000 + ), timer:sleep(1000), ?assertMatch(1, length(ink_getmanifest(Ink1))), ok = ink_close(Ink1), - + % Add pending manifest to be ignored FN = filepath(RootPath, manifest_dir) ++ "999.pnd", ok = file:write_file(FN, term_to_binary("Hello")), - - {ok, Ink2} = ink_start(#inker_options{root_path=RootPath, - cdb_options=CDBopts, - compression_method=native, - compress_on_receipt=false}), + + {ok, Ink2} = ink_start(#inker_options{ + root_path = RootPath, + cdb_options = CDBopts, + compression_method = native, + compress_on_receipt = false + }), ?assertMatch(not_present, ink_fetch(Ink2, key_converter("Key1"), 1)), - {ok, SQN, Size} = + {ok, SQN, Size} = ink_put(Ink2, key_converter("Key1"), "Value1", {[], infinity}, false), - ?assertMatch(1, SQN), % This is the first key - so should have SQN of 1 + % This is the first key - so should have SQN of 1 + ?assertMatch(1, SQN), ?assertMatch(true, Size > 0), {ok, V} = ink_fetch(Ink2, key_converter("Key1"), 1), ?assertMatch("Value1", V), ink_close(Ink2), clean_testdir(RootPath). - wrapper_test() -> KeyNotTuple = [?STD_TAG, <<"B">>, <<"K">>, null], TagNotAtom = {"tag", <<"B">>, <<"K">>, null}, @@ -1681,7 +1830,6 @@ wrapper_test() -> WrappedFun = wrap_checkfilterfun(CheckFilterFun), ?assertMatch(false, WrappedFun(null, KeyNotTuple, 1)), ?assertMatch(false, WrappedFun(null, TagNotAtom, 1)). 
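%% Illustrative sketch of a filterfun() for journal compaction, in the form
%% used by compact_journal_testto/2 above, where the filterserver() is
%% simply a list of {SQN, LedgerKey} pairs; a real compaction run passes a
%% penciller snapshot pid and checks sequence numbers against the ledger.
compaction_filter_sketch(CurrentKeyList, LedgerKey, SQN) ->
    case lists:member({SQN, LedgerKey}, CurrentKeyList) of
        true -> current;
        false -> replaced
    end.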
- coverage_cheat_test() -> {noreply, _State0} = handle_info(timeout, #state{}), @@ -1690,19 +1838,23 @@ coverage_cheat_test() -> handle_down_test() -> RootPath = "test/test_area/journal", build_dummy_journal(), - CDBopts = #cdb_options{max_size=300000, binary_mode=true}, - {ok, Ink1} = ink_start(#inker_options{root_path=RootPath, - cdb_options=CDBopts, - compression_method=native, - compress_on_receipt=true}), + CDBopts = #cdb_options{max_size = 300000, binary_mode = true}, + {ok, Ink1} = ink_start(#inker_options{ + root_path = RootPath, + cdb_options = CDBopts, + compression_method = native, + compress_on_receipt = true + }), FakeBookie = spawn(fun loop/0), Mon = erlang:monitor(process, FakeBookie), - SnapOpts = #inker_options{start_snapshot=true, - bookies_pid = FakeBookie, - source_inker=Ink1}, + SnapOpts = #inker_options{ + start_snapshot = true, + bookies_pid = FakeBookie, + source_inker = Ink1 + }, {ok, Snap1} = ink_snapstart(SnapOpts), CheckSnapDiesFun = @@ -1748,18 +1900,21 @@ close_no_crash_test_() -> close_no_crash_tester() -> RootPath = "test/test_area/journal", build_dummy_journal(), - CDBopts = #cdb_options{max_size=300000, binary_mode=true}, + CDBopts = #cdb_options{max_size = 300000, binary_mode = true}, {ok, Inker} = ink_start( #inker_options{ - root_path=RootPath, - cdb_options=CDBopts, - compression_method=native, - compress_on_receipt=true}), + root_path = RootPath, + cdb_options = CDBopts, + compression_method = native, + compress_on_receipt = true + } + ), SnapOpts = #inker_options{ - start_snapshot=true, bookies_pid = self(), source_inker=Inker}, + start_snapshot = true, bookies_pid = self(), source_inker = Inker + }, {ok, InkSnap} = ink_snapstart(SnapOpts), exit(InkSnap, kill), diff --git a/src/leveled_log.erl b/src/leveled_log.erl index d370dfbc..3d4ec5fb 100644 --- a/src/leveled_log.erl +++ b/src/leveled_log.erl @@ -1,12 +1,12 @@ %% -------- LOG --------- %% %% Centralised logging, to make it easier to change log implementation. -%% +%% %% The use of a ?LOGBASE map is a personal preference. With formatting of code %% using maximum column widths, I prefer not to have log text within the code %% itself, as there may be a temptation to make log text misleadingly terse to %% make the code more readable. -%% +%% %% This means that using logger's capability to add actual code line references %% to the log becomes mute - as all logs log from the same code line. However, %% the process does enforce the use of log references to provide a simple way @@ -15,35 +15,40 @@ %% releases (whereas log references stay stable). The log references %% themselves can be helpful when optimising query times in splunk-like log %% indexing tools. -%% +%% %% There are overheads with the approach (e.g. the maps:get/2 call for each log %% ). However, the eprof testing of leveled indicates that this is not a -%% relatively significant overhead. +%% relatively significant overhead. -module(leveled_log). -include_lib("kernel/include/logger.hrl"). --export([log/2, - log_timer/3, - log_randomtimer/4]). +-export([ + log/2, + log_timer/3, + log_randomtimer/4 +]). -export([log/5, log_timer/6]). --export([set_loglevel/1, - set_databaseid/1, - add_forcedlogs/1, - remove_forcedlogs/1, - get_opts/0, - save/1, - return_settings/0]). - --record(log_options, - {log_level = info :: log_level(), - forced_logs = [] :: [atom()], - database_id :: non_neg_integer()|undefined}). - --type log_level() :: debug | info | warning | error | critical. 
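%% Illustrative sketch of the lookup pattern described in the module
%% comments above: each log reference resolves to a {Level, Format} entry
%% in the ?LOGBASE map, and is emitted only if that level is enabled. This
%% is a simplified stand-in rather than the module's actual log/2, and the
%% is_level_enabled/1 helper is hypothetical.
log_sketch(LogRef, Subs) ->
    {Level, Format} = maps:get(LogRef, ?LOGBASE),
    case is_level_enabled(Level) of
        true ->
            io:format(
                "~w " ++ binary_to_list(Format) ++ "~n", [LogRef | Subs]
            );
        false ->
            ok
    end.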
+-export([ + set_loglevel/1, + set_databaseid/1, + add_forcedlogs/1, + remove_forcedlogs/1, + get_opts/0, + save/1, + return_settings/0 +]). + +-record(log_options, { + log_level = info :: log_level(), + forced_logs = [] :: [atom()], + database_id :: non_neg_integer() | undefined +}). + +-type log_level() :: debug | info | warning | error | critical. -type log_options() :: #log_options{}. -type log_base() :: #{atom() => {log_level(), binary()}}. @@ -52,294 +57,333 @@ -define(LOG_LEVELS, [debug, info, warning, error, critical]). -define(DEFAULT_LOG_LEVEL, error). --define(LOGBASE, - #{ - g0001 => - {info, <<"Generic log point">>}, - g0002 => - {info, <<"Generic log point with term ~w">>}, - d0001 => - {info, <<"Generic debug log">>}, - b0001 => - {info, <<"Bookie starting with Ink ~w Pcl ~w">>}, - b0002 => - {info, <<"Snapshot starting with Ink ~w Pcl ~w">>}, - b0003 => - {info, <<"Bookie closing for reason ~w">>}, - b0004 => - {warning, <<"Bookie snapshot exiting as master store ~w is down for reason ~p">>}, - b0005 => - {info, <<"LedgerSQN=~w at startup">>}, - b0006 => - {info, <<"Reached end of load batch with SQN ~w">>}, - b0008 => - {info, <<"Bucket list finds no more results">>}, - b0009 => - {debug, <<"Bucket list finds Bucket ~w">>}, - b0011 => - {warning, <<"Call to destroy the store and so all files to be removed">>}, - b0013 => - {warning, <<"Long running task took ~w microseconds with task_type=~w">>}, - b0015 => - {info, <<"Put timing with sample_count=~w ink_time=~w prep_time=~w mem_time=~w with total_object_size=~w with sample_period=~w seconds">>}, - b0016 => - {info, <<"Get timing with sample_count=~w and head_time=~w body_time=~w with fetch_count=~w with sample_period=~w seconds">>}, - b0017 => - {info, <<"Snapshot timing with sample_count=~w and bookie_time=~w pcl_time=~w with sample_period=~w seconds">>}, - b0018 => - {info, <<"Positive HEAD responses timed with sample_count=~w and cache_count=~w found_count=~w fetch_ledger_time=~w fetch_ledgercache_time=~w rsp_time=~w notfound_time=~w with sample_period=~w seconds">>}, - b0019 => - {warning, <<"Use of book_indexfold with constraint of Bucket ~w with no StartKey is deprecated">>}, - b0020 => - {warning, <<"Ratio of penciller cache size ~w to bookie's memory cache size ~w is larger than expected">>}, - r0001 => - {debug, <<"Object fold to process batch of ~w objects">>}, - p0001 => - {debug, <<"Ledger snapshot ~w registered">>}, - p0003 => - {debug, <<"Ledger snapshot ~w released">>}, - p0004 => - {debug, <<"Remaining ledger snapshots are ~w">>}, - p0005 => - {debug, <<"Delete confirmed as file ~s is removed from Manifest">>}, - p0007 => - {debug, <<"Shutdown complete for cloned Penciller for reason ~w">>}, - p0008 => - {info, <<"Penciller closing for reason ~w">>}, - p0010 => - {info, <<"level zero discarded_count=~w on close of Penciller">>}, - p0011 => - {debug, <<"Shutdown complete for Penciller for reason ~w">>}, - p0012 => - {info, <<"Store to be started based on manifest sequence number of ~w">>}, - p0013 => - {info, <<"Seqence number of 0 indicates no valid manifest">>}, - p0014 => - {info, <<"Maximum sequence number of ~w found in nonzero levels">>}, - p0015 => - {info, <<"L0 file found ~s">>}, - p0016 => - {info, <<"L0 file had maximum sequence number of ~w">>}, - p0017 => - {info, <<"No L0 file found">>}, - p0018 => - {info, <<"Response to push_mem of returned with cache_size=~w L0_pending=~w merge_backlog=~w cachelines_full=~w">>}, - p0019 => - {info, <<"Rolling level zero to filename ~s at ledger sqn ~w">>}, 
- p0024 => - {info, <<"Outstanding compaction work items of ~w with backlog status of ~w L0 full ~w">>}, - p0029 => - {info, <<"L0 completion confirmed and will transition to not pending">>}, - p0030 => - {warning, <<"We're doomed - intention recorded to destroy all files">>}, - p0031 => - {info, <<"Completion of update to levelzero with cache_size=~w level0_due=~w change_pending=~w MinSQN=~w MaxSQN=~w">>}, - p0032 => - {info, <<"Fetch head timing with sample_count=~w and level timings of foundmem_time=~w found0_time=~w found1_time=~w found2_time=~w found3_time=~w foundlower_time=~w missed_time=~w with counts of foundmem_count=~w found0_count=~w found1_count=~w found2_count=~w found3_count=~w foundlower_count=~w missed_count=~w with sample_period=~w seconds">>}, - p0033 => - {error, <<"Corrupted manifest file at path ~s to be ignored due to error ~s">>}, - p0035 => - {info, <<"Startup with Manifest SQN of ~w">>}, - p0037 => - {debug, <<"Merging of penciller L0 tree from size ~w complete">>}, - p0038 => - {info, <<"Timeout of snapshot with pid=~w at SQN=~w at TS ~w set to timeout=~w">>}, - p0039 => - {debug, <<"Failed to release pid=~w leaving SnapshotCount=~w and MinSQN=~w">>}, - p0040 => - {info, <<"Archiving filename ~s as unused at startup">>}, - p0041 => - {info, <<"Penciller manifest switched from SQN ~w to ~w">>}, - p0042 => - {info, <<"Deferring shutdown due to snapshot_count=~w">>}, - pc001 => - {info, <<"Penciller's clerk ~w started with owner ~w">>}, - pc005 => - {info, <<"Penciller's Clerk ~w shutdown now complete for reason ~w">>}, - pc007 => - {debug, <<"Clerk prompting Penciller regarding manifest change">>}, - pc008 => - {info, <<"Merge from level ~w to merge into ~w files below">>}, - pc009 => - {debug, <<"File ~s to simply switch levels to level ~w">>}, - pc010 => - {info, <<"Merge to be commenced for FileToMerge=~s with MSN=~w">>}, - pc011 => - {info, <<"Merge completed with MSN=~w to Level=~w and FileCounter=~w merge_type=~w">>}, - pc012 => - {debug, <<"File to be created as part of MSN=~w Filename=~s IsBasement=~w">>}, - pc013 => - {warning, <<"Merge resulted in empty file ~s">>}, - pc015 => - {info, <<"File created">>}, - pc016 => - {info, <<"Slow fetch from SFT ~w of ~w us at level ~w with result ~w">>}, - pc017 => - {debug, <<"Notified clerk of manifest change">>}, - pc018 => - {info, <<"Saved manifest file">>}, - pc019 => - {debug, <<"After ~s level ~w is ~w">>}, - pc021 => - {debug, <<"Prompting deletions at ManifestSQN=~w">>}, - pc022 => - {debug, <<"Storing reference to deletions at ManifestSQN=~w">>}, - pc023 => - {info, <<"At level=~w file_count=~w avg_mem=~w file with most memory fn=~s p=~w mem=~w">>}, - pc024 => - {info, <<"Grooming compaction picked file with tomb_count=~w">>}, - pc025 => - {info, <<"At level=~w file_count=~w average words for heap_block_size=~w heap_size=~w recent_size=~w bin_vheap_size=~w">>}, - pc026 => - {info, <<"Performing potential partial to level=~w merge as FileCounter=~w restricting to MaxAdditions=~w">>}, - pm002 => - {info, <<"Completed dump of L0 cache to list of l0cache_size=~w">>}, - sst03 => - {info, <<"Opening SST file with filename ~s slot_count=~w and max sqn ~w">>}, - sst04 => - {debug, <<"Exit called for reason ~w on filename ~s">>}, - sst05 => - {warning, <<"Rename rogue filename ~s to ~s">>}, - sst06 => - {debug, <<"File ~s has been set for delete">>}, - sst07 => - {info, <<"Exit called and now clearing ~s">>}, - sst08 => - {info, <<"Completed creation of ~s at level ~w with max sqn ~w">>}, - sst09 => - {warning, 
<<"Read request exposes slot with bad CRC">>}, - sst10 => - {debug, <<"Expansion sought to support pointer to pid ~w status ~w">>}, - sst11 => - {info, <<"Level zero creation timings in microseconds pmem_fetch=~w merge_lists=~w build_slots=~w build_summary=~w read_switch=~w">>}, - sst12 => - {info, <<"SST Timings at level=~w for sample_count=~w at timing points notfound_time=~w fetchcache_time=~w slotcached_time=~w slotnoncached_time=~w exiting at points notfound_count=~w fetchcache_count=~w slotcached_count=~w slotnoncached_count=~w with sample_period=~w seconds">>}, - sst13 => - {info, <<"SST merge list build timings of fold_toslot=~w slot_hashlist=~w slot_serialise=~w slot_finish=~w is_basement=~w level=~w">>}, - sst14 => - {debug, <<"File ~s has completed BIC">>}, - sst15 => - {warning, <<"Default returned from block due to handling error ~0p">>}, - i0001 => - {info, <<"Unexpected failure to fetch value for Key=~w SQN=~w with reason ~w">>}, - i0002 => - {debug, <<"Journal snapshot ~w registered at SQN ~w">>}, - i0003 => - {debug, <<"Journal snapshot ~w released">>}, - i0004 => - {info, <<"Remaining number of journal snapshots is ~w">>}, - i0005 => - {info, <<"Inker closing journal for reason ~w">>}, - i0006 => - {info, <<"Close triggered with journal_sqn=~w and manifest_sqn=~w">>}, - i0007 => - {info, <<"Inker manifest when closing is:">>}, - i0008 => - {info, <<"Put to new active journal required roll and manifest write">>}, - i0009 => - {info, <<"Updated manifest on startup:">>}, - i0010 => - {info, <<"Unchanged manifest on startup:">>}, - i0011 => - {info, <<"Manifest is empty, starting from manifest SQN 1">>}, - i0012 => - {info, <<"Head manifest entry ~s is complete so new active journal required">>}, - i0013 => - {info, <<"File ~s to be removed from manifest">>}, - i0014 => - {info, <<"On startup loading from filename ~s from SQN ~w">>}, - i0015 => - {info, <<"Opening manifest file at ~s with SQN ~w">>}, - i0016 => - {info, <<"Writing new version of manifest for manifestSQN=~w">>}, - i0017 => - {debug, <<"At SQN=~w journal has filename ~s">>}, - i0018 => - {warning, <<"We're doomed - intention recorded to destroy all files">>}, - i0020 => - {info, <<"Journal backup completed to path=~s with file_count=~w">>}, - i0021 => - {info, <<"Ingoring filename=~s with SQN=~w and JournalSQN=~w">>}, - i0022 => - {info, <<"Removing filename=~s from backup folder as not in backup">>}, - i0023 => - {info, <<"Backup commencing into folder with ~w existing files">>}, - i0024 => - {info, <<"Prompted roll at NewSQN=~w">>}, - i0025 => - {warning, <<"Journal SQN of ~w is below Ledger SQN of ~w anti-entropy will be required">>}, - i0026 => - {info, <<"Deferring shutdown due to snapshot_count=~w">>}, - i0027 => - {debug, <<"Shutdown complete for cloned Inker for reason ~w">>}, - i0028 => - {debug, <<"Shutdown complete for Inker for reason ~w">>}, - ic001 => - {info, <<"Closed for reason ~w so maybe leaving garbage">>}, - ic002 => - {info, <<"Clerk updating Inker as compaction complete of ~w files">>}, - ic003 => - {info, <<"Scoring of compaction runs complete with highest score=~w with run of run_length=~w">>}, - ic004 => - {info, <<"Score=~w with mean_byte_jump=~w for filename ~s">>}, - ic005 => - {info, <<"Compaction to be performed on file_count=~w with compaction_score=~w">>}, - ic006 => - {info, <<"Filename ~s is part of compaction run">>}, - ic007 => - {info, <<"Clerk has completed compaction process">>}, - ic008 => - {info, <<"Compaction source ~s has yielded ~w positions">>}, - ic009 => - {info, 
<<"Generate journal for compaction with filename ~s">>}, - ic010 => - {info, <<"Clearing journal with filename ~s">>}, - ic011 => - {info, <<"Not clearing filename ~s as modified delta is only ~w seconds">>}, - ic012 => - {warning, <<"Tag ~w not found in Strategy ~w - maybe corrupted">>}, - ic013 => - {warning, <<"File with name ~s to be ignored in manifest as scanning for first key returned empty - maybe corrupted">>}, - ic014 => - {info, <<"Compaction to be run with strategy ~w and max_run_length ~w">>}, - cdb01 => - {info, <<"Opening file for writing with filename ~s">>}, - cdb02 => - {info, <<"Opening file for reading with filename ~s">>}, - cdb03 => - {info, <<"Re-opening file for reading with filename ~s">>}, - cdb04 => - {info, <<"Deletion confirmed for file ~s at ManifestSQN ~w">>}, - cdb05 => - {info, <<"Closing of filename ~s from state ~w for reason ~w">>}, - cdb06 => - {warning, <<"File to be truncated at last position of ~w with end of file at ~w">>}, - cdb07 => - {info, <<"Hashtree index computed">>}, - cdb08 => - {info, <<"Renaming file from ~s to ~s for which existence is ~w">>}, - cdb09 => - {info, <<"Failure to read Key/Value at Position ~w in scan this may be the end of the file">>}, - cdb10 => - {warning, <<"CRC check failed due to error=~s">>}, - cdb12 => - {info, <<"Hashtree index written">>}, - cdb13 => - {debug, <<"Write options of ~w">>}, - cdb14 => - {info, <<"Microsecond timings for hashtree build of to_list=~w sort=~w build=~w">>}, - cdb15 => - {info, <<"Collision in search for hash ~w">>}, - cdb18 => - {info, <<"Handled return and write of hashtable">>}, - cdb19 => - {info, <<"Sample timings in microseconds for sample_count=~w with totals of cycle_count=~w index_time=~w read_time=~w with sample_period=~w seconds">>}, - cdb20 => - {warning, <<"Error ~w caught when safe reading a file to length ~w">>}, - cdb21 => - {warning, <<"File ~s to be deleted but already gone">>} - }). 
- +-define(LOGBASE, #{ + g0001 => + {info, <<"Generic log point">>}, + g0002 => + {info, <<"Generic log point with term ~w">>}, + d0001 => + {info, <<"Generic debug log">>}, + b0001 => + {info, <<"Bookie starting with Ink ~w Pcl ~w">>}, + b0002 => + {info, <<"Snapshot starting with Ink ~w Pcl ~w">>}, + b0003 => + {info, <<"Bookie closing for reason ~w">>}, + b0004 => + {warning, + <<"Bookie snapshot exiting as master store ~w is down for reason ~p">>}, + b0005 => + {info, <<"LedgerSQN=~w at startup">>}, + b0006 => + {info, <<"Reached end of load batch with SQN ~w">>}, + b0008 => + {info, <<"Bucket list finds no more results">>}, + b0009 => + {debug, <<"Bucket list finds Bucket ~w">>}, + b0011 => + {warning, + <<"Call to destroy the store and so all files to be removed">>}, + b0013 => + {warning, + <<"Long running task took ~w microseconds with task_type=~w">>}, + b0015 => + {info, + <<"Put timing with sample_count=~w ink_time=~w prep_time=~w mem_time=~w with total_object_size=~w with sample_period=~w seconds">>}, + b0016 => + {info, + <<"Get timing with sample_count=~w and head_time=~w body_time=~w with fetch_count=~w with sample_period=~w seconds">>}, + b0017 => + {info, + <<"Snapshot timing with sample_count=~w and bookie_time=~w pcl_time=~w with sample_period=~w seconds">>}, + b0018 => + {info, + <<"Positive HEAD responses timed with sample_count=~w and cache_count=~w found_count=~w fetch_ledger_time=~w fetch_ledgercache_time=~w rsp_time=~w notfound_time=~w with sample_period=~w seconds">>}, + b0019 => + {warning, + <<"Use of book_indexfold with constraint of Bucket ~w with no StartKey is deprecated">>}, + b0020 => + {warning, + <<"Ratio of penciller cache size ~w to bookie's memory cache size ~w is larger than expected">>}, + r0001 => + {debug, <<"Object fold to process batch of ~w objects">>}, + p0001 => + {debug, <<"Ledger snapshot ~w registered">>}, + p0003 => + {debug, <<"Ledger snapshot ~w released">>}, + p0004 => + {debug, <<"Remaining ledger snapshots are ~w">>}, + p0005 => + {debug, <<"Delete confirmed as file ~s is removed from Manifest">>}, + p0007 => + {debug, <<"Shutdown complete for cloned Penciller for reason ~w">>}, + p0008 => + {info, <<"Penciller closing for reason ~w">>}, + p0010 => + {info, <<"level zero discarded_count=~w on close of Penciller">>}, + p0011 => + {debug, <<"Shutdown complete for Penciller for reason ~w">>}, + p0012 => + {info, + <<"Store to be started based on manifest sequence number of ~w">>}, + p0013 => + {info, <<"Seqence number of 0 indicates no valid manifest">>}, + p0014 => + {info, <<"Maximum sequence number of ~w found in nonzero levels">>}, + p0015 => + {info, <<"L0 file found ~s">>}, + p0016 => + {info, <<"L0 file had maximum sequence number of ~w">>}, + p0017 => + {info, <<"No L0 file found">>}, + p0018 => + {info, + <<"Response to push_mem of returned with cache_size=~w L0_pending=~w merge_backlog=~w cachelines_full=~w">>}, + p0019 => + {info, <<"Rolling level zero to filename ~s at ledger sqn ~w">>}, + p0024 => + {info, + <<"Outstanding compaction work items of ~w with backlog status of ~w L0 full ~w">>}, + p0029 => + {info, + <<"L0 completion confirmed and will transition to not pending">>}, + p0030 => + {warning, <<"We're doomed - intention recorded to destroy all files">>}, + p0031 => + {info, + <<"Completion of update to levelzero with cache_size=~w level0_due=~w change_pending=~w MinSQN=~w MaxSQN=~w">>}, + p0032 => + {info, + <<"Fetch head timing with sample_count=~w and level timings of foundmem_time=~w found0_time=~w 
found1_time=~w found2_time=~w found3_time=~w foundlower_time=~w missed_time=~w with counts of foundmem_count=~w found0_count=~w found1_count=~w found2_count=~w found3_count=~w foundlower_count=~w missed_count=~w with sample_period=~w seconds">>}, + p0033 => + {error, + <<"Corrupted manifest file at path ~s to be ignored due to error ~s">>}, + p0035 => + {info, <<"Startup with Manifest SQN of ~w">>}, + p0037 => + {debug, <<"Merging of penciller L0 tree from size ~w complete">>}, + p0038 => + {info, + <<"Timeout of snapshot with pid=~w at SQN=~w at TS ~w set to timeout=~w">>}, + p0039 => + {debug, + <<"Failed to release pid=~w leaving SnapshotCount=~w and MinSQN=~w">>}, + p0040 => + {info, <<"Archiving filename ~s as unused at startup">>}, + p0041 => + {info, <<"Penciller manifest switched from SQN ~w to ~w">>}, + p0042 => + {info, <<"Deferring shutdown due to snapshot_count=~w">>}, + pc001 => + {info, <<"Penciller's clerk ~w started with owner ~w">>}, + pc005 => + {info, <<"Penciller's Clerk ~w shutdown now complete for reason ~w">>}, + pc007 => + {debug, <<"Clerk prompting Penciller regarding manifest change">>}, + pc008 => + {info, <<"Merge from level ~w to merge into ~w files below">>}, + pc009 => + {debug, <<"File ~s to simply switch levels to level ~w">>}, + pc010 => + {info, <<"Merge to be commenced for FileToMerge=~s with MSN=~w">>}, + pc011 => + {info, + <<"Merge completed with MSN=~w to Level=~w and FileCounter=~w merge_type=~w">>}, + pc012 => + {debug, + <<"File to be created as part of MSN=~w Filename=~s IsBasement=~w">>}, + pc013 => + {warning, <<"Merge resulted in empty file ~s">>}, + pc015 => + {info, <<"File created">>}, + pc016 => + {info, + <<"Slow fetch from SFT ~w of ~w us at level ~w with result ~w">>}, + pc017 => + {debug, <<"Notified clerk of manifest change">>}, + pc018 => + {info, <<"Saved manifest file">>}, + pc019 => + {debug, <<"After ~s level ~w is ~w">>}, + pc021 => + {debug, <<"Prompting deletions at ManifestSQN=~w">>}, + pc022 => + {debug, <<"Storing reference to deletions at ManifestSQN=~w">>}, + pc023 => + {info, + <<"At level=~w file_count=~w avg_mem=~w file with most memory fn=~s p=~w mem=~w">>}, + pc024 => + {info, <<"Grooming compaction picked file with tomb_count=~w">>}, + pc025 => + {info, + <<"At level=~w file_count=~w average words for heap_block_size=~w heap_size=~w recent_size=~w bin_vheap_size=~w">>}, + pc026 => + {info, + <<"Performing potential partial to level=~w merge as FileCounter=~w restricting to MaxAdditions=~w">>}, + pm002 => + {info, <<"Completed dump of L0 cache to list of l0cache_size=~w">>}, + sst03 => + {info, + <<"Opening SST file with filename ~s slot_count=~w and max sqn ~w">>}, + sst04 => + {debug, <<"Exit called for reason ~w on filename ~s">>}, + sst05 => + {warning, <<"Rename rogue filename ~s to ~s">>}, + sst06 => + {debug, <<"File ~s has been set for delete">>}, + sst07 => + {info, <<"Exit called and now clearing ~s">>}, + sst08 => + {info, <<"Completed creation of ~s at level ~w with max sqn ~w">>}, + sst09 => + {warning, <<"Read request exposes slot with bad CRC">>}, + sst10 => + {debug, <<"Expansion sought to support pointer to pid ~w status ~w">>}, + sst11 => + {info, + <<"Level zero creation timings in microseconds pmem_fetch=~w merge_lists=~w build_slots=~w build_summary=~w read_switch=~w">>}, + sst12 => + {info, + <<"SST Timings at level=~w for sample_count=~w at timing points notfound_time=~w fetchcache_time=~w slotcached_time=~w slotnoncached_time=~w exiting at points notfound_count=~w fetchcache_count=~w 
slotcached_count=~w slotnoncached_count=~w with sample_period=~w seconds">>}, + sst13 => + {info, + <<"SST merge list build timings of fold_toslot=~w slot_hashlist=~w slot_serialise=~w slot_finish=~w is_basement=~w level=~w">>}, + sst14 => + {debug, <<"File ~s has completed BIC">>}, + sst15 => + {warning, <<"Default returned from block due to handling error ~0p">>}, + i0001 => + {info, + <<"Unexpected failure to fetch value for Key=~w SQN=~w with reason ~w">>}, + i0002 => + {debug, <<"Journal snapshot ~w registered at SQN ~w">>}, + i0003 => + {debug, <<"Journal snapshot ~w released">>}, + i0004 => + {info, <<"Remaining number of journal snapshots is ~w">>}, + i0005 => + {info, <<"Inker closing journal for reason ~w">>}, + i0006 => + {info, <<"Close triggered with journal_sqn=~w and manifest_sqn=~w">>}, + i0007 => + {info, <<"Inker manifest when closing is:">>}, + i0008 => + {info, + <<"Put to new active journal required roll and manifest write">>}, + i0009 => + {info, <<"Updated manifest on startup:">>}, + i0010 => + {info, <<"Unchanged manifest on startup:">>}, + i0011 => + {info, <<"Manifest is empty, starting from manifest SQN 1">>}, + i0012 => + {info, + <<"Head manifest entry ~s is complete so new active journal required">>}, + i0013 => + {info, <<"File ~s to be removed from manifest">>}, + i0014 => + {info, <<"On startup loading from filename ~s from SQN ~w">>}, + i0015 => + {info, <<"Opening manifest file at ~s with SQN ~w">>}, + i0016 => + {info, <<"Writing new version of manifest for manifestSQN=~w">>}, + i0017 => + {debug, <<"At SQN=~w journal has filename ~s">>}, + i0018 => + {warning, <<"We're doomed - intention recorded to destroy all files">>}, + i0020 => + {info, <<"Journal backup completed to path=~s with file_count=~w">>}, + i0021 => + {info, <<"Ingoring filename=~s with SQN=~w and JournalSQN=~w">>}, + i0022 => + {info, <<"Removing filename=~s from backup folder as not in backup">>}, + i0023 => + {info, <<"Backup commencing into folder with ~w existing files">>}, + i0024 => + {info, <<"Prompted roll at NewSQN=~w">>}, + i0025 => + {warning, + <<"Journal SQN of ~w is below Ledger SQN of ~w anti-entropy will be required">>}, + i0026 => + {info, <<"Deferring shutdown due to snapshot_count=~w">>}, + i0027 => + {debug, <<"Shutdown complete for cloned Inker for reason ~w">>}, + i0028 => + {debug, <<"Shutdown complete for Inker for reason ~w">>}, + ic001 => + {info, <<"Closed for reason ~w so maybe leaving garbage">>}, + ic002 => + {info, <<"Clerk updating Inker as compaction complete of ~w files">>}, + ic003 => + {info, + <<"Scoring of compaction runs complete with highest score=~w with run of run_length=~w">>}, + ic004 => + {info, <<"Score=~w with mean_byte_jump=~w for filename ~s">>}, + ic005 => + {info, + <<"Compaction to be performed on file_count=~w with compaction_score=~w">>}, + ic006 => + {info, <<"Filename ~s is part of compaction run">>}, + ic007 => + {info, <<"Clerk has completed compaction process">>}, + ic008 => + {info, <<"Compaction source ~s has yielded ~w positions">>}, + ic009 => + {info, <<"Generate journal for compaction with filename ~s">>}, + ic010 => + {info, <<"Clearing journal with filename ~s">>}, + ic011 => + {info, + <<"Not clearing filename ~s as modified delta is only ~w seconds">>}, + ic012 => + {warning, <<"Tag ~w not found in Strategy ~w - maybe corrupted">>}, + ic013 => + {warning, + <<"File with name ~s to be ignored in manifest as scanning for first key returned empty - maybe corrupted">>}, + ic014 => + {info, + <<"Compaction to be run with 
strategy ~w and max_run_length ~w">>}, + cdb01 => + {info, <<"Opening file for writing with filename ~s">>}, + cdb02 => + {info, <<"Opening file for reading with filename ~s">>}, + cdb03 => + {info, <<"Re-opening file for reading with filename ~s">>}, + cdb04 => + {info, <<"Deletion confirmed for file ~s at ManifestSQN ~w">>}, + cdb05 => + {info, <<"Closing of filename ~s from state ~w for reason ~w">>}, + cdb06 => + {warning, + <<"File to be truncated at last position of ~w with end of file at ~w">>}, + cdb07 => + {info, <<"Hashtree index computed">>}, + cdb08 => + {info, <<"Renaming file from ~s to ~s for which existence is ~w">>}, + cdb09 => + {info, + <<"Failure to read Key/Value at Position ~w in scan this may be the end of the file">>}, + cdb10 => + {warning, <<"CRC check failed due to error=~s">>}, + cdb12 => + {info, <<"Hashtree index written">>}, + cdb13 => + {debug, <<"Write options of ~w">>}, + cdb14 => + {info, + <<"Microsecond timings for hashtree build of to_list=~w sort=~w build=~w">>}, + cdb15 => + {info, <<"Collision in search for hash ~w">>}, + cdb18 => + {info, <<"Handled return and write of hashtable">>}, + cdb19 => + {info, + <<"Sample timings in microseconds for sample_count=~w with totals of cycle_count=~w index_time=~w read_time=~w with sample_period=~w seconds">>}, + cdb20 => + {warning, <<"Error ~w caught when safe reading a file to length ~w">>}, + cdb21 => + {warning, <<"File ~s to be deleted but already gone">>} +}). %%%============================================================================ %%% Manage Log Options @@ -444,24 +488,26 @@ should_i_log(LogLevel, Levels, LogRef, LogOpts) -> true -> true; false -> - if CurLevel == LogLevel -> + if + CurLevel == LogLevel -> true; - true -> + true -> is_active_level(Levels, CurLevel, LogLevel) end end. -is_active_level([L|_], L, _) -> true; -is_active_level([L|_], _, L) -> false; -is_active_level([_|T], C, L) -> is_active_level(T, C, L). +is_active_level([L | _], L, _) -> true; +is_active_level([L | _], _, L) -> false; +is_active_level([_ | T], C, L) -> is_active_level(T, C, L). -spec log_timer(atom(), list(), erlang:timestamp()) -> ok. log_timer(LogReference, Subs, StartTime) -> log_timer(LogReference, Subs, StartTime, ?LOG_LEVELS, ?LOGBASE, backend). -spec log_timer( - atom(), list(), erlang:timestamp(), list(log_level()), log_base(), atom()) - -> ok. + atom(), list(), erlang:timestamp(), list(log_level()), log_base(), atom() +) -> + ok. log_timer(LogRef, Subs, StartTime, SupportedLevels, LogBase, Tag) -> {LogLevel, Log} = maps:get(LogRef, LogBase), LogOpts = get_opts(), @@ -492,22 +538,31 @@ log_randomtimer(LogReference, Subs, StartTime, RandomProb) -> ok end. --spec log_prefix(atom(), non_neg_integer()|undefined, pid()) -> io_lib:chars(). +-spec log_prefix(atom(), non_neg_integer() | undefined, pid()) -> + io_lib:chars(). log_prefix(LogRef, undefined, Pid) -> ["log_ref=", atom_to_list(LogRef), " pid=", pid_to_list(Pid), " "]; log_prefix(LogRef, DBid, Pid) -> [ - "log_ref=", atom_to_list(LogRef), - " db_id=", integer_to_list(DBid), - " pid=", pid_to_list(Pid), " " + "log_ref=", + atom_to_list(LogRef), + " db_id=", + integer_to_list(DBid), + " pid=", + pid_to_list(Pid), + " " ]. -spec duration_text(erlang:timestamp()) -> io_lib:chars(). 
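%% Illustrative notes only (not part of the patch), on the level helpers above:
%% is_active_level/3 walks ?LOG_LEVELS from least to most severe, so with a
%% configured level of info a warning log is emitted, but not vice versa:
%%   is_active_level(?LOG_LEVELS, info, warning) -> true
%%   is_active_level(?LOG_LEVELS, warning, info) -> false
%% The exported controls can also force individual refs from ?LOGBASE back on
%% after the level has been raised, e.g. (b0015 is chosen purely as an example):
%%   ok = leveled_log:set_loglevel(warning),
%%   ok = leveled_log:add_forcedlogs([b0015]),
%%   ok = leveled_log:remove_forcedlogs([b0015]).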
duration_text(StartTime) -> case timer:now_diff(os:timestamp(), StartTime) of US when US > 1000 -> - [" with us_duration=", integer_to_list(US), - " or ms_duration=", integer_to_list(US div 1000)]; + [ + " with us_duration=", + integer_to_list(US), + " or ms_duration=", + integer_to_list(US div 1000) + ]; US -> [" with us_duration=", integer_to_list(US)] end. @@ -534,12 +589,12 @@ log_wrongkey_test() -> ?assertException( error, {badkey, wrong0001}, - log(wrong0001, [],[warning, error], ?LOGBASE, backend) + log(wrong0001, [], [warning, error], ?LOGBASE, backend) ). logtimer_wrongkey_test() -> ST = os:timestamp(), - % Note - + % Note - % An issue with cover means issues with ?assertException, where the % function being tested is split across lines, the closing bracket on the % next line is not recognised as being covered. We want 100% coverage, so @@ -568,5 +623,4 @@ badloglevel_test() -> ?assertMatch(true, is_active_level(?LOG_LEVELS, debug, unsupported)), ?assertMatch(true, is_active_level(?LOG_LEVELS, critical, unsupported)). - -endif. diff --git a/src/leveled_monitor.erl b/src/leveled_monitor.erl index 4e776f78..ae38e4f3 100644 --- a/src/leveled_monitor.erl +++ b/src/leveled_monitor.erl @@ -2,16 +2,16 @@ %% %% The bookie's monitor is a process dedicated to gathering and reporting %% stats related to performance of the leveled store. -%% +%% %% Depending on the sample frequency, a process will randomly determine whether %% or not to take a timing of a transaction. If a timing is taken the result %% is cast to the moniitor. -%% +%% %% The monitor gathers stats across the store, and then on a timing loop logs %% out the gathered stats for one of the monitored stat types once every %% ?LOG_FREQUENCY_SECONDS. On each timing trigger the monitor should move on %% to the next timing stat in its list. -%% +%% %% The different types of timing stats are defined within the ?LOG_LIST. Each %% type of timing stat has its own record maintained in the monitor loop state. @@ -25,7 +25,8 @@ handle_cast/2, handle_info/2, terminate/2, - code_change/3]). + code_change/3 +]). -export([ monitor_start/2, @@ -37,23 +38,30 @@ log_level/2, log_add/2, log_remove/2, - get_defaults/0]). - --define(LOG_LIST, - [bookie_get, bookie_put, bookie_head, bookie_snap, - pcl_fetch, sst_fetch, cdb_get]). + get_defaults/0 +]). + +-define(LOG_LIST, [ + bookie_get, + bookie_put, + bookie_head, + bookie_snap, + pcl_fetch, + sst_fetch, + cdb_get +]). -define(LOG_FREQUENCY_SECONDS, 30). - --record(bookie_get_timings, - {sample_count = 0 :: non_neg_integer(), +-record(bookie_get_timings, { + sample_count = 0 :: non_neg_integer(), head_time = 0 :: non_neg_integer(), body_time = 0 :: non_neg_integer(), fetch_count = 0 :: non_neg_integer(), - sample_start_time = os:timestamp() :: erlang:timestamp()}). + sample_start_time = os:timestamp() :: erlang:timestamp() +}). --record(bookie_head_timings, - {sample_count = 0 :: non_neg_integer(), +-record(bookie_head_timings, { + sample_count = 0 :: non_neg_integer(), cache_count = 0 :: non_neg_integer(), found_count = 0 :: non_neg_integer(), cache_hits = 0 :: non_neg_integer(), @@ -61,24 +69,27 @@ fetch_ledgercache_time = 0 :: non_neg_integer(), rsp_time = 0 :: non_neg_integer(), notfound_time = 0 :: non_neg_integer(), - sample_start_time = os:timestamp() :: erlang:timestamp()}). + sample_start_time = os:timestamp() :: erlang:timestamp() +}). 
--record(bookie_put_timings, - {sample_count = 0 :: non_neg_integer(), +-record(bookie_put_timings, { + sample_count = 0 :: non_neg_integer(), ink_time = 0 :: non_neg_integer(), prep_time = 0 :: non_neg_integer(), mem_time = 0 :: non_neg_integer(), total_size = 0 :: non_neg_integer(), - sample_start_time = os:timestamp() :: erlang:timestamp()}). + sample_start_time = os:timestamp() :: erlang:timestamp() +}). --record(bookie_snap_timings, - {sample_count = 0 :: non_neg_integer(), +-record(bookie_snap_timings, { + sample_count = 0 :: non_neg_integer(), bookie_time = 0 :: non_neg_integer(), pcl_time = 0 :: non_neg_integer(), - sample_start_time = os:timestamp() :: erlang:timestamp()}). + sample_start_time = os:timestamp() :: erlang:timestamp() +}). --record(pcl_fetch_timings, - {sample_count = 0 :: non_neg_integer(), +-record(pcl_fetch_timings, { + sample_count = 0 :: non_neg_integer(), foundmem_time = 0 :: non_neg_integer(), found0_time = 0 :: non_neg_integer(), found1_time = 0 :: non_neg_integer(), @@ -93,10 +104,11 @@ found3_count = 0 :: non_neg_integer(), foundlower_count = 0 :: non_neg_integer(), notfound_count = 0 :: non_neg_integer(), - sample_start_time = os:timestamp() :: erlang:timestamp()}). + sample_start_time = os:timestamp() :: erlang:timestamp() +}). --record(sst_fetch_timings, - {sample_count = 0 :: non_neg_integer(), +-record(sst_fetch_timings, { + sample_count = 0 :: non_neg_integer(), fetchcache_time = 0 :: non_neg_integer(), slotcached_time = 0 :: non_neg_integer(), slotnoncached_time = 0 :: non_neg_integer(), @@ -105,17 +117,19 @@ slotcached_count = 0 :: non_neg_integer(), slotnoncached_count = 0 :: non_neg_integer(), notfound_count = 0 :: non_neg_integer(), - sample_start_time = os:timestamp() :: erlang:timestamp()}). + sample_start_time = os:timestamp() :: erlang:timestamp() +}). --record(cdb_get_timings, - {sample_count = 0 :: non_neg_integer(), +-record(cdb_get_timings, { + sample_count = 0 :: non_neg_integer(), cycle_count = 0 :: non_neg_integer(), index_time = 0 :: non_neg_integer(), read_time = 0 :: non_neg_integer(), - sample_start_time = os:timestamp() :: erlang:timestamp()}). + sample_start_time = os:timestamp() :: erlang:timestamp() +}). --record(state, - {bookie_get_timings = #bookie_get_timings{} :: bookie_get_timings(), +-record(state, { + bookie_get_timings = #bookie_get_timings{} :: bookie_get_timings(), bookie_head_timings = #bookie_head_timings{} :: bookie_head_timings(), bookie_put_timings = #bookie_put_timings{} :: bookie_put_timings(), bookie_snap_timings = #bookie_snap_timings{} :: bookie_snap_timings(), @@ -123,8 +137,8 @@ sst_fetch_timings = [] :: list(sst_fetch_timings()), cdb_get_timings = #cdb_get_timings{} :: cdb_get_timings(), log_frequency = ?LOG_FREQUENCY_SECONDS :: pos_integer(), - log_order = [] :: list(log_type())}). - + log_order = [] :: list(log_type()) +}). -type bookie_get_timings() :: #bookie_get_timings{}. -type bookie_head_timings() :: #bookie_head_timings{}. @@ -135,35 +149,44 @@ -type sst_fetch_timings() :: {leveled_pmanifest:lsm_level(), #sst_fetch_timings{}}. -type log_type() :: - bookie_head|bookie_get|bookie_put|bookie_snap|pcl_fetch|sst_fetch|cdb_get. --type pcl_level() :: memory|leveled_pmanifest:lsm_level(). + bookie_head + | bookie_get + | bookie_put + | bookie_snap + | pcl_fetch + | sst_fetch + | cdb_get. +-type pcl_level() :: memory | leveled_pmanifest:lsm_level(). -type sst_fetch_type() :: - fetch_cache|slot_cachedblock|slot_noncachedblock|not_found. + fetch_cache | slot_cachedblock | slot_noncachedblock | not_found. 
-type microsecs() :: pos_integer(). -type byte_size() :: pos_integer(). --type monitor() :: {no_monitor, 0}|{pid(), 0..100}. --type timing() :: no_timing|microsecs(). - +-type monitor() :: {no_monitor, 0} | {pid(), 0..100}. +-type timing() :: no_timing | microsecs(). -type bookie_get_update() :: - {bookie_get_update, microsecs(), microsecs()|not_found}. + {bookie_get_update, microsecs(), microsecs() | not_found}. -type bookie_head_update() :: - {bookie_head_update, microsecs(), microsecs()|not_found, 0..1}. + {bookie_head_update, microsecs(), microsecs() | not_found, 0..1}. -type bookie_put_update() :: {bookie_put_update, microsecs(), microsecs(), microsecs(), byte_size()}. -type bookie_snap_update() :: {bookie_snap_update, microsecs(), microsecs()}. -type pcl_fetch_update() :: - {pcl_fetch_update, not_found|pcl_level(), microsecs()}. + {pcl_fetch_update, not_found | pcl_level(), microsecs()}. -type sst_fetch_update() :: - {sst_fetch_update, - leveled_pmanifest:lsm_level(), sst_fetch_type(), microsecs()}. + {sst_fetch_update, leveled_pmanifest:lsm_level(), sst_fetch_type(), + microsecs()}. -type cdb_get_update() :: {cdb_get_update, pos_integer(), microsecs(), microsecs()}. -type statistic() :: - bookie_get_update()|bookie_head_update()|bookie_put_update()| - bookie_snap_update()| - pcl_fetch_update()|sst_fetch_update()|cdb_get_update(). + bookie_get_update() + | bookie_head_update() + | bookie_put_update() + | bookie_snap_update() + | pcl_fetch_update() + | sst_fetch_update() + | cdb_get_update(). -export_type([monitor/0, timing/0, sst_fetch_type/0, log_type/0]). @@ -175,7 +198,8 @@ monitor_start(LogFreq, LogOrder) -> {ok, Monitor} = gen_server:start_link( - ?MODULE, [leveled_log:get_opts(), LogFreq, LogOrder], []), + ?MODULE, [leveled_log:get_opts(), LogFreq, LogOrder], [] + ), {ok, Monitor}. -spec add_stat(pid(), statistic()) -> ok. @@ -186,7 +210,7 @@ add_stat(Watcher, Statistic) -> report_stats(Watcher, StatsType) -> gen_server:cast(Watcher, {report_stats, StatsType}). --spec monitor_close(pid()|no_monitor) -> ok. +-spec monitor_close(pid() | no_monitor) -> ok. monitor_close(no_monitor) -> ok; monitor_close(Watcher) -> @@ -204,7 +228,7 @@ log_add(Pid, ForcedLogs) -> log_remove(Pid, ForcedLogs) -> gen_server:cast(Pid, {log_remove, ForcedLogs}). --spec maybe_time(monitor()) -> erlang:timestamp()|no_timing. +-spec maybe_time(monitor()) -> erlang:timestamp() | no_timing. maybe_time({_Pid, TimingProbability}) -> case rand:uniform(100) of N when N =< TimingProbability -> @@ -214,8 +238,9 @@ maybe_time({_Pid, TimingProbability}) -> end. -spec step_time( - erlang:timestamp()|no_timing) -> - {pos_integer(), erlang:timestamp()}|{no_timing, no_timing}. + erlang:timestamp() | no_timing +) -> + {pos_integer(), erlang:timestamp()} | {no_timing, no_timing}. step_time(no_timing) -> {no_timing, no_timing}; step_time(TS) -> @@ -232,14 +257,17 @@ get_defaults() -> init([LogOpts, LogFrequency, LogOrder]) -> leveled_log:save(LogOpts), - RandomLogOrder = + RandomLogOrder = lists:map( fun({_R, SL}) -> SL end, lists:keysort( 1, lists:map( fun(L) -> {rand:uniform(), L} end, - LogOrder))), + LogOrder + ) + ) + ), InitialJitter = rand:uniform(2 * 1000 * LogFrequency), erlang:send_after(InitialJitter, self(), report_next_stats), {ok, #state{log_frequency = LogFrequency, log_order = RandomLogOrder}}. 
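%% Illustrative sketch only (not part of the patch) of the calling pattern the
%% sampling API above is designed for: maybe_time/1 returns a timestamp for
%% only the configured percentage of calls, step_time/1 threads the timings
%% through, and add_stat/2 casts the result to the monitor. The helpers
%% do_index_read/0 and do_value_read/0 are hypothetical stand-ins.
sample_cdb_get(Monitor = {Watcher, _SamplePerc}, CycleCount) ->
    SW0 = leveled_monitor:maybe_time(Monitor),
    ok = do_index_read(),
    {IndexTime, SW1} = leveled_monitor:step_time(SW0),
    ok = do_value_read(),
    {ReadTime, _SW2} = leveled_monitor:step_time(SW1),
    case {IndexTime, ReadTime} of
        {no_timing, no_timing} ->
            %% This call was not selected for sampling - nothing to report
            ok;
        _ ->
            leveled_monitor:add_stat(
                Watcher, {cdb_get_update, CycleCount, IndexTime, ReadTime}
            )
    end.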
@@ -283,13 +311,17 @@ handle_cast({bookie_get_update, HeadTime, BodyTime}, State) -> {FC0, HT0, BT0} = case BodyTime of not_found -> - {Timings#bookie_get_timings.fetch_count, + { + Timings#bookie_get_timings.fetch_count, Timings#bookie_get_timings.head_time + HeadTime, - Timings#bookie_get_timings.body_time}; + Timings#bookie_get_timings.body_time + }; BodyTime -> - {Timings#bookie_get_timings.fetch_count + 1, + { + Timings#bookie_get_timings.fetch_count + 1, Timings#bookie_get_timings.head_time + HeadTime, - Timings#bookie_get_timings.body_time + BodyTime} + Timings#bookie_get_timings.body_time + BodyTime + } end, UpdTimings = Timings#bookie_get_timings{ @@ -374,14 +406,14 @@ handle_cast({pcl_fetch_update, Level, FetchTime}, State) -> found3_time = Timings#pcl_fetch_timings.found3_time + FetchTime }; - N when N > 3 -> + N when N > 3 -> Timings#pcl_fetch_timings{ foundlower_count = Timings#pcl_fetch_timings.foundlower_count + 1, foundlower_time = Timings#pcl_fetch_timings.foundlower_time + FetchTime } - end, + end, UpdTimings0 = UpdTimings#pcl_fetch_timings{sample_count = SC0}, {noreply, State#state{pcl_fetch_timings = UpdTimings0}}; handle_cast({sst_fetch_update, Level, FetchPoint, FetchTime}, State) -> @@ -425,8 +457,8 @@ handle_cast({sst_fetch_update, Level, FetchPoint, FetchTime}, State) -> } end, UpdLevel = {Level, UpdTimings#sst_fetch_timings{sample_count = SC0}}, - UpdLevels = - lists:ukeysort(1, [UpdLevel|State#state.sst_fetch_timings]), + UpdLevels = + lists:ukeysort(1, [UpdLevel | State#state.sst_fetch_timings]), {noreply, State#state{sst_fetch_timings = UpdLevels}}; handle_cast({cdb_get_update, CycleCount, IndexTime, ReadTime}, State) -> Timings = State#state.cdb_get_timings, @@ -447,25 +479,30 @@ handle_cast({report_stats, bookie_get}, State) -> SamplePeriod = timer:now_diff( os:timestamp(), - Timings#bookie_get_timings.sample_start_time) div 1000000, + Timings#bookie_get_timings.sample_start_time + ) div 1000000, leveled_log:log( b0016, - [Timings#bookie_get_timings.sample_count, + [ + Timings#bookie_get_timings.sample_count, Timings#bookie_get_timings.head_time, Timings#bookie_get_timings.body_time, Timings#bookie_get_timings.fetch_count, SamplePeriod - ]), + ] + ), {noreply, State#state{bookie_get_timings = #bookie_get_timings{}}}; handle_cast({report_stats, bookie_head}, State) -> Timings = State#state.bookie_head_timings, SamplePeriod = timer:now_diff( os:timestamp(), - Timings#bookie_head_timings.sample_start_time) div 1000000, + Timings#bookie_head_timings.sample_start_time + ) div 1000000, leveled_log:log( b0018, - [Timings#bookie_head_timings.sample_count, + [ + Timings#bookie_head_timings.sample_count, Timings#bookie_head_timings.cache_count, Timings#bookie_head_timings.found_count, Timings#bookie_head_timings.fetch_ledger_time, @@ -473,47 +510,56 @@ handle_cast({report_stats, bookie_head}, State) -> Timings#bookie_head_timings.rsp_time, Timings#bookie_head_timings.notfound_time, SamplePeriod - ]), + ] + ), {noreply, State#state{bookie_head_timings = #bookie_head_timings{}}}; handle_cast({report_stats, bookie_put}, State) -> Timings = State#state.bookie_put_timings, SamplePeriod = timer:now_diff( os:timestamp(), - Timings#bookie_put_timings.sample_start_time) div 1000000, + Timings#bookie_put_timings.sample_start_time + ) div 1000000, leveled_log:log( b0015, - [Timings#bookie_put_timings.sample_count, + [ + Timings#bookie_put_timings.sample_count, Timings#bookie_put_timings.ink_time, Timings#bookie_put_timings.prep_time, Timings#bookie_put_timings.mem_time, 
Timings#bookie_put_timings.total_size, SamplePeriod - ]), + ] + ), {noreply, State#state{bookie_put_timings = #bookie_put_timings{}}}; handle_cast({report_stats, bookie_snap}, State) -> Timings = State#state.bookie_snap_timings, SamplePeriod = timer:now_diff( os:timestamp(), - Timings#bookie_snap_timings.sample_start_time) div 1000000, + Timings#bookie_snap_timings.sample_start_time + ) div 1000000, leveled_log:log( b0017, - [Timings#bookie_snap_timings.sample_count, + [ + Timings#bookie_snap_timings.sample_count, Timings#bookie_snap_timings.bookie_time, Timings#bookie_snap_timings.pcl_time, SamplePeriod - ]), + ] + ), {noreply, State#state{bookie_snap_timings = #bookie_snap_timings{}}}; handle_cast({report_stats, pcl_fetch}, State) -> Timings = State#state.pcl_fetch_timings, SamplePeriod = timer:now_diff( os:timestamp(), - Timings#pcl_fetch_timings.sample_start_time) div 1000000, + Timings#pcl_fetch_timings.sample_start_time + ) div 1000000, leveled_log:log( p0032, - [Timings#pcl_fetch_timings.sample_count, + [ + Timings#pcl_fetch_timings.sample_count, Timings#pcl_fetch_timings.foundmem_time, Timings#pcl_fetch_timings.found0_time, Timings#pcl_fetch_timings.found1_time, @@ -529,7 +575,8 @@ handle_cast({report_stats, pcl_fetch}, State) -> Timings#pcl_fetch_timings.foundlower_count, Timings#pcl_fetch_timings.notfound_count, SamplePeriod - ]), + ] + ), {noreply, State#state{pcl_fetch_timings = #pcl_fetch_timings{}}}; handle_cast({report_stats, sst_fetch}, State) -> LogFun = @@ -537,10 +584,12 @@ handle_cast({report_stats, sst_fetch}, State) -> SamplePeriod = timer:now_diff( os:timestamp(), - Timings#sst_fetch_timings.sample_start_time) div 1000000, + Timings#sst_fetch_timings.sample_start_time + ) div 1000000, leveled_log:log( sst12, - [Level, + [ + Level, Timings#sst_fetch_timings.sample_count, Timings#sst_fetch_timings.notfound_time, Timings#sst_fetch_timings.fetchcache_time, @@ -551,7 +600,8 @@ handle_cast({report_stats, sst_fetch}, State) -> Timings#sst_fetch_timings.slotcached_count, Timings#sst_fetch_timings.slotnoncached_count, SamplePeriod - ]) + ] + ) end, lists:foreach(LogFun, State#state.sst_fetch_timings), {noreply, State#state{sst_fetch_timings = []}}; @@ -560,15 +610,18 @@ handle_cast({report_stats, cdb_get}, State) -> SamplePeriod = timer:now_diff( os:timestamp(), - Timings#cdb_get_timings.sample_start_time) div 1000000, + Timings#cdb_get_timings.sample_start_time + ) div 1000000, leveled_log:log( cdb19, - [Timings#cdb_get_timings.sample_count, + [ + Timings#cdb_get_timings.sample_count, Timings#cdb_get_timings.cycle_count, Timings#cdb_get_timings.index_time, Timings#cdb_get_timings.read_time, SamplePeriod - ]), + ] + ), {noreply, State#state{cdb_get_timings = #cdb_get_timings{}}}; handle_cast({log_level, LogLevel}, State) -> ok = leveled_log:set_loglevel(LogLevel), @@ -582,22 +635,22 @@ handle_cast({log_remove, ForcedLogs}, State) -> handle_info(report_next_stats, State) -> erlang:send_after( - State#state.log_frequency * 1000, self(), report_next_stats), + State#state.log_frequency * 1000, self(), report_next_stats + ), case State#state.log_order of [] -> {noreply, State}; - [NextStat|TailLogOrder] -> + [NextStat | TailLogOrder] -> ok = report_stats(self(), NextStat), {noreply, State#state{log_order = TailLogOrder ++ [NextStat]}} end. terminate(_Reason, _State) -> ok. - + code_change(_OldVsn, State, _Extra) -> {ok, State}. 
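%% Illustrative sketch only (not part of the patch; the 30s frequency and the
%% shortened stat list are assumptions for the example). monitor_start/2 seeds
%% the report_next_stats loop above: every LogFreq seconds the head of the
%% shuffled log order is reported and then rotated to the back of the queue.
monitor_example() ->
    {ok, Monitor} =
        leveled_monitor:monitor_start(30, [bookie_get, pcl_fetch, cdb_get]),
    %% Components are passed {Monitor, SamplePercentage} - e.g. {Monitor, 10}
    %% to time roughly 10% of operations, or {no_monitor, 0} to disable.
    ok = leveled_monitor:monitor_close(Monitor).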
- %%%============================================================================ %%% Test %%%============================================================================ @@ -615,4 +668,4 @@ coverage_cheat_test() -> % Can close, so empty log_order hasn't crashed ok = monitor_close(M). --endif. \ No newline at end of file +-endif. diff --git a/src/leveled_pclerk.erl b/src/leveled_pclerk.erl index 245ce98b..5e0af8df 100644 --- a/src/leveled_pclerk.erl +++ b/src/leveled_pclerk.erl @@ -25,34 +25,35 @@ -include("leveled.hrl"). -export([ - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3 - ]). + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). -export([ - clerk_new/3, - clerk_prompt/1, - clerk_push/2, - clerk_close/1, - clerk_promptdeletions/2, - clerk_loglevel/2, - clerk_addlogs/2, - clerk_removelogs/2 - ]). + clerk_new/3, + clerk_prompt/1, + clerk_push/2, + clerk_close/1, + clerk_promptdeletions/2, + clerk_loglevel/2, + clerk_addlogs/2, + clerk_removelogs/2 +]). -define(MAX_TIMEOUT, 2000). -define(MIN_TIMEOUT, 200). -define(GROOMING_PERC, 50). --record(state, {owner :: pid()|undefined, - root_path :: string()|undefined, - pending_deletions = dict:new() :: dict:dict(), - sst_options :: sst_options() - }). +-record(state, { + owner :: pid() | undefined, + root_path :: string() | undefined, + pending_deletions = dict:new() :: dict:dict(), + sst_options :: sst_options() +}). -type sst_options() :: #sst_options{}. @@ -61,13 +62,18 @@ %%%============================================================================ -spec clerk_new( - pid(), string(), sst_options()) -> {ok, pid()}. + pid(), string(), sst_options() +) -> {ok, pid()}. clerk_new(Owner, RootPath, OptsSST) -> - {ok, Pid} = - gen_server:start_link(?MODULE, - [leveled_log:get_opts(), - {sst_options, OptsSST}], - []), + {ok, Pid} = + gen_server:start_link( + ?MODULE, + [ + leveled_log:get_opts(), + {sst_options, OptsSST} + ], + [] + ), ok = gen_server:call(Pid, {load, Owner, RootPath}, infinity), leveled_log:log(pc001, [Pid, Owner]), {ok, Pid}. @@ -81,8 +87,9 @@ clerk_promptdeletions(Pid, ManifestSQN) -> gen_server:cast(Pid, {prompt_deletions, ManifestSQN}). -spec clerk_push( - pid(), {leveled_pmanifest:lsm_level(), leveled_pmanifest:manifest()}) -> - ok. + pid(), {leveled_pmanifest:lsm_level(), leveled_pmanifest:manifest()} +) -> + ok. clerk_push(Pid, Work) -> gen_server:cast(Pid, {push_work, Work}). @@ -117,23 +124,27 @@ init([LogOpts, {sst_options, OptsSST}]) -> {ok, #state{sst_options = OptsSST}}. handle_call({load, Owner, RootPath}, _From, State) -> - {reply, ok, State#state{owner=Owner, root_path=RootPath}, ?MIN_TIMEOUT}; + {reply, ok, State#state{owner = Owner, root_path = RootPath}, ?MIN_TIMEOUT}; handle_call(close, _From, State) -> {stop, normal, ok, State}. 
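%% Illustrative sketch only (not part of the patch): the sequence in which the
%% penciller is expected to drive the clerk API defined above. RootPath,
%% SrcLevel, Manifest and ManifestSQN stand in for values the penciller holds.
clerk_usage_sketch(RootPath, SrcLevel, Manifest, ManifestSQN) ->
    {ok, Clerk} = leveled_pclerk:clerk_new(self(), RootPath, #sst_options{}),
    %% Hand the clerk merge work for a source level and the current manifest
    ok = leveled_pclerk:clerk_push(Clerk, {SrcLevel, Manifest}),
    %% Once the resulting manifest change has been adopted, release the file
    %% deletions the clerk is holding against that manifest SQN
    ok = leveled_pclerk:clerk_promptdeletions(Clerk, ManifestSQN),
    ok = leveled_pclerk:clerk_close(Clerk).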
handle_cast(prompt, State) -> handle_info(timeout, State); handle_cast( - {push_work, Work}, State = #state{root_path = RP, owner = PCL}) - when ?IS_DEF(RP), is_pid(PCL) -> + {push_work, Work}, State = #state{root_path = RP, owner = PCL} +) when + ?IS_DEF(RP), is_pid(PCL) +-> {ManifestSQN, Deletions} = handle_work(Work, RP, State#state.sst_options, PCL), PDs = dict:store(ManifestSQN, Deletions, State#state.pending_deletions), leveled_log:log(pc022, [ManifestSQN]), {noreply, State#state{pending_deletions = PDs}, ?MIN_TIMEOUT}; handle_cast( - {prompt_deletions, ManifestSQN}, State = #state{owner = PCL}) - when is_pid(PCL) -> + {prompt_deletions, ManifestSQN}, State = #state{owner = PCL} +) when + is_pid(PCL) +-> {Deletions, UpdD} = return_deletions(ManifestSQN, State#state.pending_deletions), ok = notify_deletions(Deletions, PCL), @@ -154,7 +165,7 @@ handle_cast({remove_logs, ForcedLogs}, State) -> SSTopts0 = SSTopts#sst_options{log_options = leveled_log:get_opts()}, {noreply, State#state{sst_options = SSTopts0}}. -handle_info(timeout, State = #state{owner = PCL}) when is_pid(PCL) -> +handle_info(timeout, State = #state{owner = PCL}) when is_pid(PCL) -> ok = leveled_penciller:pcl_workforclerk(PCL), % When handling work, the clerk can collect a large number of binary % references, so proactively GC this process before receiving any future @@ -169,18 +180,21 @@ terminate(Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - %%%============================================================================ %%% Internal functions %%%============================================================================ -spec handle_work( {leveled_pmanifest:lsm_level(), leveled_pmanifest:manifest()}, - string(), sst_options(), pid()) -> - {pos_integer(), list(leveled_pmanifest:manifest_entry())}. + string(), + sst_options(), + pid() +) -> + {pos_integer(), list(leveled_pmanifest:manifest_entry())}. handle_work( - {SrcLevel, Manifest}, RootPath, SSTOpts, Owner) -> - {UpdManifest, EntriesToDelete} = + {SrcLevel, Manifest}, RootPath, SSTOpts, Owner +) -> + {UpdManifest, EntriesToDelete} = merge(SrcLevel, Manifest, RootPath, SSTOpts), leveled_log:log(pc007, []), SWMC = os:timestamp(), @@ -192,10 +206,12 @@ handle_work( {leveled_pmanifest:get_manifest_sqn(UpdManifest), EntriesToDelete}. -spec merge( - leveled_pmanifest:lsm_level(), leveled_pmanifest:manifest(), - string(), sst_options()) -> - {leveled_pmanifest:manifest(), - list(leveled_pmanifest:manifest_entry())}. + leveled_pmanifest:lsm_level(), + leveled_pmanifest:manifest(), + string(), + sst_options() +) -> + {leveled_pmanifest:manifest(), list(leveled_pmanifest:manifest_entry())}. 
merge(SrcLevel, Manifest, RootPath, OptsSST) -> case leveled_pmanifest:report_manifest_level(Manifest, SrcLevel + 1) of {0, 0, undefined, 0, 0, 0, 0} -> @@ -203,10 +219,12 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) -> {FCnt, MnMem, {MaxFN, MaxP, MaxMem}, MnHBS, MnHS, MnLHS, MnBVHS} -> leveled_log:log( pc023, - [SrcLevel + 1, FCnt, MnMem, MaxFN, MaxP, MaxMem]), + [SrcLevel + 1, FCnt, MnMem, MaxFN, MaxP, MaxMem] + ), leveled_log:log( pc025, - [SrcLevel + 1, FCnt, MnHBS, MnHS, MnLHS, MnBVHS]) + [SrcLevel + 1, FCnt, MnHBS, MnHS, MnLHS, MnBVHS] + ) end, SelectMethod = case rand:uniform(100) of @@ -248,25 +266,30 @@ merge(SrcLevel, Manifest, RootPath, OptsSST) -> {Man0, []}; _ -> SST_RP = leveled_penciller:sst_rootpath(RootPath), - perform_merge(Manifest, - Src, SinkList, SrcLevel, - SST_RP, NewSQN, OptsSST) + perform_merge( + Manifest, + Src, + SinkList, + SrcLevel, + SST_RP, + NewSQN, + OptsSST + ) end. -spec notify_deletions(list(leveled_pmanifest:manifest_entry()), pid()) -> ok. notify_deletions([], _Penciller) -> ok; -notify_deletions([Head|Tail], Penciller) -> +notify_deletions([Head | Tail], Penciller) -> ok = leveled_sst:sst_setfordelete( leveled_pmanifest:entry_owner(Head), Penciller ), notify_deletions(Tail, Penciller). - %% Assumption is that there is a single SST from a higher level that needs -%% to be merged into multiple SSTs at a lower level. +%% to be merged into multiple SSTs at a lower level. %% %% SrcLevel is the level of the src sst file, the sink should be srcLevel + 1 perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN, OptsSST) -> @@ -280,11 +303,15 @@ perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN, OptsSST) -> SinkBasement = leveled_pmanifest:is_basement(Manifest, SinkLevel), MaxMergeBelow = OptsSST#sst_options.max_mergebelow, MergeLimit = merge_limit(SrcLevel, length(SinkList), MaxMergeBelow), - {L2Additions, L1Additions, L2FileRemainder} = + {L2Additions, L1Additions, L2FileRemainder} = do_merge( - SrcList, SinkList, - SinkLevel, SinkBasement, - RootPath, NewSQN, MaxSQN, + SrcList, + SinkList, + SinkLevel, + SinkBasement, + RootPath, + NewSQN, + MaxSQN, OptsSST, [], MergeLimit @@ -321,12 +348,15 @@ perform_merge(Manifest, Src, SinkList, SrcLevel, RootPath, NewSQN, OptsSST) -> PartialFiles ) end, - {Man1, [Src|SinkManifestRemovals]}. + {Man1, [Src | SinkManifestRemovals]}. -spec merge_limit( - non_neg_integer(), non_neg_integer(), pos_integer()|infinity) - -> pos_integer()|infinity. -merge_limit(SrcLevel, SinkListLength, MMB) when SrcLevel =< 1; SinkListLength < MMB -> + non_neg_integer(), non_neg_integer(), pos_integer() | infinity +) -> + pos_integer() | infinity. +merge_limit(SrcLevel, SinkListLength, MMB) when + SrcLevel =< 1; SinkListLength < MMB +-> infinity; merge_limit(SrcLevel, SinkListLength, MMB) when is_integer(MMB) -> AdditionsLimit = max(1, MMB div 2), @@ -334,11 +364,11 @@ merge_limit(SrcLevel, SinkListLength, MMB) when is_integer(MMB) -> AdditionsLimit. -type merge_maybe_expanded_pointer() :: - leveled_codec:ledger_kv()| - leveled_sst:slot_pointer()| - leveled_sst:sst_pointer(). - % Different to leveled_sst:maybe_expanded_pointer/0 - % No sst_closed_pointer() + leveled_codec:ledger_kv() + | leveled_sst:slot_pointer() + | leveled_sst:sst_pointer(). 
+% Different to leveled_sst:maybe_expanded_pointer/0 +% No sst_closed_pointer() -spec do_merge( list(merge_maybe_expanded_pointer()), @@ -350,19 +380,23 @@ merge_limit(SrcLevel, SinkListLength, MMB) when is_integer(MMB) -> pos_integer(), leveled_sst:sst_options(), list(leveled_pmanifest:manifest_entry()), - pos_integer()|infinity) -> - { - list(leveled_pmanifest:manifest_entry()), - list(leveled_pmanifest:manifest_entry()), - list(leveled_sst:sst_pointer()) - }. + pos_integer() | infinity +) -> + { + list(leveled_pmanifest:manifest_entry()), + list(leveled_pmanifest:manifest_entry()), + list(leveled_sst:sst_pointer()) + }. do_merge( - [], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, _Opts, Additions, _Max) -> + [], [], SinkLevel, _SinkB, _RP, NewSQN, _MaxSQN, _Opts, Additions, _Max +) -> leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), full]), {lists:reverse(Additions), [], []}; do_merge( - KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions, Max) - when length(Additions) >= Max -> + KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions, Max +) when + length(Additions) >= Max +-> leveled_log:log(pc011, [NewSQN, SinkLevel, length(Additions), partial]), FNSrc = leveled_penciller:sst_filename( @@ -376,14 +410,14 @@ do_merge( {ExpandedKL2, L2FilePointersRem} = split_unexpanded_files(KL2), TS1 = os:timestamp(), InfOpts = OptsSST#sst_options{max_sstslots = infinity}, - % Need to be careful to make sure all the remainder goes in one file, - % could be situations whereby the max_sstslots has been changed between - % restarts - and so there is too much data for one file in the - % remainder ... but don't want to loop round and consider more complex - % scenarios here. + % Need to be careful to make sure all the remainder goes in one file, + % could be situations whereby the max_sstslots has been changed between + % restarts - and so there is too much data for one file in the + % remainder ... but don't want to loop round and consider more complex + % scenarios here. NewMergeKL1 = leveled_sst:sst_newmerge( - RP, FNSrc,ExpandedKL1, [], false, SinkLevel - 1, MaxSQN, InfOpts + RP, FNSrc, ExpandedKL1, [], false, SinkLevel - 1, MaxSQN, InfOpts ), TS2 = os:timestamp(), NewMergeKL2 = @@ -394,7 +428,8 @@ do_merge( {KL2Additions, [], []} = add_entry(NewMergeKL2, FNSnk, TS2, Additions), {lists:reverse(KL2Additions), KL1Additions, L2FilePointersRem}; do_merge( - KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions, Max) -> + KL1, KL2, SinkLevel, SinkB, RP, NewSQN, MaxSQN, OptsSST, Additions, Max +) -> FileName = leveled_penciller:sst_filename( NewSQN, SinkLevel, length(Additions) @@ -403,7 +438,8 @@ do_merge( TS1 = os:timestamp(), NewMerge = leveled_sst:sst_newmerge( - RP, FileName, KL1, KL2, SinkB, SinkLevel, MaxSQN, OptsSST), + RP, FileName, KL1, KL2, SinkB, SinkLevel, MaxSQN, OptsSST + ), {UpdAdditions, KL1Rem, KL2Rem} = add_entry(NewMerge, FileName, TS1, Additions), do_merge( @@ -426,45 +462,48 @@ add_entry({ok, Pid, Reply, Bloom}, FileName, TS1, Additions) -> {{KL1Rem, KL2Rem}, SmallestKey, HighestKey} = Reply, Entry = leveled_pmanifest:new_entry( - SmallestKey, HighestKey, Pid, FileName, Bloom), + SmallestKey, HighestKey, Pid, FileName, Bloom + ), leveled_log:log_timer(pc015, [], TS1), - {[Entry|Additions], KL1Rem, KL2Rem}. - + {[Entry | Additions], KL1Rem, KL2Rem}. -spec split_unexpanded_files( - list(merge_maybe_expanded_pointer())) -> - { - list(leveled_codec:ledger_kv()|leveled_sst:slot_pointer()), - list(leveled_sst:sst_pointer()) - }. 
+ list(merge_maybe_expanded_pointer()) +) -> + { + list(leveled_codec:ledger_kv() | leveled_sst:slot_pointer()), + list(leveled_sst:sst_pointer()) + }. split_unexpanded_files(Pointers) -> split_unexpanded_files(Pointers, [], []). -spec split_unexpanded_files( list(merge_maybe_expanded_pointer()), - list(leveled_codec:ledger_kv()|leveled_sst:slot_pointer()), - list(leveled_sst:sst_pointer())) -> - { - list(leveled_codec:ledger_kv()|leveled_sst:slot_pointer()), - list(leveled_sst:sst_pointer()) - }. + list(leveled_codec:ledger_kv() | leveled_sst:slot_pointer()), + list(leveled_sst:sst_pointer()) +) -> + { + list(leveled_codec:ledger_kv() | leveled_sst:slot_pointer()), + list(leveled_sst:sst_pointer()) + }. split_unexpanded_files([], MaybeExpanded, FilePointers) -> {lists:reverse(MaybeExpanded), lists:reverse(FilePointers)}; -split_unexpanded_files([{next, P, SK}|Rest], MaybeExpanded, FilePointers) -> - split_unexpanded_files(Rest, MaybeExpanded, [{next, P, SK}|FilePointers]); -split_unexpanded_files([{LK, LV}|Rest], MaybeExpanded, []) -> - % Should never see this, once a FilePointer has been seen - split_unexpanded_files(Rest, [{LK, LV}|MaybeExpanded], []); -split_unexpanded_files([{pointer, P, SIV, SK, EK}|Rest], MaybeExpanded, []) -> - % Should never see this, once a FilePointer has been seen +split_unexpanded_files([{next, P, SK} | Rest], MaybeExpanded, FilePointers) -> + split_unexpanded_files(Rest, MaybeExpanded, [{next, P, SK} | FilePointers]); +split_unexpanded_files([{LK, LV} | Rest], MaybeExpanded, []) -> + % Should never see this, once a FilePointer has been seen + split_unexpanded_files(Rest, [{LK, LV} | MaybeExpanded], []); +split_unexpanded_files([{pointer, P, SIV, SK, EK} | Rest], MaybeExpanded, []) -> + % Should never see this, once a FilePointer has been seen split_unexpanded_files( - Rest, [{pointer, P, SIV, SK, EK}|MaybeExpanded], [] + Rest, [{pointer, P, SIV, SK, EK} | MaybeExpanded], [] ). -spec grooming_scorer( - list(leveled_pmanifest:manifest_entry())) - -> leveled_pmanifest:manifest_entry(). -grooming_scorer([ME | MEs]) -> + list(leveled_pmanifest:manifest_entry()) +) -> + leveled_pmanifest:manifest_entry(). +grooming_scorer([ME | MEs]) -> InitTombCount = leveled_sst:sst_gettombcount(leveled_pmanifest:entry_owner(ME)), {HighestTC, BestME} = grooming_scorer(InitTombCount, ME, MEs), @@ -472,7 +511,7 @@ grooming_scorer([ME | MEs]) -> BestME. grooming_scorer(HighestTC, BestME, []) -> - {HighestTC, BestME}; + {HighestTC, BestME}; grooming_scorer(HighestTC, BestME, [ME | MEs]) -> TombCount = leveled_sst:sst_gettombcount(leveled_pmanifest:entry_owner(ME)), @@ -481,7 +520,7 @@ grooming_scorer(HighestTC, BestME, [ME | MEs]) -> grooming_scorer(TombCount, ME, MEs); false -> grooming_scorer(HighestTC, BestME, MEs) - end. + end. 
return_deletions(ManifestSQN, PendingDeletionD) -> % The returning of deletions had been seperated out as a failure to fetch @@ -511,11 +550,15 @@ generate_randomkeys(0, Acc, _BucketLow, _BucketHigh) -> generate_randomkeys(Count, Acc, BucketLow, BRange) -> BNumber = lists:flatten( - io_lib:format("~4..0B", - [BucketLow + rand:uniform(BRange)])), + io_lib:format( + "~4..0B", + [BucketLow + rand:uniform(BRange)] + ) + ), KNumber = lists:flatten( - io_lib:format("~4..0B", [rand:uniform(1000)])), + io_lib:format("~4..0B", [rand:uniform(1000)]) + ), K = { o, @@ -523,12 +566,11 @@ generate_randomkeys(Count, Acc, BucketLow, BRange) -> list_to_binary("Key" ++ KNumber), null }, - RandKey = {K, {Count + 1, - {active, infinity}, - leveled_codec:segment_hash(K), - null}}, - generate_randomkeys(Count - 1, [RandKey|Acc], BucketLow, BRange). - + RandKey = + {K, { + Count + 1, {active, infinity}, leveled_codec:segment_hash(K), null + }}, + generate_randomkeys(Count - 1, [RandKey | Acc], BucketLow, BRange). grooming_score_test() -> ok = filelib:ensure_dir("test/test_area/ledger_files/"), @@ -536,41 +578,51 @@ grooming_score_test() -> KL2_L3 = lists:sort(generate_randomkeys(2000, 101, 250)), KL3_L3 = lists:sort(generate_randomkeys(2000, 251, 300)), KL4_L3 = lists:sort(generate_randomkeys(2000, 301, 400)), - [{HeadK, HeadV}|RestKL2] = KL2_L3, - - {ok, PidL3_1, _, _} = - leveled_sst:sst_newmerge("test/test_area/ledger_files/", - "1_L3.sst", - KL1_L3, - [{HeadK, setelement(2, HeadV, tomb)} - |RestKL2], - false, - 3, - 999999, - #sst_options{}, - true), - {ok, PidL3_1B, _, _} = - leveled_sst:sst_newmerge("test/test_area/ledger_files/", - "1B_L3.sst", - KL1_L3, - [{HeadK, setelement(2, HeadV, tomb)} - |RestKL2], - true, - 3, - 999999, - #sst_options{}, - true), - - {ok, PidL3_2, _, _} = - leveled_sst:sst_newmerge("test/test_area/ledger_files/", - "2_L3.sst", - KL3_L3, - KL4_L3, - false, - 3, - 999999, - #sst_options{}, - true), + [{HeadK, HeadV} | RestKL2] = KL2_L3, + + {ok, PidL3_1, _, _} = + leveled_sst:sst_newmerge( + "test/test_area/ledger_files/", + "1_L3.sst", + KL1_L3, + [ + {HeadK, setelement(2, HeadV, tomb)} + | RestKL2 + ], + false, + 3, + 999999, + #sst_options{}, + true + ), + {ok, PidL3_1B, _, _} = + leveled_sst:sst_newmerge( + "test/test_area/ledger_files/", + "1B_L3.sst", + KL1_L3, + [ + {HeadK, setelement(2, HeadV, tomb)} + | RestKL2 + ], + true, + 3, + 999999, + #sst_options{}, + true + ), + + {ok, PidL3_2, _, _} = + leveled_sst:sst_newmerge( + "test/test_area/ledger_files/", + "2_L3.sst", + KL3_L3, + KL4_L3, + false, + 3, + 999999, + #sst_options{}, + true + ), DSK = {o, <<"B">>, <<"SK">>, null}, DEK = {o, <<"E">>, <<"EK">>, null}, ME1 = leveled_pmanifest:new_entry(DSK, DEK, PidL3_1, "dummyL3_1", none), @@ -578,20 +630,21 @@ grooming_score_test() -> ME2 = leveled_pmanifest:new_entry(DSK, DEK, PidL3_2, "dummyL3_2", none), ?assertMatch(ME1, grooming_scorer([ME1, ME2])), ?assertMatch(ME1, grooming_scorer([ME2, ME1])), - % prefer the file with the tombstone + % prefer the file with the tombstone ?assertMatch(ME1B, grooming_scorer([ME1B, ME2])), ?assertMatch(ME2, grooming_scorer([ME2, ME1B])), - % If the file with the tombstone is in the basement, it will have - % no tombstone so the first file will be chosen - - lists:foreach(fun(P) -> leveled_sst:sst_clear(P) end, - [PidL3_1, PidL3_1B, PidL3_2]). 
+ % If the file with the tombstone is in the basement, it will have + % no tombstone so the first file will be chosen + lists:foreach( + fun(P) -> leveled_sst:sst_clear(P) end, + [PidL3_1, PidL3_1B, PidL3_2] + ). merge_file_test() -> ok = filelib:ensure_dir("test/test_area/ledger_files/"), KL1_L1 = lists:sort(generate_randomkeys(8000, 0, 1000)), - {ok, PidL1_1, _, _} = + {ok, PidL1_1, _, _} = leveled_sst:sst_new( "test/test_area/ledger_files/", "KL1_L1.sst", @@ -601,7 +654,7 @@ merge_file_test() -> #sst_options{} ), KL1_L2 = lists:sort(generate_randomkeys(8000, 0, 250)), - {ok, PidL2_1, _, _} = + {ok, PidL2_1, _, _} = leveled_sst:sst_new( "test/test_area/ledger_files/", "KL1_L2.sst", @@ -611,7 +664,7 @@ merge_file_test() -> #sst_options{} ), KL2_L2 = lists:sort(generate_randomkeys(8000, 250, 250)), - {ok, PidL2_2, _, _} = + {ok, PidL2_2, _, _} = leveled_sst:sst_new( "test/test_area/ledger_files/", "KL2_L2.sst", @@ -621,7 +674,7 @@ merge_file_test() -> #sst_options{press_method = lz4} ), KL3_L2 = lists:sort(generate_randomkeys(8000, 500, 250)), - {ok, PidL2_3, _, _} = + {ok, PidL2_3, _, _} = leveled_sst:sst_new( "test/test_area/ledger_files/", "KL3_L2.sst", @@ -631,7 +684,7 @@ merge_file_test() -> #sst_options{press_method = lz4} ), KL4_L2 = lists:sort(generate_randomkeys(8000, 750, 250)), - {ok, PidL2_4, _, _} = + {ok, PidL2_4, _, _} = leveled_sst:sst_new( "test/test_area/ledger_files/", "KL4_L2.sst", @@ -640,7 +693,7 @@ merge_file_test() -> 999999, #sst_options{press_method = lz4} ), - E1 = + E1 = leveled_pmanifest:new_entry( lists:nth(1, KL1_L1), lists:last(KL1_L1), @@ -648,7 +701,7 @@ merge_file_test() -> "./KL1_L1.sst", none ), - E2 = + E2 = leveled_pmanifest:new_entry( lists:nth(1, KL1_L2), lists:last(KL1_L2), @@ -656,7 +709,7 @@ merge_file_test() -> "./KL1_L2.sst", none ), - E3 = + E3 = leveled_pmanifest:new_entry( lists:nth(1, KL2_L2), lists:last(KL2_L2), @@ -664,7 +717,7 @@ merge_file_test() -> "./KL2_L2.sst", none ), - E4 = + E4 = leveled_pmanifest:new_entry( lists:nth(1, KL3_L2), lists:last(KL3_L2), @@ -672,7 +725,7 @@ merge_file_test() -> "./KL3_L2.sst", none ), - E5 = + E5 = leveled_pmanifest:new_entry( lists:nth(1, KL4_L2), lists:last(KL4_L2), @@ -680,27 +733,37 @@ merge_file_test() -> "./KL4_L2.sst", none ), - + Man0 = leveled_pmanifest:new_manifest(), Man1 = leveled_pmanifest:insert_manifest_entry(Man0, 1, 2, E2), Man2 = leveled_pmanifest:insert_manifest_entry(Man1, 1, 2, E3), Man3 = leveled_pmanifest:insert_manifest_entry(Man2, 1, 2, E4), Man4 = leveled_pmanifest:insert_manifest_entry(Man3, 1, 2, E5), Man5 = leveled_pmanifest:insert_manifest_entry(Man4, 2, 1, E1), - PointerList = lists:map(fun(ME) -> {next, ME, all} end, - [E2, E3, E4, E5]), - {Man6, _Dels} = - perform_merge(Man5, E1, PointerList, 1, - "test/test_area/ledger_files/", - 3, #sst_options{}), - + PointerList = lists:map( + fun(ME) -> {next, ME, all} end, + [E2, E3, E4, E5] + ), + {Man6, _Dels} = + perform_merge( + Man5, + E1, + PointerList, + 1, + "test/test_area/ledger_files/", + 3, + #sst_options{} + ), + ?assertMatch(3, leveled_pmanifest:get_manifest_sqn(Man6)), - - lists:foreach(fun(P) -> leveled_sst:sst_clear(P) end, - [PidL1_1, PidL2_1, PidL2_2, PidL2_3, PidL2_4]). + + lists:foreach( + fun(P) -> leveled_sst:sst_clear(P) end, + [PidL1_1, PidL2_1, PidL2_2, PidL2_3, PidL2_4] + ). coverage_cheat_test() -> {ok, _State1} = - code_change(null, #state{sst_options=#sst_options{}}, null). + code_change(null, #state{sst_options = #sst_options{}}, null). -endif. 
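%% Illustrative worked example (not part of the patch) for merge_limit/3 above,
%% assuming a max_mergebelow (MMB) setting of 8:
%%   merge_limit(1, 12, 8) -> infinity  % SrcLevel =< 1: L0/L1 merges never partial
%%   merge_limit(3, 6, 8)  -> infinity  % fewer sink files than MMB
%%   merge_limit(3, 12, 8) -> 4         % max(1, 8 div 2); do_merge then stops after
%%                                      % 4 additions and hands back the remaining
%%                                      % sink files as pointers for a later merge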
diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index 0a432ae3..b2ee31a5 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -4,7 +4,7 @@ %% persisted, ordered view of non-recent Keys and Metadata which have been %% added to the store. %% - The penciller maintains a manifest of all the files within the current -%% Ledger. +%% Ledger. %% - The Penciller provides re-write (compaction) work up to be managed by %% the Penciller's Clerk %% - The Penciller can be cloned and maintains a register of clones who have @@ -26,7 +26,7 @@ %% The Ledger is divided into many levels %% - L0: New keys are received from the Bookie and and kept in the levelzero %% cache, until that cache is the size of a SST file, and it is then persisted -%% as a SST file at this level. L0 SST files can be larger than the normal +%% as a SST file at this level. L0 SST files can be larger than the normal %% maximum size - so we don't have to consider problems of either having more %% than one L0 file (and handling what happens on a crash between writing the %% files when the second may have overlapping sequence numbers), or having a @@ -127,7 +127,7 @@ %% and then write a new manifest file that represents that state with using %% the next Manifest sequence number as the filename: %% - nonzero_.pnd -%% +%% %% The Penciller on accepting the change should rename the manifest file to - %% - nonzero_.crr %% @@ -161,44 +161,47 @@ -include("leveled.hrl"). -export([ - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3, - format_status/1]). + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3, + format_status/1 +]). -export([ - pcl_snapstart/1, - pcl_start/1, - pcl_pushmem/2, - pcl_fetchlevelzero/3, - pcl_fetch/4, - pcl_fetchkeys/5, - pcl_fetchkeys/6, - pcl_fetchkeysbysegment/8, - pcl_fetchnextkey/5, - pcl_checksequencenumber/3, - pcl_workforclerk/1, - pcl_manifestchange/2, - pcl_confirml0complete/5, - pcl_confirmdelete/3, - pcl_close/1, - pcl_doom/1, - pcl_releasesnapshot/2, - pcl_registersnapshot/5, - pcl_getstartupsequencenumber/1, - pcl_checkbloomtest/2, - pcl_checkforwork/1, - pcl_persistedsqn/1, - pcl_loglevel/2, - pcl_addlogs/2, - pcl_removelogs/2]). + pcl_snapstart/1, + pcl_start/1, + pcl_pushmem/2, + pcl_fetchlevelzero/3, + pcl_fetch/4, + pcl_fetchkeys/5, + pcl_fetchkeys/6, + pcl_fetchkeysbysegment/8, + pcl_fetchnextkey/5, + pcl_checksequencenumber/3, + pcl_workforclerk/1, + pcl_manifestchange/2, + pcl_confirml0complete/5, + pcl_confirmdelete/3, + pcl_close/1, + pcl_doom/1, + pcl_releasesnapshot/2, + pcl_registersnapshot/5, + pcl_getstartupsequencenumber/1, + pcl_checkbloomtest/2, + pcl_checkforwork/1, + pcl_persistedsqn/1, + pcl_loglevel/2, + pcl_addlogs/2, + pcl_removelogs/2 +]). -export([ - sst_rootpath/1, - sst_filename/3]). + sst_rootpath/1, + sst_filename/3 +]). -export([pcl_getsstpids/1, pcl_getclerkpid/1]). @@ -217,101 +220,113 @@ -define(SUPER_MAX_TABLE_SIZE, 40000). -define(WORKQUEUE_BACKLOG_TOLERANCE, 4). -define(COIN_SIDECOUNT, 4). --define(SLOW_FETCH, 500000). % Log a very slow fetch - longer than 500ms +% Log a very slow fetch - longer than 500ms +-define(SLOW_FETCH, 500000). -define(FOLD_SCANWIDTH, 32). -define(ITERATOR_SCANWIDTH, 4). -define(ITERATOR_MINSCANWIDTH, 1). -define(SHUTDOWN_LOOPS, 10). -define(SHUTDOWN_PAUSE, 10000). 
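Note that ?SLOW_FETCH is expressed in microseconds, the unit returned by timer:now_diff/2, so 500000 corresponds to the 500ms mentioned in the comment. A minimal sketch of how such a threshold is applied around a timed call is shown below; it uses only stdlib functions and is not the module's actual logging path - do_fetch/1 is a hypothetical stand-in.

%% Sketch only: wrap a fetch with a microsecond threshold check.
maybe_flag_slow(SW, Result, ThresholdUS) ->
    ElapsedUS = timer:now_diff(os:timestamp(), SW),
    case ElapsedUS > ThresholdUS of
        true -> {slow, ElapsedUS, Result};
        false -> {ok, ElapsedUS, Result}
    end.
%% Usage sketch:
%%   SW = os:timestamp(),
%%   R = do_fetch(Key),
%%   maybe_flag_slow(SW, R, 500000).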
- % How long to wait for snapshots to be released on shutdown - % before forcing closure of snapshots - % 10s may not be long enough for all snapshots, but avoids crashes of - % short-lived queries racing with the shutdown - --record(state, {manifest :: - leveled_pmanifest:manifest() | undefined | redacted, - % Can be undefined in some snapshots, but must always be - % defined when using a primary penciller - query_manifest :: - {list(), - leveled_codec:ledger_key(), - leveled_codec:ledger_key()} | undefined, - % Slimmed down version of the manifest containing part - % related to specific query, and the StartKey/EndKey - % used to extract this part - % Only found in snapshots - - persisted_sqn = 0 :: integer(), % The highest SQN persisted - ledger_sqn = 0 :: integer(), % The highest SQN added to L0 - - levelzero_pending = false :: boolean(), - levelzero_constructor :: pid() | undefined, - levelzero_cache = [] :: levelzero_cache() | redacted, - levelzero_size = 0 :: integer(), - levelzero_maxcachesize = 0 :: non_neg_integer(), - % Will default to 0 in snapshots (when not required) - levelzero_cointoss = false :: boolean(), - levelzero_index :: - leveled_pmem:index_array() | undefined | redacted, - levelzero_astree :: list() | undefined | redacted, - - root_path = "test" :: string(), - clerk :: pid() | undefined, - % Can only be undefined in a snapshot - - is_snapshot = false :: boolean(), - snapshot_fully_loaded = false :: boolean(), - snapshot_time :: pos_integer() | undefined, - source_penciller :: pid() | undefined, - bookie_monref :: reference() | undefined, - - work_ongoing = false :: boolean(), % i.e. compaction work - work_backlog = false :: boolean(), % i.e. compaction work - - pending_removals = [] :: list(string()), - maybe_release = false :: boolean(), - - snaptimeout_short :: pos_integer()|undefined, - snaptimeout_long :: pos_integer()|undefined, - - monitor = {no_monitor, 0} :: leveled_monitor:monitor(), - - sst_options = #sst_options{} :: sst_options(), - - shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer() - }). 
- +% How long to wait for snapshots to be released on shutdown +% before forcing closure of snapshots +% 10s may not be long enough for all snapshots, but avoids crashes of +% short-lived queries racing with the shutdown + +-record(state, { + manifest :: + leveled_pmanifest:manifest() | undefined | redacted, + % Can be undefined in some snapshots, but must always be + % defined when using a primary penciller + query_manifest :: + {list(), leveled_codec:ledger_key(), leveled_codec:ledger_key()} + | undefined, + % Slimmed down version of the manifest containing part + % related to specific query, and the StartKey/EndKey + % used to extract this part + % Only found in snapshots + + % The highest SQN persisted + persisted_sqn = 0 :: integer(), + % The highest SQN added to L0 + ledger_sqn = 0 :: integer(), + + levelzero_pending = false :: boolean(), + levelzero_constructor :: pid() | undefined, + levelzero_cache = [] :: levelzero_cache() | redacted, + levelzero_size = 0 :: integer(), + levelzero_maxcachesize = 0 :: non_neg_integer(), + % Will default to 0 in snapshots (when not required) + levelzero_cointoss = false :: boolean(), + levelzero_index :: + leveled_pmem:index_array() | undefined | redacted, + levelzero_astree :: list() | undefined | redacted, + + root_path = "test" :: string(), + clerk :: pid() | undefined, + % Can only be undefined in a snapshot + + is_snapshot = false :: boolean(), + snapshot_fully_loaded = false :: boolean(), + snapshot_time :: pos_integer() | undefined, + source_penciller :: pid() | undefined, + bookie_monref :: reference() | undefined, + + % i.e. compaction work + work_ongoing = false :: boolean(), + % i.e. compaction work + work_backlog = false :: boolean(), + + pending_removals = [] :: list(string()), + maybe_release = false :: boolean(), + + snaptimeout_short :: pos_integer() | undefined, + snaptimeout_long :: pos_integer() | undefined, + + monitor = {no_monitor, 0} :: leveled_monitor:monitor(), + + sst_options = #sst_options{} :: sst_options(), + + shutdown_loops = ?SHUTDOWN_LOOPS :: non_neg_integer() +}). -type penciller_options() :: #penciller_options{}. -type bookies_memory() :: - { - ets:table()|tuple()|empty_cache, - array:array()|empty_array, - integer()|infinity, - integer() - }. + { + ets:table() | tuple() | empty_cache, + array:array() | empty_array, + integer() | infinity, + integer() + }. -type pcl_state() :: #state{}. -type levelzero_cacheentry() :: {pos_integer(), leveled_tree:leveled_tree()}. -type levelzero_cache() :: list(levelzero_cacheentry()). --type sqn_check() :: current|replaced|missing. +-type sqn_check() :: current | replaced | missing. +%% erlfmt:ignore - issues with editors when function definitions are split -type sst_fetchfun() :: - fun((pid(), - leveled_codec:ledger_key(), - leveled_codec:segment_hash(), - non_neg_integer()) -> - leveled_codec:ledger_kv()|not_present). + fun(( + pid(), + leveled_codec:ledger_key(), + leveled_codec:segment_hash(), + non_neg_integer() + ) + -> leveled_codec:ledger_kv() | not_present + ). -type levelzero_returnfun() :: fun((levelzero_cacheentry()) -> ok). +%% erlfmt:ignore - issues with editors when function definitions are split -type pclacc_fun() :: - fun((leveled_codec:object_key(), - leveled_codec:ledger_value(), - dynamic()) -> dynamic()). + fun((leveled_codec:object_key(), leveled_codec:ledger_value(), dynamic()) + -> dynamic() + ). -type sst_options() :: #sst_options{}. 
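To make the pclacc_fun() shape above concrete, the sketch below shows a fold function of the expected arity (key, value, accumulator) being handed to pcl_fetchkeys/5 against a penciller snapshot. The bucket range endpoints are illustrative assumptions, not the canonical way leveled expresses an open-ended range.

%% Sketch only: collect every ledger key seen in a range fold over a
%% penciller snapshot. The {o, Bucket, ...} endpoints are illustrative.
collect_bucket_keys(PclSnapPid, Bucket) ->
    CollectKeysFun =
        fun(LedgerKey, _LedgerValue, Acc) -> [LedgerKey | Acc] end,
    leveled_penciller:pcl_fetchkeys(
        PclSnapPid,
        {o, Bucket, null, null},
        {o, Bucket, <<255>>, null},
        CollectKeysFun,
        []
    ).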
-export_type( - [levelzero_cacheentry/0, + [ + levelzero_cacheentry/0, levelzero_returnfun/0, sqn_check/0, - pclacc_fun/0]). + pclacc_fun/0 + ] +). %%%============================================================================ %%% API @@ -327,7 +342,7 @@ %% When starting a clone a query can also be passed. This prevents the whole %% Level Zero memory space from being copied to the snapshot, instead the %% query is run against the level zero space and just the query results are -%% copied into the clone. +%% copied into the clone. pcl_start(PCLopts) -> {ok, Pcl} = gen_server:start_link(?MODULE, [leveled_log:get_opts(), PCLopts], []), @@ -341,7 +356,7 @@ pcl_snapstart(PCLopts) -> gen_server:start(?MODULE, [leveled_log:get_opts(), PCLopts], []), {ok, PclSnap}. --spec pcl_pushmem(pid(), bookies_memory()) -> ok|returned. +-spec pcl_pushmem(pid(), bookies_memory()) -> ok | returned. %% @doc %% Load the contents of the Bookie's memory of recent additions to the Ledger %% to the Ledger proper. @@ -359,10 +374,12 @@ pcl_pushmem(Pid, LedgerCache) -> %% Bookie to dump memory onto penciller gen_server:call(Pid, {push_mem, LedgerCache}, infinity). --spec pcl_fetchlevelzero(pid(), - non_neg_integer(), - fun((levelzero_cacheentry()) -> ok)) - -> ok. +-spec pcl_fetchlevelzero( + pid(), + non_neg_integer(), + fun((levelzero_cacheentry()) -> ok) +) -> + ok. %% @doc %% Allows a single slot of the penciller's levelzero cache to be fetched. The %% levelzero cache can be up to 40K keys - sending this to the process that is @@ -380,17 +397,19 @@ pcl_fetchlevelzero(Pid, Slot, ReturnFun) -> % be stuck in L0 pending gen_server:cast(Pid, {fetch_levelzero, Slot, ReturnFun}). --spec pcl_fetch(pid(), - leveled_codec:ledger_key(), - leveled_codec:segment_hash(), - boolean()) -> leveled_codec:ledger_kv()|not_present. +-spec pcl_fetch( + pid(), + leveled_codec:ledger_key(), + leveled_codec:segment_hash(), + boolean() +) -> leveled_codec:ledger_kv() | not_present. %% @doc %% Fetch a key, return the first (highest SQN) occurrence of that Key along %% with the value. %% %% Hash should be result of leveled_codec:segment_hash(Key) %% The L0Index cannot be used when in head_only mode - as although such keys -%% are fetchable no index entries are created whne added to the ledger cache +%% are fetchable no index entries are created whne added to the ledger cache pcl_fetch(Pid, Key, Hash, UseL0Index) -> gen_server:call(Pid, {fetch, Key, Hash, UseL0Index}, infinity). @@ -399,7 +418,8 @@ pcl_fetch(Pid, Key, Hash, UseL0Index) -> leveled_codec:query_key(), leveled_codec:query_key(), pclacc_fun(), - dynamic()) -> dynamic(). + dynamic() +) -> dynamic(). %% @doc %% Run a range query between StartKey and EndKey (inclusive). This will cover %% all keys in the range - so must only be run against snapshots of the @@ -413,51 +433,62 @@ pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc) -> pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc, as_pcl). -spec pcl_fetchkeys - (pid(), + ( + pid(), leveled_codec:query_key(), leveled_codec:query_key(), pclacc_fun(), dynamic(), - as_pcl) -> dynamic(); - (pid(), + as_pcl + ) -> dynamic(); + ( + pid(), leveled_codec:query_key(), leveled_codec:query_key(), pclacc_fun(), dynamic(), - by_runner) -> fun(() -> dynamic()). + by_runner + ) -> fun(() -> dynamic()). pcl_fetchkeys(Pid, StartKey, EndKey, AccFun, InitAcc, By) -> - gen_server:call(Pid, - {fetch_keys, - StartKey, EndKey, - AccFun, InitAcc, - false, false, -1, - By}, - infinity). 
- + gen_server:call( + Pid, + {fetch_keys, StartKey, EndKey, AccFun, InitAcc, false, false, -1, By}, + infinity + ). -spec pcl_fetchkeysbysegment( - pid(), - leveled_codec:ledger_key(), - leveled_codec:ledger_key(), - pclacc_fun(), any(), + pid(), + leveled_codec:ledger_key(), + leveled_codec:ledger_key(), + pclacc_fun(), + any(), leveled_codec:segment_list(), false | leveled_codec:lastmod_range(), - boolean()) -> any(). + boolean() +) -> any(). %% @doc %% Run a range query between StartKey and EndKey (inclusive). This will cover %% all keys in the range - so must only be run against snapshots of the -%% penciller to avoid blocking behaviour. +%% penciller to avoid blocking behaviour. %% -%% This version allows an additional input of a SegChecker. This is a list +%% This version allows an additional input of a SegChecker. This is a list %% of 16-bit integers representing the segment IDs band ((2 ^ 16) -1) that %% are interesting to the fetch %% %% Note that segment must be false unless the object Tag supports additional %% indexing by segment. This cannot be used on ?IDX_TAG and other tags that %% use the no_lookup hash -pcl_fetchkeysbysegment(Pid, StartKey, EndKey, AccFun, InitAcc, - SegmentList, LastModRange, LimitByCount) -> - {MaxKeys, InitAcc0} = +pcl_fetchkeysbysegment( + Pid, + StartKey, + EndKey, + AccFun, + InitAcc, + SegmentList, + LastModRange, + LimitByCount +) -> + {MaxKeys, InitAcc0} = case LimitByCount of true -> % The passed in accumulator should have the Max Key Count @@ -466,18 +497,20 @@ pcl_fetchkeysbysegment(Pid, StartKey, EndKey, AccFun, InitAcc, false -> {-1, InitAcc} end, - gen_server:call(Pid, - {fetch_keys, - StartKey, EndKey, AccFun, InitAcc0, - SegmentList, LastModRange, MaxKeys, - by_runner}, - infinity). + gen_server:call( + Pid, + {fetch_keys, StartKey, EndKey, AccFun, InitAcc0, SegmentList, + LastModRange, MaxKeys, by_runner}, + infinity + ). -spec pcl_fetchnextkey( - pid(), - leveled_codec:ledger_key(), - leveled_codec:ledger_key(), - pclacc_fun(), any()) -> any(). + pid(), + leveled_codec:ledger_key(), + leveled_codec:ledger_key(), + pclacc_fun(), + any() +) -> any(). %% @doc %% Run a range query between StartKey and EndKey (inclusive). This has the %% same constraints as pcl_fetchkeys/5, but will only return the first key @@ -485,14 +518,14 @@ pcl_fetchkeysbysegment(Pid, StartKey, EndKey, AccFun, InitAcc, pcl_fetchnextkey(Pid, StartKey, EndKey, AccFun, InitAcc) -> gen_server:call( Pid, - {fetch_keys, - StartKey, EndKey, AccFun, InitAcc, false, false, 1, as_pcl - }, + {fetch_keys, StartKey, EndKey, AccFun, InitAcc, false, false, 1, + as_pcl}, infinity ). -spec pcl_checksequencenumber( - pid(), leveled_codec:ledger_key(), integer()) -> sqn_check(). + pid(), leveled_codec:ledger_key(), integer() +) -> sqn_check(). %% @doc %% Check if the sequence number of the passed key is not replaced by a change %% after the passed sequence number. Will return: @@ -521,14 +554,16 @@ pcl_workforclerk(Pid) -> pcl_manifestchange(Pid, Manifest) -> gen_server:cast(Pid, {manifest_change, Manifest}). --spec pcl_confirml0complete(pid(), - string(), - leveled_codec:ledger_key(), - leveled_codec:ledger_key(), - binary()) -> ok. +-spec pcl_confirml0complete( + pid(), + string(), + leveled_codec:ledger_key(), + leveled_codec:ledger_key(), + binary() +) -> ok. 
%% @doc -%% Allows a SST writer that has written a L0 file to confirm that the file -%% is now complete, so the filename and key ranges can be added to the +%% Allows a SST writer that has written a L0 file to confirm that the file +%% is now complete, so the filename and key ranges can be added to the %% manifest and the file can be used in place of the in-memory levelzero %% cache. pcl_confirml0complete(Pid, FN, StartKey, EndKey, Bloom) -> @@ -544,7 +579,7 @@ pcl_confirmdelete(Pid, FileName, FilePid) -> -spec pcl_getstartupsequencenumber(pid()) -> integer(). %% @doc -%% At startup the penciller will get the largest sequence number that is +%% At startup the penciller will get the largest sequence number that is %% within the persisted files. This function allows for this sequence number %% to be fetched - so that it can be used to determine parts of the Ledger %% which may have been lost in the last shutdown (so that the ledger can @@ -552,31 +587,34 @@ pcl_confirmdelete(Pid, FileName, FilePid) -> pcl_getstartupsequencenumber(Pid) -> gen_server:call(Pid, get_startup_sqn, infinity). --spec pcl_registersnapshot(pid(), - pid(), - no_lookup|{tuple(), tuple()}|undefined, - bookies_memory(), - boolean()) - -> {ok, pcl_state()}. +-spec pcl_registersnapshot( + pid(), + pid(), + no_lookup | {tuple(), tuple()} | undefined, + bookies_memory(), + boolean() +) -> + {ok, pcl_state()}. %% @doc %% Register a snapshot of the penciller, returning a state record from the %% penciller for the snapshot to use as its LoopData pcl_registersnapshot(Pid, Snapshot, Query, BookiesMem, LR) -> - gen_server:call(Pid, - {register_snapshot, Snapshot, Query, BookiesMem, LR}, - infinity). + gen_server:call( + Pid, + {register_snapshot, Snapshot, Query, BookiesMem, LR}, + infinity + ). -spec pcl_releasesnapshot(pid(), pid()) -> ok. %% @doc -%% Inform the primary penciller that a snapshot is finished, so that the +%% Inform the primary penciller that a snapshot is finished, so that the %% penciller can allow deletes to proceed if appropriate. pcl_releasesnapshot(Pid, Snapshot) -> gen_server:cast(Pid, {release_snapshot, Snapshot}). - -spec pcl_persistedsqn(pid()) -> integer(). %% @doc -%% Return the persisted SQN, the highest SQN which has been persisted into the +%% Return the persisted SQN, the highest SQN which has been persisted into the %% Ledger pcl_persistedsqn(Pid) -> gen_server:call(Pid, persisted_sqn, infinity). @@ -609,7 +647,7 @@ pcl_doom(Pid) -> -spec pcl_checkbloomtest(pid(), tuple()) -> boolean(). %% @doc -%% Function specifically added to help testing. In particular to make sure +%% Function specifically added to help testing. In particular to make sure %% that blooms are still available after pencllers have been re-loaded from %% disk. 
pcl_checkbloomtest(Pid, Key) -> @@ -661,44 +699,50 @@ pcl_getclerkpid(Pid) -> init([LogOpts, PCLopts]) -> leveled_log:save(LogOpts), - case {PCLopts#penciller_options.root_path, + case + { + PCLopts#penciller_options.root_path, PCLopts#penciller_options.start_snapshot, PCLopts#penciller_options.snapshot_query, PCLopts#penciller_options.bookies_mem, PCLopts#penciller_options.source_penciller - } of - {undefined, _Snapshot=true, Query, BookiesMem, SrcPenciller} - when ?IS_DEF(BookiesMem), ?IS_DEF(SrcPenciller) -> + } + of + {undefined, _Snapshot = true, Query, BookiesMem, SrcPenciller} when + ?IS_DEF(BookiesMem), ?IS_DEF(SrcPenciller) + -> LongRunning = PCLopts#penciller_options.snapshot_longrunning, %% monitor the bookie, and close the snapshot when bookie %% exits - BookieMonitor = + BookieMonitor = erlang:monitor(process, PCLopts#penciller_options.bookies_pid), {ok, State} = pcl_registersnapshot( - SrcPenciller, self(), Query, BookiesMem, LongRunning), + SrcPenciller, self(), Query, BookiesMem, LongRunning + ), leveled_log:log(p0001, [self()]), - {ok, - State#state{ - is_snapshot = true, - clerk = undefined, - bookie_monref = BookieMonitor, - source_penciller = SrcPenciller}}; - {_RootPath, _Snapshot=false, _Q, _BM, _SP} -> + {ok, State#state{ + is_snapshot = true, + clerk = undefined, + bookie_monref = BookieMonitor, + source_penciller = SrcPenciller + }}; + {_RootPath, _Snapshot = false, _Q, _BM, _SP} -> start_from_file(PCLopts) - end. - + end. -handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, - _From, - State=#state{is_snapshot=Snap}) when Snap == false -> +handle_call( + {push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, + _From, + State = #state{is_snapshot = Snap} +) when Snap == false -> % The push_mem process is as follows: % % 1. If either the penciller is still waiting on the last L0 file to be % written, or there is a work backlog - the cache is returned with the % expectation that PUTs should be slowed. Also if the cache has reached % the maximum number of lines (by default after 31 pushes from the bookie) - % + % % 2. If (1) does not apply, the bookie's cache will be added to the % penciller's cache. 
SW = os:timestamp(), @@ -718,7 +762,8 @@ handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, % updated cache at a later time leveled_log:log( p0018, - [L0Size, L0Pending, WorkBacklog, CacheAlreadyFull]), + [L0Size, L0Pending, WorkBacklog, CacheAlreadyFull] + ), {reply, returned, State}; false -> % Return ok as cache has been updated on State and the Bookie @@ -730,12 +775,15 @@ handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, false -> leveled_tree:from_orderedset(LedgerTable, ?CACHE_TYPE) end, - case leveled_pmem:add_to_cache( + case + leveled_pmem:add_to_cache( L0Size, {PushedTree, MinSQN, MaxSQN}, State#state.ledger_sqn, State#state.levelzero_cache, - true) of + true + ) + of empty_push -> {reply, ok, State}; {UpdMaxSQN, NewL0Size, UpdL0Cache} -> @@ -743,30 +791,35 @@ handle_call({push_mem, {LedgerTable, PushedIdx, MinSQN, MaxSQN}}, leveled_pmem:add_to_index( PushedIdx, State#state.levelzero_index, - length(State#state.levelzero_cache) + 1), + length(State#state.levelzero_cache) + 1 + ), leveled_log:log_randomtimer( p0031, - [NewL0Size, true, true, MinSQN, MaxSQN], SW, 0.1), - {reply, - ok, - State#state{ - levelzero_cache = UpdL0Cache, - levelzero_size = NewL0Size, - levelzero_index = UpdL0Index, - ledger_sqn = UpdMaxSQN}} + [NewL0Size, true, true, MinSQN, MaxSQN], + SW, + 0.1 + ), + {reply, ok, State#state{ + levelzero_cache = UpdL0Cache, + levelzero_size = NewL0Size, + levelzero_index = UpdL0Index, + ledger_sqn = UpdMaxSQN + }} end end; handle_call( - {fetch, Key, Hash, UseL0Index}, _From, State = #state{manifest = M}) - when ?IS_DEF(M) -> - L0Idx = - case UseL0Index of + {fetch, Key, Hash, UseL0Index}, _From, State = #state{manifest = M} +) when + ?IS_DEF(M) +-> + L0Idx = + case UseL0Index of true -> State#state.levelzero_index; false -> none end, - R = + R = timed_fetch_mem( Key, Hash, @@ -779,22 +832,23 @@ handle_call( handle_call( {check_sqn, Key, Hash, SQN}, _From, - State = #state{manifest = M, levelzero_cache = L0C, levelzero_index = L0I}) - % This is either a primary penciller, or snapshot taken without a query - % so that it contains the full level zero. - % Not to be used in head_only mode (where levelzero_index may not be - % complete) - when ?IS_DEF(M), ?IS_DEF(L0C), ?IS_DEF(L0I) -> - {reply, - compare_to_sqn(fetch_sqn(Key, Hash, M, L0C, L0I), SQN), - State}; -handle_call({fetch_keys, - StartKey, EndKey, - AccFun, InitAcc, - SegmentList, LastModRange, MaxKeys, By}, - _From, - State=#state{snapshot_fully_loaded=Ready}) - when Ready == true -> + State = #state{manifest = M, levelzero_cache = L0C, levelzero_index = L0I} +) when + % This is either a primary penciller, or snapshot taken without a query + % so that it contains the full level zero. 
+ % Not to be used in head_only mode (where levelzero_index may not be + % complete) + ?IS_DEF(M), ?IS_DEF(L0C), ?IS_DEF(L0I) +-> + {reply, compare_to_sqn(fetch_sqn(Key, Hash, M, L0C, L0I), SQN), State}; +handle_call( + {fetch_keys, StartKey, EndKey, AccFun, InitAcc, SegmentList, LastModRange, + MaxKeys, By}, + _From, + State = #state{snapshot_fully_loaded = Ready} +) when + Ready == true +-> LastModRange0 = case LastModRange of false -> @@ -810,27 +864,30 @@ handle_call({fetch_keys, StartKey, EndKey, State#state.levelzero_cache, - leveled_tree:empty(?CACHE_TYPE)); + leveled_tree:empty(?CACHE_TYPE) + ); List -> List end, - SegChecker = + SegChecker = leveled_sst:segment_checker(leveled_sst:tune_seglist(SegmentList)), - FilteredL0 = + FilteredL0 = case SegChecker of false -> L0AsList; {Min, Max, CheckFun} -> FilterFun = fun(LKV) -> - CheckSeg = + CheckSeg = leveled_sst:extract_hash( - leveled_codec:strip_to_segmentonly(LKV)), + leveled_codec:strip_to_segmentonly(LKV) + ), case CheckSeg of - CheckSeg - when is_integer(CheckSeg), - CheckSeg >= Min, - CheckSeg =< Max -> + CheckSeg when + is_integer(CheckSeg), + CheckSeg >= Min, + CheckSeg =< Max + -> CheckFun(CheckSeg); _ -> false @@ -838,32 +895,36 @@ handle_call({fetch_keys, end, lists:filter(FilterFun, L0AsList) end, - + leveled_log:log_randomtimer( - p0037, [State#state.levelzero_size], SW, 0.01), - + p0037, [State#state.levelzero_size], SW, 0.01 + ), + %% Rename any reference to loop state that may be used by the function %% to be returned - https://github.com/martinsumner/leveled/issues/326 SSTiter = case State#state.query_manifest of undefined -> leveled_pmanifest:query_manifest( - State#state.manifest, StartKey, EndKey); - {QueryManifest, StartKeyQM, EndKeyQM} - when StartKey >= StartKeyQM, EndKey =< EndKeyQM -> + State#state.manifest, StartKey, EndKey + ); + {QueryManifest, StartKeyQM, EndKeyQM} when + StartKey >= StartKeyQM, EndKey =< EndKeyQM + -> QueryManifest - end, + end, SnapshotTime = State#state.snapshot_time, PersistedIterator = maps:from_list(SSTiter), - Folder = - fun() -> + Folder = + fun() -> keyfolder( maps:put(-1, FilteredL0, PersistedIterator), {StartKey, EndKey}, {AccFun, InitAcc, SnapshotTime}, - {SegChecker, LastModRange0, MaxKeys}) + {SegChecker, LastModRange0, MaxKeys} + ) end, - case By of + case By of as_pcl -> {reply, Folder(), State}; by_runner -> @@ -874,16 +935,18 @@ handle_call(get_startup_sqn, _From, State) -> handle_call( {register_snapshot, Snapshot, Query, BookiesMem, LongRunning}, _From, - State = #state{manifest = Manifest}) - when ?IS_DEF(Manifest) -> + State = #state{manifest = Manifest} +) when + ?IS_DEF(Manifest) +-> % Register and load a snapshot % % For setup of the snapshot to be efficient should pass a query % of (StartKey, EndKey) - this will avoid a fully copy of the penciller's % memory being required to be trasnferred to the clone. 
However, this % will not be a valid clone for fetch - - TimeO = + + TimeO = case LongRunning of true -> State#state.snaptimeout_long; @@ -901,7 +964,7 @@ handle_call( BookieIncrTree end, - {CloneState, ManifestClone, QueryManifest} = + {CloneState, ManifestClone, QueryManifest} = case Query of no_lookup -> {UpdMaxSQN, UpdSize, L0Cache} = @@ -910,30 +973,45 @@ handle_call( {LM1Cache, MinSQN, MaxSQN}, State#state.ledger_sqn, State#state.levelzero_cache, - false), - {#state{levelzero_cache = L0Cache, + false + ), + { + #state{ + levelzero_cache = L0Cache, ledger_sqn = UpdMaxSQN, levelzero_size = UpdSize, - persisted_sqn = State#state.persisted_sqn}, + persisted_sqn = State#state.persisted_sqn + }, leveled_pmanifest:copy_manifest(Manifest), - undefined}; + undefined + }; {StartKey, EndKey} -> SW = os:timestamp(), L0AsTree = - leveled_pmem:merge_trees(StartKey, - EndKey, - State#state.levelzero_cache, - LM1Cache), + leveled_pmem:merge_trees( + StartKey, + EndKey, + State#state.levelzero_cache, + LM1Cache + ), leveled_log:log_randomtimer( - p0037, [State#state.levelzero_size], SW, 0.01), - {#state{levelzero_astree = L0AsTree, + p0037, [State#state.levelzero_size], SW, 0.01 + ), + { + #state{ + levelzero_astree = L0AsTree, ledger_sqn = MaxSQN, - persisted_sqn = State#state.persisted_sqn}, + persisted_sqn = State#state.persisted_sqn + }, undefined, - {leveled_pmanifest:query_manifest( - Manifest, StartKey, EndKey), + { + leveled_pmanifest:query_manifest( + Manifest, StartKey, EndKey + ), StartKey, - EndKey}}; + EndKey + } + }; undefined -> {UpdMaxSQN, UpdSize, L0Cache} = leveled_pmem:add_to_cache( @@ -941,7 +1019,8 @@ handle_call( {LM1Cache, MinSQN, MaxSQN}, State#state.ledger_sqn, State#state.levelzero_cache, - false), + false + ), LM1Idx = case BookieIdx of empty_index -> @@ -951,35 +1030,42 @@ handle_call( end, L0Index = leveled_pmem:add_to_index( - LM1Idx, State#state.levelzero_index, length(L0Cache)), - {#state{levelzero_cache = L0Cache, + LM1Idx, State#state.levelzero_index, length(L0Cache) + ), + { + #state{ + levelzero_cache = L0Cache, levelzero_index = L0Index, levelzero_size = UpdSize, ledger_sqn = UpdMaxSQN, - persisted_sqn = State#state.persisted_sqn}, + persisted_sqn = State#state.persisted_sqn + }, leveled_pmanifest:copy_manifest(Manifest), - undefined} + undefined + } end, {reply, - {ok, - CloneState#state{ - snapshot_fully_loaded = true, - snapshot_time = leveled_util:integer_now(), - manifest = ManifestClone, - query_manifest = QueryManifest - } - }, + {ok, CloneState#state{ + snapshot_fully_loaded = true, + snapshot_time = leveled_util:integer_now(), + manifest = ManifestClone, + query_manifest = QueryManifest + }}, State#state{manifest = Manifest0}}; -handle_call(close, _From, State=#state{is_snapshot=Snap}) when Snap == true -> +handle_call(close, _From, State = #state{is_snapshot = Snap}) when + Snap == true +-> ok = pcl_releasesnapshot(State#state.source_penciller, self()), {stop, normal, ok, State}; handle_call( close, From, - State = #state{manifest = Manifest, clerk = Clerk, levelzero_cache = L0C}) - % By definition not a snapshot (as snapshot covered by clause above), - % so manifest, clerk and cache must all be present - when ?IS_DEF(Manifest), ?IS_DEF(Clerk), ?IS_DEF(L0C) -> + State = #state{manifest = Manifest, clerk = Clerk, levelzero_cache = L0C} +) when + % By definition not a snapshot (as snapshot covered by clause above), + % so manifest, clerk and cache must all be present + ?IS_DEF(Manifest), ?IS_DEF(Clerk), ?IS_DEF(L0C) +-> % Level 0 files lie outside of the 
manifest, and so if there is no L0 % file present it is safe to write the current contents of memory. If % there is a L0 file present - then the memory can be dropped (it is @@ -1002,7 +1088,8 @@ handle_call( L0C, length(L0C), State#state.sst_options, - true), + true + ), ok = leveled_sst:sst_close(Constructor); false -> leveled_log:log(p0010, [State#state.levelzero_size]) @@ -1010,18 +1097,22 @@ handle_call( gen_server:cast(self(), {maybe_defer_shutdown, close, From}), {noreply, State}; handle_call( - doom, From, State = #state{clerk = Clerk}) - when ?IS_DEF(Clerk) -> + doom, From, State = #state{clerk = Clerk} +) when + ?IS_DEF(Clerk) +-> leveled_log:log(p0030, []), ok = leveled_pclerk:clerk_close(Clerk), gen_server:cast(self(), {maybe_defer_shutdown, doom, From}), {noreply, State}; handle_call( - {checkbloom_fortest, Key, Hash}, _From, State = #state{manifest = Man}) - when ?IS_DEF(Man) -> - FoldFun = + {checkbloom_fortest, Key, Hash}, _From, State = #state{manifest = Man} +) when + ?IS_DEF(Man) +-> + FoldFun = fun(Level, Acc) -> - case Acc of + case Acc of true -> true; false -> @@ -1035,56 +1126,69 @@ handle_call( end, {reply, lists:foldl(FoldFun, false, lists:seq(0, ?MAX_LEVELS)), State}; handle_call( - check_for_work, _From, State = #state{manifest = Manifest}) - when ?IS_DEF(Manifest) -> + check_for_work, _From, State = #state{manifest = Manifest} +) when + ?IS_DEF(Manifest) +-> {_WL, WC} = leveled_pmanifest:check_for_work(Manifest), {reply, WC > 0, State}; handle_call(persisted_sqn, _From, State) -> {reply, State#state.persisted_sqn, State}; handle_call( - get_sstpids, _From, State = #state{manifest = Manifest}) - when ?IS_DEF(Manifest) -> + get_sstpids, _From, State = #state{manifest = Manifest} +) when + ?IS_DEF(Manifest) +-> {reply, leveled_pmanifest:get_sstpids(Manifest), State}; handle_call(get_clerkpid, _From, State) -> {reply, State#state.clerk, State}. 
handle_cast( {manifest_change, Manifest}, - State = #state{manifest = OldManifest, clerk = Clerk}) - when ?IS_DEF(OldManifest), ?IS_DEF(Clerk) -> + State = #state{manifest = OldManifest, clerk = Clerk} +) when + ?IS_DEF(OldManifest), ?IS_DEF(Clerk) +-> NewManSQN = leveled_pmanifest:get_manifest_sqn(Manifest), OldManSQN = leveled_pmanifest:get_manifest_sqn(OldManifest), leveled_log:log(p0041, [OldManSQN, NewManSQN]), % Only safe to update the manifest if the SQN increments - if NewManSQN > OldManSQN -> - ok = - leveled_pclerk:clerk_promptdeletions(Clerk, NewManSQN), + if + NewManSQN > OldManSQN -> + ok = + leveled_pclerk:clerk_promptdeletions(Clerk, NewManSQN), % This is accepted as the new manifest, files may be deleted - UpdManifest0 = - leveled_pmanifest:merge_snapshot(OldManifest, Manifest), + UpdManifest0 = + leveled_pmanifest:merge_snapshot(OldManifest, Manifest), % Need to preserve the penciller's view of snapshots stored in % the manifest - UpdManifest1 = - leveled_pmanifest:clear_pending( - UpdManifest0, - lists:usort(State#state.pending_removals), - State#state.maybe_release), - {noreply, - State#state{ - manifest=UpdManifest1, + UpdManifest1 = + leveled_pmanifest:clear_pending( + UpdManifest0, + lists:usort(State#state.pending_removals), + State#state.maybe_release + ), + {noreply, State#state{ + manifest = UpdManifest1, pending_removals = [], maybe_release = false, - work_ongoing=false}} + work_ongoing = false + }} end; handle_cast( - {release_snapshot, Snapshot}, State = #state{manifest = Manifest}) - when ?IS_DEF(Manifest) -> + {release_snapshot, Snapshot}, State = #state{manifest = Manifest} +) when + ?IS_DEF(Manifest) +-> Manifest0 = leveled_pmanifest:release_snapshot(Manifest, Snapshot), leveled_log:log(p0003, [Snapshot]), - {noreply, State#state{manifest=Manifest0}}; -handle_cast({confirm_delete, PDFN, FilePid}, State=#state{is_snapshot=Snap}) - when Snap == false -> + {noreply, State#state{manifest = Manifest0}}; +handle_cast( + {confirm_delete, PDFN, FilePid}, State = #state{is_snapshot = Snap} +) when + Snap == false +-> % This is a two stage process. A file that is ready for deletion can be % checked against the manifest to prompt the deletion, however it must also % be removed from the manifest's list of pending deletes. 
This is only @@ -1103,20 +1207,20 @@ handle_cast({confirm_delete, PDFN, FilePid}, State=#state{is_snapshot=Snap}) true -> leveled_log:log(p0005, [PDFN]), ok = leveled_sst:sst_deleteconfirmed(FilePid), - case State#state.work_ongoing of + case State#state.work_ongoing of true -> - {noreply, - State#state{ - pending_removals = - [PDFN|State#state.pending_removals]}}; + {noreply, State#state{ + pending_removals = + [PDFN | State#state.pending_removals] + }}; false -> UpdManifest = leveled_pmanifest:clear_pending( State#state.manifest, [PDFN], - false), - {noreply, - State#state{manifest = UpdManifest}} + false + ), + {noreply, State#state{manifest = UpdManifest}} end; false -> case State#state.work_ongoing of @@ -1127,15 +1231,17 @@ handle_cast({confirm_delete, PDFN, FilePid}, State=#state{is_snapshot=Snap}) leveled_pmanifest:clear_pending( State#state.manifest, [], - true), - {noreply, - State#state{manifest = UpdManifest}} + true + ), + {noreply, State#state{manifest = UpdManifest}} end end; handle_cast( {levelzero_complete, FN, StartKey, EndKey, Bloom}, - State = #state{manifest = Man, levelzero_constructor = L0C, clerk = Clerk}) - when ?IS_DEF(Man), ?IS_DEF(L0C), ?IS_DEF(Clerk) -> + State = #state{manifest = Man, levelzero_constructor = L0C, clerk = Clerk} +) when + ?IS_DEF(Man), ?IS_DEF(L0C), ?IS_DEF(Clerk) +-> leveled_log:log(p0029, []), ManEntry = leveled_pmanifest:new_entry(StartKey, EndKey, L0C, FN, Bloom), ManifestSQN = leveled_pmanifest:get_manifest_sqn(Man) + 1, @@ -1143,19 +1249,27 @@ handle_cast( leveled_pmanifest:insert_manifest_entry(Man, ManifestSQN, 0, ManEntry), % Prompt clerk to ask about work - do this for every L0 roll ok = leveled_pclerk:clerk_prompt(Clerk), - {noreply, State#state{levelzero_cache=[], - levelzero_index=[], - levelzero_pending=false, - levelzero_constructor=undefined, - levelzero_size=0, - manifest=UpdMan, - persisted_sqn=State#state.ledger_sqn}}; + {noreply, State#state{ + levelzero_cache = [], + levelzero_index = [], + levelzero_pending = false, + levelzero_constructor = undefined, + levelzero_size = 0, + manifest = UpdMan, + persisted_sqn = State#state.ledger_sqn + }}; handle_cast( work_for_clerk, - State = #state{manifest = Man, levelzero_cache = L0Cache, clerk = Clerk}) - when ?IS_DEF(Man), ?IS_DEF(L0Cache), ?IS_DEF(Clerk) -> - case {(State#state.levelzero_pending or State#state.work_ongoing), - leveled_pmanifest:levelzero_present(Man)} of + State = #state{manifest = Man, levelzero_cache = L0Cache, clerk = Clerk} +) when + ?IS_DEF(Man), ?IS_DEF(L0Cache), ?IS_DEF(Clerk) +-> + case + { + (State#state.levelzero_pending or State#state.work_ongoing), + leveled_pmanifest:levelzero_present(Man) + } + of {true, _L0Present} -> % Work is blocked by ongoing activity {noreply, State}; @@ -1163,7 +1277,7 @@ handle_cast( % If L0 present, and no work ongoing - dropping L0 to L1 is the % priority ok = leveled_pclerk:clerk_push(Clerk, {0, Man}), - {noreply, State#state{work_ongoing=true}}; + {noreply, State#state{work_ongoing = true}}; {false, false} -> % No impediment to work - see what other work may be required % See if the in-memory cache requires rolling now @@ -1171,7 +1285,8 @@ handle_cast( maybe_cache_too_big( State#state.levelzero_size, State#state.levelzero_maxcachesize, - State#state.levelzero_cointoss), + State#state.levelzero_cointoss + ), CacheAlreadyFull = leveled_pmem:cache_full(L0Cache), % Check for a backlog of work {WL, WC} = leveled_pmanifest:check_for_work(Man), @@ -1181,7 +1296,7 @@ handle_cast( {noreply, State#state{work_backlog = false}}; 
{WC, true} when WC < ?WORKQUEUE_BACKLOG_TOLERANCE -> % Rolling the memory to create a new Level Zero file - % Must not do this if there is a work backlog beyond the + % Must not do this if there is a work backlog beyond the % tolerance, as then the backlog may never be addressed. NextSQN = leveled_pmanifest:get_manifest_sqn(Man) + 1, @@ -1193,55 +1308,63 @@ handle_cast( none, length(L0Cache), State#state.sst_options, - false), - {noreply, - State#state{ - levelzero_pending = true, - levelzero_constructor = Constructor, - work_backlog = false}}; + false + ), + {noreply, State#state{ + levelzero_pending = true, + levelzero_constructor = Constructor, + work_backlog = false + }}; {WC, L0Full} -> % Address the backlog of work, either because there is no % L0 work to do, or because the backlog has grown beyond % tolerance Backlog = WC >= ?WORKQUEUE_BACKLOG_TOLERANCE, leveled_log:log(p0024, [WC, Backlog, L0Full]), - [TL|_Tail] = WL, + [TL | _Tail] = WL, ok = leveled_pclerk:clerk_push(Clerk, {TL, Man}), - {noreply, - State#state{ - work_backlog = Backlog, work_ongoing = true}} + {noreply, State#state{ + work_backlog = Backlog, work_ongoing = true + }} end end; handle_cast( {fetch_levelzero, Slot, ReturnFun}, - State = #state{levelzero_cache = L0Cache}) - when ?IS_DEF(L0Cache) -> + State = #state{levelzero_cache = L0Cache} +) when + ?IS_DEF(L0Cache) +-> ReturnFun(lists:nth(Slot, State#state.levelzero_cache)), {noreply, State}; handle_cast({log_level, LogLevel}, State) -> update_clerk( - State#state.clerk, fun leveled_pclerk:clerk_loglevel/2, LogLevel), + State#state.clerk, fun leveled_pclerk:clerk_loglevel/2, LogLevel + ), SSTopts = State#state.sst_options, SSTopts0 = SSTopts#sst_options{log_options = leveled_log:get_opts()}, {noreply, State#state{sst_options = SSTopts0}}; handle_cast({add_logs, ForcedLogs}, State) -> update_clerk( - State#state.clerk, fun leveled_pclerk:clerk_addlogs/2, ForcedLogs), + State#state.clerk, fun leveled_pclerk:clerk_addlogs/2, ForcedLogs + ), ok = leveled_log:add_forcedlogs(ForcedLogs), SSTopts = State#state.sst_options, SSTopts0 = SSTopts#sst_options{log_options = leveled_log:get_opts()}, {noreply, State#state{sst_options = SSTopts0}}; handle_cast({remove_logs, ForcedLogs}, State) -> update_clerk( - State#state.clerk, fun leveled_pclerk:clerk_removelogs/2, ForcedLogs), + State#state.clerk, fun leveled_pclerk:clerk_removelogs/2, ForcedLogs + ), ok = leveled_log:remove_forcedlogs(ForcedLogs), SSTopts = State#state.sst_options, SSTopts0 = SSTopts#sst_options{log_options = leveled_log:get_opts()}, {noreply, State#state{sst_options = SSTopts0}}; handle_cast( {maybe_defer_shutdown, ShutdownType, From}, - State = #state{manifest = Manifest}) - when ?IS_DEF(Manifest) -> + State = #state{manifest = Manifest} +) when + ?IS_DEF(Manifest) +-> case length(leveled_pmanifest:snapshot_pids(Manifest)) of 0 -> gen_server:cast(self(), {complete_shutdown, ShutdownType, From}), @@ -1255,21 +1378,26 @@ handle_cast( leveled_log:log(p0042, [N]), timer:sleep(?SHUTDOWN_PAUSE div ?SHUTDOWN_LOOPS), gen_server:cast( - self(), {maybe_defer_shutdown, ShutdownType, From}), + self(), {maybe_defer_shutdown, ShutdownType, From} + ), {noreply, State#state{shutdown_loops = LoopCount - 1}}; 0 -> gen_server:cast( - self(), {complete_shutdown, ShutdownType, From}), + self(), {complete_shutdown, ShutdownType, From} + ), {noreply, State} end end; handle_cast( {complete_shutdown, ShutdownType, From}, - State = #state{manifest = Manifest}) - when ?IS_DEF(Manifest) -> + State = #state{manifest = Manifest} +) 
when + ?IS_DEF(Manifest) +-> lists:foreach( fun(Snap) -> ok = pcl_snapclose(Snap) end, - leveled_pmanifest:snapshot_pids(Manifest)), + leveled_pmanifest:snapshot_pids(Manifest) + ), shutdown_manifest(Manifest, State#state.levelzero_constructor), case ShutdownType of doom -> @@ -1284,14 +1412,16 @@ handle_cast( %% handle the bookie stopping and stop this snapshot handle_info( {'DOWN', BookieMonRef, process, _BookiePid, _Info}, - State=#state{bookie_monref = BookieMonRef, source_penciller = SrcPCL}) - when ?IS_DEF(SrcPCL) -> + State = #state{bookie_monref = BookieMonRef, source_penciller = SrcPCL} +) when + ?IS_DEF(SrcPCL) +-> ok = pcl_releasesnapshot(State#state.source_penciller, self()), {stop, normal, State}; handle_info(_Info, State) -> {noreply, State}. -terminate(Reason, _State=#state{is_snapshot=Snap}) when Snap == true -> +terminate(Reason, _State = #state{is_snapshot = Snap}) when Snap == true -> leveled_log:log(p0007, [Reason]); terminate(Reason, _State) -> leveled_log:log(p0011, [Reason]). @@ -1303,10 +1433,11 @@ format_status(Status) -> maps:update( state, State#state{ - manifest = redacted, + manifest = redacted, levelzero_cache = redacted, levelzero_index = redacted, - levelzero_astree = redacted}, + levelzero_astree = redacted + }, Status ); _ -> @@ -1316,7 +1447,6 @@ format_status(Status) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - %%%============================================================================ %%% Path functions %%%============================================================================ @@ -1328,8 +1458,8 @@ sst_rootpath(RootPath) -> sst_filename(ManSQN, Level, Count) -> lists:flatten( - io_lib:format("./~w_~w_~w" ++ ?SST_FILEX, [ManSQN, Level, Count])). - + io_lib:format("./~w_~w_~w" ++ ?SST_FILEX, [ManSQN, Level, Count]) + ). %%%============================================================================ %%% Internal functions @@ -1339,8 +1469,8 @@ sst_filename(ManSQN, Level, Count) -> -type update_loglevel_fun() :: fun((pid(), atom()) -> ok). -spec update_clerk - (pid()|undefined, update_loglevel_fun(), atom()) -> ok; - (pid()|undefined, update_forcedlogs_fun(), list(atom())) -> ok. + (pid() | undefined, update_loglevel_fun(), atom()) -> ok; + (pid() | undefined, update_forcedlogs_fun(), list(atom())) -> ok. update_clerk(undefined, _F, _T) -> ok; update_clerk(Clerk, F, T) when is_pid(Clerk) -> @@ -1348,14 +1478,16 @@ update_clerk(Clerk, F, T) when is_pid(Clerk) -> -spec start_from_file(penciller_options()) -> {ok, pcl_state()}. %% @doc -%% Normal start of a penciller (i.e. not a snapshot), needs to read the +%% Normal start of a penciller (i.e. not a snapshot), needs to read the %% filesystem and reconstruct the ledger from the files that it finds start_from_file( PCLopts = #penciller_options{ - root_path = RootPath, max_inmemory_tablesize = MaxTableSize} - ) - when ?IS_DEF(RootPath), ?IS_DEF(MaxTableSize) -> + root_path = RootPath, max_inmemory_tablesize = MaxTableSize + } +) when + ?IS_DEF(RootPath), ?IS_DEF(MaxTableSize) +-> RootPath = PCLopts#penciller_options.root_path, MaxTableSize = PCLopts#penciller_options.max_inmemory_tablesize, OptsSST = PCLopts#penciller_options.sst_options, @@ -1363,25 +1495,26 @@ start_from_file( SnapTimeoutShort = PCLopts#penciller_options.snaptimeout_short, SnapTimeoutLong = PCLopts#penciller_options.snaptimeout_long, - + {ok, MergeClerk} = leveled_pclerk:clerk_new(self(), RootPath, OptsSST), - + CoinToss = PCLopts#penciller_options.levelzero_cointoss, % Used to randomly defer the writing of L0 file. 
Intended to help with % vnode syncronisation issues (e.g. stop them all by default merging to % level zero concurrently) - + %% Open manifest Manifest0 = leveled_pmanifest:open_manifest(RootPath), OpenFun = fun(FN, Level) -> - {ok, Pid, {_FK, _LK}, Bloom} = + {ok, Pid, {_FK, _LK}, Bloom} = leveled_sst:sst_open( - sst_rootpath(RootPath), FN, OptsSST, Level), + sst_rootpath(RootPath), FN, OptsSST, Level + ), {Pid, Bloom} end, SQNFun = fun leveled_sst:sst_getmaxsequencenumber/1, - {MaxSQN, Manifest1, FileList} = + {MaxSQN, Manifest1, FileList} = leveled_pmanifest:load_manifest(Manifest0, OpenFun, SQNFun), leveled_log:log(p0014, [MaxSQN]), ManSQN = leveled_pmanifest:get_manifest_sqn(Manifest1), @@ -1394,24 +1527,27 @@ start_from_file( leveled_log:log(p0015, [L0FN]), L0Open = leveled_sst:sst_open( - sst_rootpath(RootPath), L0FN, OptsSST, 0), + sst_rootpath(RootPath), L0FN, OptsSST, 0 + ), {ok, L0Pid, {L0StartKey, L0EndKey}, Bloom} = L0Open, L0SQN = leveled_sst:sst_getmaxsequencenumber(L0Pid), L0Entry = leveled_pmanifest:new_entry( - L0StartKey, L0EndKey, L0Pid, L0FN, Bloom), - Manifest2 = + L0StartKey, L0EndKey, L0Pid, L0FN, Bloom + ), + Manifest2 = leveled_pmanifest:insert_manifest_entry( - Manifest1, ManSQN + 1, 0, L0Entry), + Manifest1, ManSQN + 1, 0, L0Entry + ), leveled_log:log(p0016, [L0SQN]), LedgerSQN = max(MaxSQN, L0SQN), { {Manifest2, LedgerSQN, LedgerSQN}, - [L0FN|FileList] + [L0FN | FileList] }; false -> leveled_log:log(p0017, []), - {{Manifest1, MaxSQN, MaxSQN}, FileList} + {{Manifest1, MaxSQN, MaxSQN}, FileList} end, ok = archive_files(RootPath, FileList0), { @@ -1432,8 +1568,7 @@ start_from_file( } }. - --spec shutdown_manifest(leveled_pmanifest:manifest(), pid()|undefined) -> ok. +-spec shutdown_manifest(leveled_pmanifest:manifest(), pid() | undefined) -> ok. %% @doc %% Shutdown all the SST files within the manifest shutdown_manifest(Manifest, L0Constructor) -> @@ -1451,7 +1586,7 @@ shutdown_manifest(Manifest, L0Constructor) -> ME end end, - ok = + ok = case check_alive(Owner) of true -> leveled_sst:sst_close(Owner); @@ -1462,7 +1597,7 @@ shutdown_manifest(Manifest, L0Constructor) -> leveled_pmanifest:close_manifest(Manifest, EntryCloseFun), EntryCloseFun(L0Constructor). --spec check_alive(pid()|undefined) -> boolean(). +-spec check_alive(pid() | undefined) -> boolean(). %% @doc %% Double-check a processis active before attempting to terminate check_alive(Owner) when is_pid(Owner) -> @@ -1472,23 +1607,23 @@ check_alive(_Owner) -> -spec archive_files(list(), list()) -> ok. %% @doc -%% Archive any sst files in the folder that have not been used to build the +%% Archive any sst files in the folder that have not been used to build the %% ledger at startup. 
They may have not deeleted as expected, so this saves -%% them off as non-SST fies to make it easier for an admin to garbage collect +%% them off as non-SST fies to make it easier for an admin to garbage collect %% theses files archive_files(RootPath, UsedFileList) -> {ok, AllFiles} = file:list_dir(sst_rootpath(RootPath)), FileCheckFun = fun(FN, UnusedFiles) -> FN0 = "./" ++ FN, - case filename:extension(FN0) of + case filename:extension(FN0) of ?SST_FILEX -> - case lists:member(FN0, UsedFileList) of + case lists:member(FN0, UsedFileList) of true -> UnusedFiles; false -> leveled_log:log(p0040, [FN0]), - [FN0|UnusedFiles] + [FN0 | UnusedFiles] end; _ -> UnusedFiles @@ -1496,26 +1631,31 @@ archive_files(RootPath, UsedFileList) -> end, RenameFun = fun(FN) -> - AltName = filename:join(sst_rootpath(RootPath), - filename:basename(FN, ?SST_FILEX)) - ++ ?ARCHIVE_FILEX, - file:rename(filename:join(sst_rootpath(RootPath), FN), - AltName) + AltName = + filename:join( + sst_rootpath(RootPath), + filename:basename(FN, ?SST_FILEX) + ) ++ + ?ARCHIVE_FILEX, + file:rename( + filename:join(sst_rootpath(RootPath), FN), + AltName + ) end, FilesToArchive = lists:foldl(FileCheckFun, [], AllFiles), lists:foreach(RenameFun, FilesToArchive), ok. - -spec maybe_cache_too_big( - pos_integer(), pos_integer(), boolean()) -> boolean(). + pos_integer(), pos_integer(), boolean() +) -> boolean(). %% @doc %% Is the cache too big - should it be flushed to on-disk Level 0 %% There exists some jitter to prevent all caches from flushing concurrently %% where there are multiple leveled instances on one machine. maybe_cache_too_big(NewL0Size, L0MaxSize, CoinToss) -> CacheTooBig = NewL0Size > L0MaxSize, - CacheMuchTooBig = + CacheMuchTooBig = NewL0Size > min(?SUPER_MAX_TABLE_SIZE, 2 * L0MaxSize), RandomFactor = case CoinToss of @@ -1532,13 +1672,18 @@ maybe_cache_too_big(NewL0Size, L0MaxSize, CoinToss) -> CacheTooBig and (RandomFactor or CacheMuchTooBig). -spec roll_memory( - pos_integer(), non_neg_integer(), string(), - levelzero_cache()|none, pos_integer(), - sst_options(), boolean()) - -> {pid(), leveled_ebloom:bloom()|none}. + pos_integer(), + non_neg_integer(), + string(), + levelzero_cache() | none, + pos_integer(), + sst_options(), + boolean() +) -> + {pid(), leveled_ebloom:bloom() | none}. %% @doc -%% Roll the in-memory cache into a L0 file. If this is done synchronously, -%% will return a bloom representing the contents of the file. +%% Roll the in-memory cache into a L0 file. If this is done synchronously, +%% will return a bloom representing the contents of the file. %% %% Casting a large object (the levelzero cache) to the SST file does not lead %% to an immediate return. 
With 32K keys in the TreeList it could take around @@ -1558,33 +1703,38 @@ roll_memory(NextManSQN, LedgerSQN, RootPath, none, CL, SSTOpts, false) -> fun(Slot, ReturnFun) -> pcl_fetchlevelzero(PCL, Slot, ReturnFun) end, {ok, Constructor, _} = leveled_sst:sst_newlevelzero( - L0Path, L0FN, CL, FetchFun, PCL, LedgerSQN, SSTOpts), + L0Path, L0FN, CL, FetchFun, PCL, LedgerSQN, SSTOpts + ), {Constructor, none}; roll_memory( - NextManSQN, LedgerSQN, RootPath, L0Cache, CL, SSTOpts, true) - when is_list(L0Cache) -> + NextManSQN, LedgerSQN, RootPath, L0Cache, CL, SSTOpts, true +) when + is_list(L0Cache) +-> L0Path = sst_rootpath(RootPath), L0FN = sst_filename(NextManSQN, 0, 0), FetchFun = fun(Slot) -> lists:nth(Slot, L0Cache) end, KVList = leveled_pmem:to_list(CL, FetchFun), {ok, Constructor, _, Bloom} = leveled_sst:sst_new( - L0Path, L0FN, 0, KVList, LedgerSQN, SSTOpts), + L0Path, L0FN, 0, KVList, LedgerSQN, SSTOpts + ), {Constructor, Bloom}. -spec timed_fetch_mem( tuple(), - {integer(), integer()}, + {integer(), integer()}, leveled_pmanifest:manifest(), - list(), + list(), leveled_pmem:index_array(), - leveled_monitor:monitor()) -> leveled_codec:ledger_kv()|not_found. + leveled_monitor:monitor() +) -> leveled_codec:ledger_kv() | not_found. %% @doc -%% Fetch the result from the penciller, starting by looking in the memory, +%% Fetch the result from the penciller, starting by looking in the memory, %% and if it is not found looking down level by level through the LSM tree. %% %% This allows for the request to be timed, and the timing result to be added -%% to the aggregate timings - so that timinings per level can be logged and +%% to the aggregate timings - so that timinings per level can be logged and %% the cost of requests dropping levels can be monitored. %% %% the result tuple includes the level at which the result was found. @@ -1601,17 +1751,18 @@ timed_fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, Monitor) -> leveled_codec:segment_hash(), leveled_pmanifest:manifest(), list(), - leveled_pmem:index_array()) -> - not_present|leveled_codec:ledger_kv()|leveled_codec:sqn(). + leveled_pmem:index_array() +) -> + not_present | leveled_codec:ledger_kv() | leveled_codec:sqn(). %% @doc -%% Fetch the result from the penciller, starting by looking in the memory, +%% Fetch the result from the penciller, starting by looking in the memory, %% and if it is not found looking down level by level through the LSM tree. fetch_sqn(Key, Hash, Manifest, L0Cache, L0Index) -> R = fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, fun sst_getsqn/4), element(1, R). fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, FetchFun) -> - PosList = + PosList = case L0Index of none -> lists:seq(1, length(L0Cache)); @@ -1626,11 +1777,15 @@ fetch_mem(Key, Hash, Manifest, L0Cache, L0Index, FetchFun) -> {KV, memory} end. --spec fetch(tuple(), {integer(), integer()}, - leveled_pmanifest:manifest(), integer(), - sst_fetchfun()) -> {tuple()|not_present, integer()|basement}. +-spec fetch( + tuple(), + {integer(), integer()}, + leveled_pmanifest:manifest(), + integer(), + sst_fetchfun() +) -> {tuple() | not_present, integer() | basement}. %% @doc -%% Fetch from the persisted portion of the LSM tree, checking each level in +%% Fetch from the persisted portion of the LSM tree, checking each level in %% turn until a match is found. %% Levels can be skipped by checking the bloom for the relevant file at that %% level. 
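A compressed sketch of the descent the comment above describes: at each level, consult the bloom for the file that covers the key before paying for a real read, and fall through to the next level on a miss. BloomCheck/2 and FileFetch/2 are hypothetical callbacks standing in for the leveled_pmanifest and leveled_sst calls used by fetch/5 below.

%% Sketch only: not the module's fetch/5, just the same shape with
%% hypothetical callbacks. MaxLevel stands in for ?MAX_LEVELS.
find_in_levels(_Key, Level, MaxLevel, _BloomCheck, _FileFetch)
        when Level > MaxLevel ->
    not_present;
find_in_levels(Key, Level, MaxLevel, BloomCheck, FileFetch) ->
    Next =
        fun() ->
            find_in_levels(Key, Level + 1, MaxLevel, BloomCheck, FileFetch)
        end,
    case BloomCheck(Key, Level) of
        false ->
            % Bloom says the key cannot be in this level's file - skip the read
            Next();
        true ->
            case FileFetch(Key, Level) of
                not_present -> Next();
                KV -> {KV, Level}
            end
    end.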
@@ -1641,7 +1796,7 @@ fetch(Key, Hash, Manifest, Level, FetchFun) -> false -> fetch(Key, Hash, Manifest, Level + 1, FetchFun); FP -> - case leveled_pmanifest:check_bloom(Manifest, FP, Hash) of + case leveled_pmanifest:check_bloom(Manifest, FP, Hash) of true -> case FetchFun(FP, Key, Hash, Level) of not_present -> @@ -1653,7 +1808,7 @@ fetch(Key, Hash, Manifest, Level, FetchFun) -> fetch(Key, Hash, Manifest, Level + 1, FetchFun) end end. - + timed_sst_get(PID, Key, Hash, Level) -> SW = os:timestamp(), R = leveled_sst:sst_get(PID, Key, Hash), @@ -1676,10 +1831,11 @@ log_slowfetch(T0, R, PID, Level, FetchTolerance) -> end. -spec compare_to_sqn( - leveled_codec:ledger_kv()|leveled_codec:sqn()|not_present, - integer()) -> sqn_check(). + leveled_codec:ledger_kv() | leveled_codec:sqn() | not_present, + integer() +) -> sqn_check(). %% @doc -%% Check to see if the SQN in the penciller is after the SQN expected for an +%% Check to see if the SQN in the penciller is after the SQN expected for an %% object (used to allow the journal to check compaction status from a cache %% of the ledger - objects with a more recent sequence number can be compacted). compare_to_sqn(not_present, _SQN) -> @@ -1698,34 +1854,40 @@ compare_to_sqn(Obj, SQN) -> -spec maybelog_fetch_timing( leveled_monitor:monitor(), - memory|leveled_pmanifest:lsm_level(), + memory | leveled_pmanifest:lsm_level(), leveled_monitor:timing(), - boolean()) -> ok. + boolean() +) -> ok. maybelog_fetch_timing(_Monitor, _Level, no_timing, _NF) -> ok; maybelog_fetch_timing( - {Pid, _StatsFreq}, _Level, FetchTime, true) when is_pid(Pid) -> + {Pid, _StatsFreq}, _Level, FetchTime, true +) when is_pid(Pid) -> leveled_monitor:add_stat(Pid, {pcl_fetch_update, not_found, FetchTime}); maybelog_fetch_timing( - {Pid, _StatsFreq}, Level, FetchTime, _NF) when is_pid(Pid) -> + {Pid, _StatsFreq}, Level, FetchTime, _NF +) when is_pid(Pid) -> leveled_monitor:add_stat(Pid, {pcl_fetch_update, Level, FetchTime}). %%%============================================================================ %%% Key folder %%%============================================================================ --type sst_iterator() - :: #{ +-type sst_iterator() :: + #{ leveled_pmanifest:lsm_level() => - list(leveled_sst:expandable_pointer()|leveled_codec:ledger_kv()), + list(leveled_sst:expandable_pointer() | leveled_codec:ledger_kv()), -1 => - list(leveled_codec:ledger_kv())}. --type max_keys() :: unlimited|non_neg_integer(). --type iterator_level() :: -1|leveled_pmanifest:lsm_level(). + list(leveled_codec:ledger_kv()) + }. +-type max_keys() :: unlimited | non_neg_integer(). +-type iterator_level() :: -1 | leveled_pmanifest:lsm_level(). -type search_info() :: - {{leveled_codec:ledger_key(), leveled_codec:ledger_key()}, - {non_neg_integer(), pos_integer()|infinity}, - leveled_sst:segment_check_fun()}. + { + {leveled_codec:ledger_key(), leveled_codec:ledger_key()}, + {non_neg_integer(), pos_integer() | infinity}, + leveled_sst:segment_check_fun() + }. -define(NULL_KEY, {null, null}). @@ -1733,24 +1895,30 @@ maybelog_fetch_timing( sst_iterator(), {leveled_codec:ledger_key(), leveled_codec:ledger_key()}, {pclacc_fun(), any(), pos_integer()}, - {leveled_sst:segment_check_fun(), - {non_neg_integer(), pos_integer()|infinity}, - -1|non_neg_integer()}) -> {non_neg_integer(), term()}|term(). + { + leveled_sst:segment_check_fun(), + {non_neg_integer(), pos_integer() | infinity}, + -1 | non_neg_integer() + } +) -> {non_neg_integer(), term()} | term(). 
keyfolder( - Iterator, - {StartKey, EndKey}, - {AccFun, InitAcc, Now}, - {SegCheckFun, LastModRange, KeyLimit}) -> + Iterator, + {StartKey, EndKey}, + {AccFun, InitAcc, Now}, + {SegCheckFun, LastModRange, KeyLimit} +) -> % The in-memory dump of keys in this range, may go beyond the end key - so - % strip these back before starting the fold + % strip these back before starting the fold StripIMMFun = fun(MemIter) -> lists:reverse( lists:dropwhile( fun({K, _V}) -> leveled_codec:endkey_passed(EndKey, K) end, - lists:reverse(MemIter))) + lists:reverse(MemIter) + ) + ) end, - MaxKeys = + MaxKeys = case KeyLimit of -1 -> unlimited; KeyLimit when is_integer(KeyLimit), KeyLimit >= 0 -> KeyLimit @@ -1761,25 +1929,27 @@ keyfolder( MaxKeys, {?FOLD_SCANWIDTH, lists:sort(maps:keys(Iterator))}, {{StartKey, EndKey}, LastModRange, SegCheckFun}, - {AccFun, Now}). + {AccFun, Now} + ). -spec keyfolder( - sst_iterator()|no_more_keys, + sst_iterator() | no_more_keys, term(), max_keys(), {pos_integer(), list(iterator_level())}, search_info(), - {pclacc_fun(), integer()}) -> {non_neg_integer(), term()}|term(). + {pclacc_fun(), integer()} +) -> {non_neg_integer(), term()} | term(). %% @doc %% The keyfolder takes an iterator - a map with an entry for each level, from %% level -1 (the in-memory cache of keys) through to level 7 (the theoretical) -%% maximum level. +%% maximum level. %% %% The find_nextkeys function is used to scan the iterators to find the next %% set of W keys. These can then be accumulated. If there is a MaxKeys set %% (i.e. a maximum number of KV pairs to be accumulated), then this must be %% tracked so the keyfolder never asks for more than the remainder from -%% find_nextkeys +%% find_nextkeys keyfolder(no_more_keys, Acc, MaxKeys, _LevelInfo, _SearchInfo, _AccDetails) -> case MaxKeys of unlimited -> Acc; @@ -1788,12 +1958,13 @@ keyfolder(no_more_keys, Acc, MaxKeys, _LevelInfo, _SearchInfo, _AccDetails) -> keyfolder(_Iter, Acc, 0, _LevelInfo, _SearchInfo, _AccDetails) -> {0, Acc}; keyfolder( - Iter, - Acc, - MaxKeys, - {W, Ls}=LevelInfo, - {_KR, LastModRange, _SCF}=SearchInfo, - {AccFun, Now}=AccDetails) -> + Iter, + Acc, + MaxKeys, + {W, Ls} = LevelInfo, + {_KR, LastModRange, _SCF} = SearchInfo, + {AccFun, Now} = AccDetails +) -> {IterUpd, FoundKVs} = find_nextkeys( Iter, @@ -1801,10 +1972,12 @@ keyfolder( [], Ls, {fetch_size(MaxKeys, W), scan_size(MaxKeys)}, - SearchInfo), + SearchInfo + ), {UpdAcc, KeyCount} = leveled_codec:maybe_accumulate( - lists:reverse(FoundKVs), Acc, 0, {Now, LastModRange}, AccFun), + lists:reverse(FoundKVs), Acc, 0, {Now, LastModRange}, AccFun + ), MaxKeysLeft = case MaxKeys of unlimited -> unlimited; @@ -1824,112 +1997,148 @@ scan_size(MaxKeys) -> -spec find_nextkeys( sst_iterator(), - {list(iterator_level()), - {null|iterator_level(), null|leveled_codec:ledger_kv()}}, + {list(iterator_level()), { + null | iterator_level(), null | leveled_codec:ledger_kv() + }}, list(leveled_codec:ledger_kv()), list(iterator_level()), {pos_integer(), pos_integer()}, - search_info()) -> - {no_more_keys, list(leveled_codec:ledger_kv())}| - {sst_iterator(), list(leveled_codec:ledger_kv())}. + search_info() +) -> + {no_more_keys, list(leveled_codec:ledger_kv())} + | {sst_iterator(), list(leveled_codec:ledger_kv())}. 
%% @doc %% Looks to find up to W keys, where for each key every level is checked, %% comparing keys to find the best key for that loop find_nextkeys( - _Iter, {[], ?NULL_KEY}, FoundKVs, _Ls, _BatchInfo, _SearchInfo) -> + _Iter, {[], ?NULL_KEY}, FoundKVs, _Ls, _BatchInfo, _SearchInfo +) -> % Each level checked and best key still NULL => no_more_keys {no_more_keys, FoundKVs}; find_nextkeys( - Iter, {[], {BKL, BestKV}}, FoundKVs, _Ls, {W, _SW}, _SearchInfo) - when length(FoundKVs) == W - 1, BestKV =/= null -> + Iter, {[], {BKL, BestKV}}, FoundKVs, _Ls, {W, _SW}, _SearchInfo +) when + length(FoundKVs) == W - 1, BestKV =/= null +-> % All levels scanned, and there are now W keys (W - 1 previously found plus % the latest best key) - {maps:update_with(BKL, fun tl/1, Iter), [BestKV|FoundKVs]}; + {maps:update_with(BKL, fun tl/1, Iter), [BestKV | FoundKVs]}; find_nextkeys( - Iter, {[], {BKL, BestKV}}, FoundKVs, Ls, BatchInfo, SearchInfo) - when BestKV =/= null -> + Iter, {[], {BKL, BestKV}}, FoundKVs, Ls, BatchInfo, SearchInfo +) when + BestKV =/= null +-> % All levels scanned so this is the best key ... now loop to find more find_nextkeys( maps:update_with(BKL, fun tl/1, Iter), {Ls, ?NULL_KEY}, - [BestKV|FoundKVs], - Ls, BatchInfo, SearchInfo); -find_nextkeys( - Iter, - {[LCnt|OtherLevels]=LoopLs, {BKL, BKV}=PrevBest}, - FoundKVs, + [BestKV | FoundKVs], Ls, - {_W, ScanWidth}=BI, - {{StartKey, EndKey}, {LowLastMod, _High}, SegChecker}=SI) -> + BatchInfo, + SearchInfo + ); +find_nextkeys( + Iter, + {[LCnt | OtherLevels] = LoopLs, {BKL, BKV} = PrevBest}, + FoundKVs, + Ls, + {_W, ScanWidth} = BI, + {{StartKey, EndKey}, {LowLastMod, _High}, SegChecker} = SI +) -> case maps:get(LCnt, Iter) of [] -> find_nextkeys( Iter, {OtherLevels, PrevBest}, FoundKVs, - Ls -- [LCnt], BI, SI); - [{next, Owner, _SK}|RestOfKeys] -> + Ls -- [LCnt], + BI, + SI + ); + [{next, Owner, _SK} | RestOfKeys] -> % Expansion required Pointer = {next, Owner, StartKey, EndKey}, UpdList = leveled_sst:sst_expandpointer( - Pointer, RestOfKeys, ScanWidth, SegChecker, LowLastMod), + Pointer, RestOfKeys, ScanWidth, SegChecker, LowLastMod + ), % Need to loop around at this level (LCnt) as we have not yet % examined a real key at this level find_nextkeys( maps:update(LCnt, UpdList, Iter), {LoopLs, PrevBest}, FoundKVs, - Ls, BI, SI); - [{pointer, SSTPid, Slot, PSK, PEK}|RestOfKeys] -> + Ls, + BI, + SI + ); + [{pointer, SSTPid, Slot, PSK, PEK} | RestOfKeys] -> % Expansion required Pointer = {pointer, SSTPid, Slot, PSK, PEK}, UpdList = leveled_sst:sst_expandpointer( - Pointer, RestOfKeys, ScanWidth, SegChecker, LowLastMod), + Pointer, RestOfKeys, ScanWidth, SegChecker, LowLastMod + ), % Need to loop around at this level (LCnt) as we have not yet % examined a real key at this level find_nextkeys( maps:update(LCnt, UpdList, Iter), {LoopLs, PrevBest}, FoundKVs, - Ls, BI, SI); - [{Key, Val}|_RestOfKeys] when BKV == null -> + Ls, + BI, + SI + ); + [{Key, Val} | _RestOfKeys] when BKV == null -> find_nextkeys( Iter, {OtherLevels, {LCnt, {Key, Val}}}, FoundKVs, - Ls, BI, SI); - [{Key, Val}|_RestOfKeys] when Key < element(1, BKV) -> + Ls, + BI, + SI + ); + [{Key, Val} | _RestOfKeys] when Key < element(1, BKV) -> find_nextkeys( Iter, {OtherLevels, {LCnt, {Key, Val}}}, FoundKVs, - Ls, BI, SI); - [{Key, _Val}|_RestOfKeys] when Key > element(1, BKV) -> + Ls, + BI, + SI + ); + [{Key, _Val} | _RestOfKeys] when Key > element(1, BKV) -> find_nextkeys( Iter, {OtherLevels, PrevBest}, FoundKVs, - Ls, BI, SI); - [{Key, Val}|_RestOfKeys] when BKV =/= null -> + Ls, + 
BI, + SI + ); + [{Key, Val} | _RestOfKeys] when BKV =/= null -> case leveled_codec:key_dominates({Key, Val}, BKV) of true -> find_nextkeys( maps:update_with(BKL, fun tl/1, Iter), {OtherLevels, {LCnt, {Key, Val}}}, FoundKVs, - Ls, BI, SI); + Ls, + BI, + SI + ); false -> find_nextkeys( maps:update_with(LCnt, fun tl/1, Iter), {OtherLevels, PrevBest}, FoundKVs, - Ls, BI, SI) + Ls, + BI, + SI + ) end end. - %%%============================================================================ %%% Test %%%============================================================================ @@ -1939,8 +2148,9 @@ find_nextkeys( -include_lib("eunit/include/eunit.hrl"). -spec pcl_fetch( - pid(), leveled_codec:ledger_key()) - -> leveled_codec:ledger_kv()|not_present. + pid(), leveled_codec:ledger_key() +) -> + leveled_codec:ledger_kv() | not_present. pcl_fetch(Pid, Key) -> Hash = leveled_codec:segment_hash(Key), if @@ -1953,7 +2163,8 @@ keyfolder_test(IMMiter, SSTiter, StartKey, EndKey, {AccFun, Acc, Now}) -> maps:put(-1, IMMiter, SSTiter), {StartKey, EndKey}, {AccFun, Acc, Now}, - {false, {0, infinity}, -1}). + {false, {0, infinity}, -1} + ). convert_qmanifest_tomap(SSTiter) -> maps:from_list(SSTiter). @@ -1966,7 +2177,8 @@ find_nextkey(QueryArray, StartKey, EndKey) -> [], maps:keys(QueryArray), {1, 1}, - {{StartKey, EndKey}, {0, infinity}, false}), + {{StartKey, EndKey}, {0, infinity}, false} + ), case UpdArray of no_more_keys -> no_more_keys; @@ -1981,7 +2193,7 @@ generate_randomkeys({Count, StartSQN}) -> generate_randomkeys(0, _SQN, Acc) -> lists:reverse(Acc); generate_randomkeys(Count, SQN, Acc) -> - K = + K = { o, list_to_binary(lists:concat(["Bucket", rand:uniform(1024)])), @@ -1993,7 +2205,7 @@ generate_randomkeys(Count, SQN, Acc) -> K, {SQN, {active, infinity}, leveled_codec:segment_hash(K), null} }, - generate_randomkeys(Count - 1, SQN + 1, [RandKey|Acc]). + generate_randomkeys(Count - 1, SQN + 1, [RandKey | Acc]). clean_testdir(RootPath) -> clean_subdir(sst_rootpath(RootPath)), @@ -2009,7 +2221,8 @@ clean_subdir(DirPath) -> ok = file:delete(File), io:format("Success deleting ~s~n", [File]) end, - Files); + Files + ); false -> ok end. @@ -2020,7 +2233,7 @@ maybe_pause_push(PCL, KL) -> T1 = lists:foldl( fun({K, V}, {AccSL, AccIdx, MinSQN, MaxSQN}) -> - UpdSL = [{K, V}|AccSL], + UpdSL = [{K, V} | AccSL], SQN = leveled_codec:strip_to_seqonly({K, V}), H = leveled_codec:segment_hash(K), UpdIdx = leveled_pmem:prepare_for_index(AccIdx, H), @@ -2044,7 +2257,6 @@ maybe_pause_push(PCL, KL) -> add_missing_hash({K, {SQN, ST, MD}}) -> {K, {SQN, ST, leveled_codec:segment_hash(K), MD}}. - archive_files_test() -> RootPath = "test/test_area/ledger", SSTPath = sst_rootpath(RootPath), @@ -2062,12 +2274,12 @@ archive_files_test() -> ok = clean_subdir(SSTPath). 
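[Editor's note, not part of the patch.] The find_nextkeys clauses that end above are, at heart, a k-way merge: each level's iterator offers its head key, the best key wins, and that level's iterator is advanced. A minimal sketch of the same idea over a map of per-level ordered lists; here the lower level index simply wins on a key clash, whereas the real code defers to leveled_codec:key_dominates to compare sequence numbers.

%% Illustrative only - pick the best head key across a map of per-level lists.
best_head(Iter) when is_map(Iter) ->
    maps:fold(
        fun (_Lvl, [], Best) ->
                Best;
            (Lvl, [{K, _V} = KV | _], none) ->
                {Lvl, K, KV};
            (Lvl, [{K, _V} = KV | _], {BestLvl, BestK, _})
                    when K < BestK; K == BestK, Lvl < BestLvl ->
                {Lvl, K, KV};
            (_Lvl, _Keys, Best) ->
                Best
        end,
        none,
        Iter).

For example, best_head(#{1 => [{b, new}], 5 => [{a, old}]}) returns {5, a, {a, old}}, and the caller would then advance the level-5 iterator, just as find_nextkeys does with maps:update_with(BKL, fun tl/1, Iter).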
shutdown_when_compact(Pid) -> - FoldFun = + FoldFun = fun(_I, Ready) -> - case Ready of - true -> + case Ready of + true -> true; - false -> + false -> timer:sleep(200), not pcl_checkforwork(Pid) end @@ -2079,12 +2291,14 @@ shutdown_when_compact(Pid) -> fetch_status_test() -> RootPath = "test/test_area/ledger", clean_testdir(RootPath), - {ok, PCL} = - pcl_start(#penciller_options{root_path=RootPath, - max_inmemory_tablesize=1000, - sst_options=#sst_options{}}), + {ok, PCL} = + pcl_start(#penciller_options{ + root_path = RootPath, + max_inmemory_tablesize = 1000, + sst_options = #sst_options{} + }), {status, PCL, {module, gen_server}, SItemL} = sys:get_status(PCL), - {data,[{"State", S}]} = lists:nth(3, lists:nth(5, SItemL)), + {data, [{"State", S}]} = lists:nth(3, lists:nth(5, SItemL)), true = is_integer(array:size(element(2, S#state.manifest))), Status = format_status(#{reason => terminate, state => S}), ST = maps:get(state, Status), @@ -2103,12 +2317,14 @@ close_no_crash_test_() -> close_no_crash_tester() -> RootPath = "test/test_area/ledger_close", clean_testdir(RootPath), - {ok, PCL} = + {ok, PCL} = pcl_start( #penciller_options{ - root_path=RootPath, - max_inmemory_tablesize=1000, - sst_options=#sst_options{}}), + root_path = RootPath, + max_inmemory_tablesize = 1000, + sst_options = #sst_options{} + } + ), {ok, PclSnap} = pcl_snapstart( #penciller_options{ @@ -2124,16 +2340,15 @@ close_no_crash_tester() -> ok = pcl_close(PCL), clean_testdir(RootPath). - simple_server_test() -> RootPath = "test/test_area/ledger", clean_testdir(RootPath), - {ok, PCL} = + {ok, PCL} = pcl_start( #penciller_options{ - root_path=RootPath, - max_inmemory_tablesize=1000, - sst_options=#sst_options{} + root_path = RootPath, + max_inmemory_tablesize = 1000, + sst_options = #sst_options{} } ), Key1_Pre = @@ -2143,7 +2358,7 @@ simple_server_test() -> }, Key1 = add_missing_hash(Key1_Pre), KL1 = generate_randomkeys({1000, 2}), - Key2_Pre = + Key2_Pre = { {o, <<"Bucket0002">>, <<"Key0002">>, null}, {1002, {active, infinity}, null} @@ -2157,7 +2372,7 @@ simple_server_test() -> {2003, {active, infinity}, null} }, Key3 = add_missing_hash(Key3_Pre), - KL3 = generate_randomkeys({1000, 2004}), + KL3 = generate_randomkeys({1000, 2004}), Key4_Pre = { {o, <<"Bucket0004">>, <<"Key0004">>, null}, @@ -2184,14 +2399,14 @@ simple_server_test() -> Key2, pcl_fetch(PCL, {o, <<"Bucket0002">>, <<"Key0002">>, null}) ), - + ok = maybe_pause_push(PCL, KL2), ?assertMatch( Key2, pcl_fetch(PCL, {o, <<"Bucket0002">>, <<"Key0002">>, null}) ), ok = maybe_pause_push(PCL, [Key3]), - + ?assertMatch( Key1, pcl_fetch(PCL, {o, <<"Bucket0001">>, <<"Key0001">>, null}) @@ -2202,23 +2417,25 @@ simple_server_test() -> ), ?assertMatch( Key3, - pcl_fetch(PCL, {o, <<"Bucket0003">>, <<"Key0003">>, null})), - + pcl_fetch(PCL, {o, <<"Bucket0003">>, <<"Key0003">>, null}) + ), + true = pcl_checkbloomtest(PCL, {o, <<"Bucket0001">>, <<"Key0001">>, null}), true = pcl_checkbloomtest(PCL, {o, <<"Bucket0002">>, <<"Key0002">>, null}), true = pcl_checkbloomtest(PCL, {o, <<"Bucket0003">>, <<"Key0003">>, null}), false = pcl_checkbloomtest(PCL, {o, <<"Bucket9999">>, <<"Key9999">>, null}), - + ok = shutdown_when_compact(PCL), - {ok, PCLr} = + {ok, PCLr} = pcl_start( #penciller_options{ - root_path=RootPath, - max_inmemory_tablesize=1000, - sst_options=#sst_options{} - }), + root_path = RootPath, + max_inmemory_tablesize = 1000, + sst_options = #sst_options{} + } + ), ?assertMatch(2003, pcl_getstartupsequencenumber(PCLr)), true = pcl_checkbloomtest(PCLr, {o, 
<<"Bucket0001">>, <<"Key0001">>, null}), @@ -2228,17 +2445,19 @@ simple_server_test() -> pcl_checkbloomtest(PCLr, {o, <<"Bucket0003">>, <<"Key0003">>, null}), false = pcl_checkbloomtest(PCLr, {o, <<"Bucket9999">>, <<"Key9999">>, null}), - + ?assertMatch( Key1, pcl_fetch(PCLr, {o, <<"Bucket0001">>, <<"Key0001">>, null}) ), ?assertMatch( Key2, - pcl_fetch(PCLr, {o, <<"Bucket0002">>, <<"Key0002">>, null})), + pcl_fetch(PCLr, {o, <<"Bucket0002">>, <<"Key0002">>, null}) + ), ?assertMatch( Key3, - pcl_fetch(PCLr, {o, <<"Bucket0003">>, <<"Key0003">>, null})), + pcl_fetch(PCLr, {o, <<"Bucket0003">>, <<"Key0003">>, null}) + ), ok = maybe_pause_push(PCLr, KL3), ok = maybe_pause_push(PCLr, [Key4]), ok = maybe_pause_push(PCLr, KL4), @@ -2249,16 +2468,18 @@ simple_server_test() -> ), ?assertMatch( Key2, - pcl_fetch(PCLr, {o, <<"Bucket0002">>, <<"Key0002">>, null})), + pcl_fetch(PCLr, {o, <<"Bucket0002">>, <<"Key0002">>, null}) + ), ?assertMatch( Key3, - pcl_fetch(PCLr, {o, <<"Bucket0003">>, <<"Key0003">>, null})), + pcl_fetch(PCLr, {o, <<"Bucket0003">>, <<"Key0003">>, null}) + ), ?assertMatch( Key4, pcl_fetch(PCLr, {o, <<"Bucket0004">>, <<"Key0004">>, null}) ), - - {ok, PclSnap, null} = + + {ok, PclSnap, null} = leveled_bookie:snapshot_store( leveled_bookie:empty_ledgercache(), PCLr, @@ -2266,41 +2487,54 @@ simple_server_test() -> {no_monitor, 0}, ledger, undefined, - false), - + false + ), + ?assertMatch( Key1, - pcl_fetch(PclSnap, {o, <<"Bucket0001">>, <<"Key0001">>, null})), + pcl_fetch(PclSnap, {o, <<"Bucket0001">>, <<"Key0001">>, null}) + ), ?assertMatch( Key2, - pcl_fetch(PclSnap, {o, <<"Bucket0002">>, <<"Key0002">>, null})), + pcl_fetch(PclSnap, {o, <<"Bucket0002">>, <<"Key0002">>, null}) + ), ?assertMatch( Key3, - pcl_fetch(PclSnap, {o, <<"Bucket0003">>, <<"Key0003">>, null})), + pcl_fetch(PclSnap, {o, <<"Bucket0003">>, <<"Key0003">>, null}) + ), ?assertMatch( Key4, - pcl_fetch(PclSnap, {o, <<"Bucket0004">>, <<"Key0004">>, null})), + pcl_fetch(PclSnap, {o, <<"Bucket0004">>, <<"Key0004">>, null}) + ), ?assertMatch( - current, + current, pcl_checksequencenumber( - PclSnap, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 1)), + PclSnap, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 1 + ) + ), ?assertMatch( current, pcl_checksequencenumber( - PclSnap, {o, <<"Bucket0002">>, <<"Key0002">>, null}, 1002)), + PclSnap, {o, <<"Bucket0002">>, <<"Key0002">>, null}, 1002 + ) + ), ?assertMatch( current, pcl_checksequencenumber( - PclSnap, {o, <<"Bucket0003">>, <<"Key0003">>, null}, 2003)), + PclSnap, {o, <<"Bucket0003">>, <<"Key0003">>, null}, 2003 + ) + ), ?assertMatch( current, pcl_checksequencenumber( - PclSnap, {o, <<"Bucket0004">>, <<"Key0004">>, null}, 3004)), + PclSnap, {o, <<"Bucket0004">>, <<"Key0004">>, null}, 3004 + ) + ), % Add some more keys and confirm that check sequence number still - % sees the old version in the previous snapshot, but will see the new + % sees the old version in the previous snapshot, but will see the new % version in a new snapshot - + Key1A_Pre = { {o, <<"Bucket0001">>, <<"Key0001">>, null}, @@ -2313,10 +2547,12 @@ simple_server_test() -> ?assertMatch( current, pcl_checksequencenumber( - PclSnap, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 1)), + PclSnap, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 1 + ) + ), ok = pcl_close(PclSnap), - - {ok, PclSnap2, null} = + + {ok, PclSnap2, null} = leveled_bookie:snapshot_store( leveled_bookie:empty_ledgercache(), PCLr, @@ -2324,56 +2560,56 @@ simple_server_test() -> {no_monitor, 0}, ledger, undefined, - false), - + false + ), + 
?assertMatch( replaced, pcl_checksequencenumber( - PclSnap2, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 1)), + PclSnap2, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 1 + ) + ), ?assertMatch( current, pcl_checksequencenumber( - PclSnap2, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 4005)), + PclSnap2, {o, <<"Bucket0001">>, <<"Key0001">>, null}, 4005 + ) + ), ?assertMatch( current, pcl_checksequencenumber( - PclSnap2, {o, <<"Bucket0002">>, <<"Key0002">>, null}, 1002)), + PclSnap2, {o, <<"Bucket0002">>, <<"Key0002">>, null}, 1002 + ) + ), ok = pcl_close(PclSnap2), ok = pcl_close(PCLr), clean_testdir(RootPath). - simple_findnextkey_test() -> - QueryArrayAsList = + QueryArrayAsList = [ - {2, - [ - { - {o, <<"Bucket1">>, <<"Key1">>, null}, - {5, {active, infinity}, {0, 0}, null} - }, - { - {o, <<"Bucket1">>, <<"Key5">>, null}, - {4, {active, infinity}, {0, 0}, null} - } - ] - }, - {3, - [ - { - {o, <<"Bucket1">>, <<"Key3">>, null}, - {3, {active, infinity}, {0, 0}, null} - } - ] - }, - {5, - [ - { - {o, <<"Bucket1">>, <<"Key2">>, null}, - {2, {active, infinity}, {0, 0}, null} - } - ] - } + {2, [ + { + {o, <<"Bucket1">>, <<"Key1">>, null}, + {5, {active, infinity}, {0, 0}, null} + }, + { + {o, <<"Bucket1">>, <<"Key5">>, null}, + {4, {active, infinity}, {0, 0}, null} + } + ]}, + {3, [ + { + {o, <<"Bucket1">>, <<"Key3">>, null}, + {3, {active, infinity}, {0, 0}, null} + } + ]}, + {5, [ + { + {o, <<"Bucket1">>, <<"Key2">>, null}, + {2, {active, infinity}, {0, 0}, null} + } + ]} ], QueryArray = convert_qmanifest_tomap(QueryArrayAsList), {Array2, KV1} = @@ -2387,7 +2623,8 @@ simple_findnextkey_test() -> {o, <<"Bucket1">>, <<"Key1">>, null}, {5, {active, infinity}, {0, 0}, null} }, - KV1), + KV1 + ), {Array3, KV2} = find_nextkey( Array2, @@ -2396,10 +2633,11 @@ simple_findnextkey_test() -> ), ?assertMatch( { - {o, <<"Bucket1">>, <<"Key2">>, null}, + {o, <<"Bucket1">>, <<"Key2">>, null}, {2, {active, infinity}, {0, 0}, null} }, - KV2), + KV2 + ), {Array4, KV3} = find_nextkey( Array3, @@ -2408,10 +2646,11 @@ simple_findnextkey_test() -> ), ?assertMatch( { - {o, <<"Bucket1">>, <<"Key3">>, null}, + {o, <<"Bucket1">>, <<"Key3">>, null}, {3, {active, infinity}, {0, 0}, null} }, - KV3), + KV3 + ), {Array5, KV4} = find_nextkey( Array4, @@ -2420,10 +2659,11 @@ simple_findnextkey_test() -> ), ?assertMatch( { - {o, <<"Bucket1">>, <<"Key5">>, null}, + {o, <<"Bucket1">>, <<"Key5">>, null}, {4, {active, infinity}, {0, 0}, null} }, - KV4), + KV4 + ), ER = find_nextkey( Array5, @@ -2433,36 +2673,30 @@ simple_findnextkey_test() -> ?assertMatch(no_more_keys, ER). 
sqnoverlap_findnextkey_test() -> - QueryArrayAsList = + QueryArrayAsList = [ - {2, - [ - { - {o, <<"Bucket1">>, <<"Key1">>, null}, - {5, {active, infinity}, {0, 0}, null} - }, - { - {o, <<"Bucket1">>, <<"Key5">>, null}, - {4, {active, infinity}, {0, 0}, null} - } - ] - }, - {3, - [ - { - {o, <<"Bucket1">>, <<"Key3">>, null}, - {3, {active, infinity}, {0, 0}, null} - } - ] - }, - {5, - [ - { - {o, <<"Bucket1">>, <<"Key5">>, null}, - {2, {active, infinity}, {0, 0}, null} - } - ] - } + {2, [ + { + {o, <<"Bucket1">>, <<"Key1">>, null}, + {5, {active, infinity}, {0, 0}, null} + }, + { + {o, <<"Bucket1">>, <<"Key5">>, null}, + {4, {active, infinity}, {0, 0}, null} + } + ]}, + {3, [ + { + {o, <<"Bucket1">>, <<"Key3">>, null}, + {3, {active, infinity}, {0, 0}, null} + } + ]}, + {5, [ + { + {o, <<"Bucket1">>, <<"Key5">>, null}, + {2, {active, infinity}, {0, 0}, null} + } + ]} ], QueryArray = convert_qmanifest_tomap(QueryArrayAsList), {Array2, KV1} = @@ -2473,10 +2707,11 @@ sqnoverlap_findnextkey_test() -> ), ?assertMatch( { - {o, <<"Bucket1">>, <<"Key1">>, null}, + {o, <<"Bucket1">>, <<"Key1">>, null}, {5, {active, infinity}, {0, 0}, null} }, - KV1), + KV1 + ), {Array3, KV2} = find_nextkey( Array2, @@ -2485,10 +2720,11 @@ sqnoverlap_findnextkey_test() -> ), ?assertMatch( { - {o, <<"Bucket1">>, <<"Key3">>, null}, + {o, <<"Bucket1">>, <<"Key3">>, null}, {3, {active, infinity}, {0, 0}, null} }, - KV2), + KV2 + ), {Array4, KV3} = find_nextkey( Array3, @@ -2497,10 +2733,11 @@ sqnoverlap_findnextkey_test() -> ), ?assertMatch( { - {o, <<"Bucket1">>, <<"Key5">>, null}, + {o, <<"Bucket1">>, <<"Key5">>, null}, {4, {active, infinity}, {0, 0}, null} }, - KV3), + KV3 + ), ER = find_nextkey( Array4, @@ -2510,36 +2747,30 @@ sqnoverlap_findnextkey_test() -> ?assertMatch(no_more_keys, ER). 
sqnoverlap_otherway_findnextkey_test() -> - QueryArrayAsList = + QueryArrayAsList = [ - {2, - [ - { - {o, <<"Bucket1">>, <<"Key1">>, null}, - {5, {active, infinity}, {0, 0}, null} - }, - { - {o, <<"Bucket1">>, <<"Key5">>, null}, - {1, {active, infinity}, {0, 0}, null} - } - ] - }, - {3, - [ - { - {o, <<"Bucket1">>, <<"Key3">>, null}, - {3, {active, infinity}, {0, 0}, null} - } - ] - }, - {5, - [ - { - {o, <<"Bucket1">>, <<"Key5">>, null}, - {2, {active, infinity}, {0, 0}, null} - } - ] - } + {2, [ + { + {o, <<"Bucket1">>, <<"Key1">>, null}, + {5, {active, infinity}, {0, 0}, null} + }, + { + {o, <<"Bucket1">>, <<"Key5">>, null}, + {1, {active, infinity}, {0, 0}, null} + } + ]}, + {3, [ + { + {o, <<"Bucket1">>, <<"Key3">>, null}, + {3, {active, infinity}, {0, 0}, null} + } + ]}, + {5, [ + { + {o, <<"Bucket1">>, <<"Key5">>, null}, + {2, {active, infinity}, {0, 0}, null} + } + ]} ], QueryArray = convert_qmanifest_tomap(QueryArrayAsList), {Array2, KV1} = @@ -2553,7 +2784,8 @@ sqnoverlap_otherway_findnextkey_test() -> {o, <<"Bucket1">>, <<"Key1">>, null}, {5, {active, infinity}, {0, 0}, null} }, - KV1), + KV1 + ), {Array3, KV2} = find_nextkey( Array2, @@ -2565,7 +2797,8 @@ sqnoverlap_otherway_findnextkey_test() -> {o, <<"Bucket1">>, <<"Key3">>, null}, {3, {active, infinity}, {0, 0}, null} }, - KV2), + KV2 + ), {Array4, KV3} = find_nextkey( Array3, @@ -2577,7 +2810,8 @@ sqnoverlap_otherway_findnextkey_test() -> {o, <<"Bucket1">>, <<"Key5">>, null}, {2, {active, infinity}, {0, 0}, null} }, - KV3), + KV3 + ), ER = find_nextkey( Array4, {o, <<"Bucket1">>, <<"Key0">>, null}, @@ -2587,39 +2821,33 @@ sqnoverlap_otherway_findnextkey_test() -> foldwithimm_simple_test() -> Now = leveled_util:integer_now(), - QueryArrayAsList = + QueryArrayAsList = [ - {2, - [ - { - {o, <<"Bucket1">>, <<"Key1">>, null}, - {5, {active, infinity}, {0, 0}, null} - }, - { - {o, <<"Bucket1">>, <<"Key5">>, null}, - {1, {active, infinity}, {0, 0}, null} - } - ] - }, - {3, - [ - { - {o, <<"Bucket1">>, <<"Key3">>, null}, - {3, {active, infinity}, {0, 0}, null} - } - ] - }, - {5, - [ - { - {o, <<"Bucket1">>, <<"Key5">>, null}, - {2, {active, infinity}, {0, 0}, null} - } - ] - } + {2, [ + { + {o, <<"Bucket1">>, <<"Key1">>, null}, + {5, {active, infinity}, {0, 0}, null} + }, + { + {o, <<"Bucket1">>, <<"Key5">>, null}, + {1, {active, infinity}, {0, 0}, null} + } + ]}, + {3, [ + { + {o, <<"Bucket1">>, <<"Key3">>, null}, + {3, {active, infinity}, {0, 0}, null} + } + ]}, + {5, [ + { + {o, <<"Bucket1">>, <<"Key5">>, null}, + {2, {active, infinity}, {0, 0}, null} + } + ]} ], QueryArray = convert_qmanifest_tomap(QueryArrayAsList), - KL1A = + KL1A = [ { {o, <<"Bucket1">>, <<"Key6">>, null}, @@ -2639,13 +2867,14 @@ foldwithimm_simple_test() -> leveled_tree:match_range( {o, <<"Bucket1">>, <<"Key1">>, null}, {o, null, null, null}, - IMM2), + IMM2 + ), AccFun = fun(K, V, Acc) -> SQN = leveled_codec:strip_to_seqonly({K, V}), Acc ++ [{K, SQN}] end, - Acc = + Acc = keyfolder_test( IMMiter, QueryArray, @@ -2660,8 +2889,9 @@ foldwithimm_simple_test() -> {{o, <<"Bucket1">>, <<"Key5">>, null}, 2}, {{o, <<"Bucket1">>, <<"Key6">>, null}, 7} ], - Acc), - + Acc + ), + IMMiterA = [ { @@ -2673,7 +2903,7 @@ foldwithimm_simple_test() -> keyfolder_test( IMMiterA, QueryArray, - {o, <<"Bucket1">>, <<"Key1">>, null}, + {o, <<"Bucket1">>, <<"Key1">>, null}, {o, <<"Bucket1">>, <<"Key6">>, null}, {AccFun, [], Now} ), @@ -2683,14 +2913,15 @@ foldwithimm_simple_test() -> {{o, <<"Bucket1">>, <<"Key3">>, null}, 3}, {{o, <<"Bucket1">>, <<"Key5">>, null}, 2} ], - AccA), - + 
AccA + ), + AddKV = { {o, <<"Bucket1">>, <<"Key4">>, null}, {10, {active, infinity}, 0, null} }, - KL1B = [AddKV|KL1A], + KL1B = [AddKV | KL1A], IMM3 = leveled_tree:from_orderedlist(lists:ukeysort(1, KL1B), ?CACHE_TYPE), IMMiterB = leveled_tree:match_range( @@ -2715,22 +2946,25 @@ foldwithimm_simple_test() -> {{o, <<"Bucket1">>, <<"Key5">>, null}, 2}, {{o, <<"Bucket1">>, <<"Key6">>, null}, 7} ], - AccB). + AccB + ). create_file_test() -> {RP, Filename} = {"test/test_area/", "new_file.sst"}, ok = file:write_file(filename:join(RP, Filename), term_to_binary("hello")), KVL = lists:usort(generate_randomkeys({50000, 0})), Tree = leveled_tree:from_orderedlist(KVL, ?CACHE_TYPE), - - {ok, SP, noreply} = - leveled_sst:sst_newlevelzero(RP, - Filename, - 1, - [Tree], - undefined, - 50000, - #sst_options{press_method = native}), + + {ok, SP, noreply} = + leveled_sst:sst_newlevelzero( + RP, + Filename, + 1, + [Tree], + undefined, + 50000, + #sst_options{press_method = native} + ), {ok, SrcFN, StartKey, EndKey} = leveled_sst:sst_checkready(SP), io:format("StartKey ~w EndKey ~w~n", [StartKey, EndKey]), ?assertMatch({o, _, _, _}, StartKey), @@ -2751,10 +2985,12 @@ coverage_cheat_test() -> handle_down_test() -> RootPath = "test/test_area/ledger", clean_testdir(RootPath), - {ok, PCLr} = - pcl_start(#penciller_options{root_path=RootPath, - max_inmemory_tablesize=1000, - sst_options=#sst_options{}}), + {ok, PCLr} = + pcl_start(#penciller_options{ + root_path = RootPath, + max_inmemory_tablesize = 1000, + sst_options = #sst_options{} + }), FakeBookie = spawn(fun loop/0), Mon = erlang:monitor(process, FakeBookie), @@ -2766,7 +3002,7 @@ handle_down_test() -> {FakeBookie, {ok, Snap, null}} -> {ok, Snap, null} end, - + CheckSnapDiesFun = fun(_X, IsDead) -> case IsDead of @@ -2798,7 +3034,6 @@ handle_down_test() -> pcl_close(PCLr), clean_testdir(RootPath). - %% the fake bookie. Some calls to leveled_bookie (like the two below) %% do not go via the gen_server (but it looks like they expect to be %% called by the gen_server, internally!) they use "self()" to @@ -2812,15 +3047,16 @@ loop() -> leveled_bookie:snapshot_store( leveled_bookie:empty_ledgercache(), PCLr, - null, + null, {no_monitor, 0}, ledger, undefined, - false), + false + ), TestPid ! {self(), {ok, Snap, null}}, loop(); stop -> ok end. --endif. \ No newline at end of file +-endif. diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl index 63d50389..d2269bb7 100644 --- a/src/leveled_pmanifest.erl +++ b/src/leveled_pmanifest.erl @@ -3,7 +3,7 @@ %% The manifest is an ordered set of files for each level to be used to find %% which file is relevant for a given key or range lookup at a given level. %% -%% This implementation is incomplete, in that it just uses a plain list at +%% This implementation is incomplete, in that it just uses a plain list at %% each level. This is fine for short-lived volume tests, but as the deeper %% levels are used there will be an exponential penalty. %% @@ -23,35 +23,35 @@ -include("leveled.hrl"). 
-export([ - new_manifest/0, - open_manifest/1, - copy_manifest/1, - load_manifest/3, - close_manifest/2, - save_manifest/2, - query_manifest/3, - get_manifest_sqn/1, - key_lookup/3, - range_lookup/4, - merge_lookup/4, - insert_manifest_entry/4, - remove_manifest_entry/4, - replace_manifest_entry/5, - switch_manifest_entry/4, - mergefile_selector/3, - add_snapshot/3, - release_snapshot/2, - merge_snapshot/2, - ready_to_delete/2, - clear_pending/3, - check_for_work/1, - is_basement/2, - levelzero_present/1, - check_bloom/3, - report_manifest_level/2, - snapshot_pids/1, - get_sstpids/1 - ]). + new_manifest/0, + open_manifest/1, + copy_manifest/1, + load_manifest/3, + close_manifest/2, + save_manifest/2, + query_manifest/3, + get_manifest_sqn/1, + key_lookup/3, + range_lookup/4, + merge_lookup/4, + insert_manifest_entry/4, + remove_manifest_entry/4, + replace_manifest_entry/5, + switch_manifest_entry/4, + mergefile_selector/3, + add_snapshot/3, + release_snapshot/2, + merge_snapshot/2, + ready_to_delete/2, + clear_pending/3, + check_for_work/1, + is_basement/2, + levelzero_present/1, + check_bloom/3, + report_manifest_level/2, + snapshot_pids/1, + get_sstpids/1 +]). -export( [ @@ -65,28 +65,35 @@ ). -export([ - filepath/2 - ]). + filepath/2 +]). -define(MANIFEST_FILEX, "man"). -define(PENDING_FILEX, "pnd"). -define(MANIFEST_FP, "ledger_manifest"). --define(LEVEL_SCALEFACTOR, - [{0, 0}, - {1, 4}, {2, 16}, {3, 64}, % Factor of 4 - {4, 384}, {5, 2304}, % Factor of 6 - {6, 18432}, % Factor of 8 - {7, infinity}]). - % As an alternative to going up by a factor of 8 at each level, - % increase by a factor of 4 at young levels - to make early - % compaction jobs shorter. - % - % There are 32K keys per files => with 4096 files there are 100M - % keys supported, - - % 600M keys is supported before hitting the infinite level. - % At o(10) trillion keys behaviour may become increasingly - % difficult to predict. +-define(LEVEL_SCALEFACTOR, [ + {0, 0}, + % Factor of 4 + {1, 4}, + {2, 16}, + {3, 64}, + % Factor of 6 + {4, 384}, + {5, 2304}, + % Factor of 8 + {6, 18432}, + {7, infinity} +]). +% As an alternative to going up by a factor of 8 at each level, +% increase by a factor of 4 at young levels - to make early +% compaction jobs shorter. +% +% There are 32K keys per files => with 4096 files there are 100M +% keys supported, + +% 600M keys is supported before hitting the infinite level. +% At o(10) trillion keys behaviour may become increasingly +% difficult to predict. -if(?OTP_RELEASE >= 25). -if(length(?LEVEL_SCALEFACTOR) /= ?MAX_LEVELS). @@ -100,32 +107,29 @@ -define(MANIFESTS_TO_RETAIN, 5). -define(GROOM_SAMPLE, 16). --record(manifest, - { - levels :: array:array(dynamic()), - % an array of lists or trees representing the manifest, where the - % list is created using the to_list function on leveled_treee - manifest_sqn = 0 :: non_neg_integer(), - % The current manifest SQN - snapshots = [] :: list(snapshot()), - % A list of snaphots (i.e. clones) - min_snapshot_sqn = 0 :: integer(), - % The smallest snapshot manifest SQN in the snapshot list - pending_deletes = new_pending_deletions() :: pending_deletions(), - basement :: non_neg_integer(), - % Currently the lowest level (the largest number) - blooms = new_blooms() :: blooms() - }). - --record(manifest_entry, - { - start_key :: leveled_codec:object_key(), - end_key :: leveled_codec:object_key(), - owner :: pid(), - filename :: string(), - bloom = none :: leveled_ebloom:bloom() | none - } -). 
+-record(manifest, { + levels :: array:array(dynamic()), + % an array of lists or trees representing the manifest, where the + % list is created using the to_list function on leveled_treee + manifest_sqn = 0 :: non_neg_integer(), + % The current manifest SQN + snapshots = [] :: list(snapshot()), + % A list of snaphots (i.e. clones) + min_snapshot_sqn = 0 :: integer(), + % The smallest snapshot manifest SQN in the snapshot list + pending_deletes = new_pending_deletions() :: pending_deletions(), + basement :: non_neg_integer(), + % Currently the lowest level (the largest number) + blooms = new_blooms() :: blooms() +}). + +-record(manifest_entry, { + start_key :: leveled_codec:object_key(), + end_key :: leveled_codec:object_key(), + owner :: pid(), + filename :: string(), + bloom = none :: leveled_ebloom:bloom() | none +}). -type snapshot() :: {pid(), non_neg_integer(), pos_integer(), pos_integer()}. @@ -136,7 +140,7 @@ -type pending_deletions() :: dict:dict(). -type blooms() :: dict:dict(). -type selector_strategy() :: - random|{grooming, fun((list(manifest_entry())) -> manifest_entry())}. + random | {grooming, fun((list(manifest_entry())) -> manifest_entry())}. -export_type([manifest/0, manifest_entry/0, manifest_owner/0, lsm_level/0]). @@ -161,11 +165,11 @@ new_manifest() -> SetLowerLevelFun, LevelArray0, lists:seq(2, ?MAX_LEVELS) ), #manifest{ - levels = LevelArray1, - manifest_sqn = 0, + levels = LevelArray1, + manifest_sqn = 0, snapshots = [], basement = 0 - }. + }. -spec open_manifest(string()) -> manifest(). %% @doc @@ -204,8 +208,9 @@ copy_manifest(Manifest) -> -spec load_manifest( manifest(), fun((file:name_all(), 1..7) -> {pid(), leveled_ebloom:bloom()}), - fun((pid()) -> pos_integer())) - -> {integer(), manifest(), list()}. + fun((pid()) -> pos_integer()) +) -> + {integer(), manifest(), list()}. %% @doc %% Roll over the manifest starting a process to manage each file in the %% manifest. The PidFun should be able to return the Pid of a file process @@ -214,32 +219,37 @@ copy_manifest(Manifest) -> %% %% The manifest is started from the basement first, and then the higher levels %% as the page cache will be loaded with each file, and it would be -%% preferable to have the higher levels in the cache if memory is insufficient +%% preferable to have the higher levels in the cache if memory is insufficient %% to load each level load_manifest(Manifest, LoadFun, SQNFun) -> UpdateLevelFun = fun(LevelIdx, {AccMaxSQN, AccMan, AccFL}) -> L0 = array:get(LevelIdx, AccMan#manifest.levels), - {L1, SQN1, FileList, LvlBloom} = + {L1, SQN1, FileList, LvlBloom} = load_level(LevelIdx, L0, LoadFun, SQNFun), UpdLevels = array:set(LevelIdx, L1, AccMan#manifest.levels), - FoldBloomFun = - fun({P, B}, BAcc) -> - dict:store(P, B, BAcc) + FoldBloomFun = + fun({P, B}, BAcc) -> + dict:store(P, B, BAcc) end, - UpdBlooms = + UpdBlooms = lists:foldl(FoldBloomFun, AccMan#manifest.blooms, LvlBloom), - {max(AccMaxSQN, SQN1), + { + max(AccMaxSQN, SQN1), AccMan#manifest{levels = UpdLevels, blooms = UpdBlooms}, - AccFL ++ FileList} + AccFL ++ FileList + } end, - lists:foldl(UpdateLevelFun, - {0, Manifest, []}, - lists:reverse(lists:seq(0, Manifest#manifest.basement))). + lists:foldl( + UpdateLevelFun, + {0, Manifest, []}, + lists:reverse(lists:seq(0, Manifest#manifest.basement)) + ). -spec close_manifest( manifest(), - fun((any()) -> ok)) -> ok. + fun((any()) -> ok) +) -> ok. %% @doc %% Close all the files in the manifest (using CloseEntryFun to call close on %% a file). 
Firts all the files in the active manifest are called, and then @@ -251,7 +261,7 @@ close_manifest(Manifest, CloseEntryFun) -> close_level(LevelIdx, Level, CloseEntryFun) end, lists:foreach(CloseLevelFun, lists:seq(0, Manifest#manifest.basement)), - + ClosePDFun = fun({_FN, {_SQN, ME}}) -> CloseEntryFun(ME) @@ -277,19 +287,19 @@ save_manifest(Manifest, RootPath) -> ToPersist = <>, ok = leveled_util:safe_rename(TFP, AFP, ToPersist, true), GC_SQN = Manifest#manifest.manifest_sqn - ?MANIFESTS_TO_RETAIN, - % If a manifest is corrupted the previous one will be tried, so don't - % delete the previous one straight away. Retain until enough have been - % kept to make the probability of all being independently corrupted - % through separate events negligible + % If a manifest is corrupted the previous one will be tried, so don't + % delete the previous one straight away. Retain until enough have been + % kept to make the probability of all being independently corrupted + % through separate events negligible ok = remove_manifest(RootPath, GC_SQN), - % Sometimes we skip a SQN, so to GC all may need to clear up previous - % as well + % Sometimes we skip a SQN, so to GC all may need to clear up previous + % as well ok = remove_manifest(RootPath, GC_SQN - 1). -spec remove_manifest(string(), integer()) -> ok. remove_manifest(RootPath, GC_SQN) -> LFP = filepath(RootPath, GC_SQN, current_manifest), - ok = + ok = case filelib:is_file(LFP) of true -> file:delete(LFP); @@ -297,17 +307,19 @@ remove_manifest(RootPath, GC_SQN) -> ok end. - -spec report_manifest_level( - manifest(), non_neg_integer()) -> - {non_neg_integer(), - non_neg_integer(), - {string(), pid(), non_neg_integer()} | - undefined, - non_neg_integer(), - non_neg_integer(), - non_neg_integer(), - non_neg_integer()}. + manifest(), non_neg_integer() +) -> + { + non_neg_integer(), + non_neg_integer(), + {string(), pid(), non_neg_integer()} + | undefined, + non_neg_integer(), + non_neg_integer(), + non_neg_integer(), + non_neg_integer() + }. 
%% @doc %% Report on a level in the manifest %% - How many files in the level @@ -316,19 +328,19 @@ remove_manifest(RootPath, GC_SQN) -> report_manifest_level(Manifest, LevelIdx) -> Levels = Manifest#manifest.levels, Level = array:get(LevelIdx, Levels), - {LevelSize, LevelList} = + {LevelSize, LevelList} = case is_list(Level) of true -> {length(Level), Level}; _ -> {leveled_tree:tsize(Level), leveled_tree:to_list(Level)} end, - AccMemFun = + AccMemFun = fun(MaybeME, {MemAcc, Max, HBSAcc, HSAcc, LHSAcc, BVHSAcc}) -> ME = get_manifest_entry(MaybeME), P = ME#manifest_entry.owner, {memory, PM} = process_info(P, memory), - UpdMax = + UpdMax = case Max of {_MaxFN, _MaxP, MaxPM} when MaxPM > PM -> Max; @@ -341,8 +353,14 @@ report_manifest_level(Manifest, LevelIdx) -> HS = proplists:get_value(heap_size, GCI), LHS = proplists:get_value(recent_size, GCI), BVHS = proplists:get_value(bin_vheap_size, GCI), - {MemAcc + PM, UpdMax, - HBSAcc + HBS, HSAcc + HS, LHSAcc + LHS, BVHSAcc + BVHS} + { + MemAcc + PM, + UpdMax, + HBSAcc + HBS, + HSAcc + HS, + LHSAcc + LHS, + BVHSAcc + BVHS + } end, case LevelSize of 0 -> @@ -351,18 +369,17 @@ report_manifest_level(Manifest, LevelIdx) -> {TotalMem, BiggestMem, TotalHBS, TotalHS, TotalLHS, TotalBVBS} = lists:foldl(AccMemFun, {0, undefined, 0, 0, 0, 0}, LevelList), {LevelSize, TotalMem div LevelSize, BiggestMem, - TotalHBS div LevelSize, - TotalHS div LevelSize, - TotalLHS div LevelSize, - TotalBVBS div LevelSize} + TotalHBS div LevelSize, TotalHS div LevelSize, + TotalLHS div LevelSize, TotalBVBS div LevelSize} end. -spec replace_manifest_entry( manifest(), integer(), integer(), - list()|manifest_entry(), - list()|manifest_entry()) -> manifest(). + list() | manifest_entry(), + list() | manifest_entry() +) -> manifest(). %% @doc %% Replace a list of manifest entries in the manifest with a new set of entries %% Pass in the new manifest SQN to be used for this manifest. The list of @@ -372,13 +389,14 @@ report_manifest_level(Manifest, LevelIdx) -> replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) -> Levels = Manifest#manifest.levels, Level = array:get(LevelIdx, Levels), - {UpdBlooms, StrippedAdditions} = + {UpdBlooms, StrippedAdditions} = update_blooms(Removals, Additions, Manifest#manifest.blooms), UpdLevel = replace_entry(LevelIdx, Level, Removals, StrippedAdditions), leveled_log:log(pc019, ["insert", LevelIdx, UpdLevel]), - PendingDeletes = + PendingDeletes = update_pendingdeletes( - ManSQN, Removals, Manifest#manifest.pending_deletes), + ManSQN, Removals, Manifest#manifest.pending_deletes + ), UpdLevels = array:set(LevelIdx, UpdLevel, Levels), case is_empty(LevelIdx, UpdLevel) of true -> @@ -401,14 +419,15 @@ replace_manifest_entry(Manifest, ManSQN, LevelIdx, Removals, Additions) -> end. -spec insert_manifest_entry( - manifest(), integer(), integer(), list()|manifest_entry()) -> manifest(). + manifest(), integer(), integer(), list() | manifest_entry() +) -> manifest(). %% @doc %% Place a single new manifest entry into a level of the manifest, at a given %% level and manifest sequence number insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> Levels = Manifest#manifest.levels, Level = array:get(LevelIdx, Levels), - {UpdBlooms, UpdEntry} = + {UpdBlooms, UpdEntry} = update_blooms([], Entry, Manifest#manifest.blooms), UpdLevel = add_entry(LevelIdx, Level, UpdEntry), leveled_log:log(pc019, ["insert", LevelIdx, UpdLevel]), @@ -421,19 +440,21 @@ insert_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> }. 
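[Editor's note, not part of the patch.] A hypothetical sequence showing how replace_manifest_entry/5 and remove_manifest_entry/4 above might be combined once a merge has produced new files at the level below: the overlapped entries are swapped for the merge output under a fresh manifest SQN, and the consumed source entry is removed from the level above (its file moving to pending_deletes). The real orchestration is handled by the penciller and its clerk; SrcEntry, OverlappedEntries and NewEntries are assumed manifest_entry terms.

%% Illustrative only.
apply_merge(Manifest0, LevelIdx, SrcEntry, OverlappedEntries, NewEntries) ->
    NextSQN = leveled_pmanifest:get_manifest_sqn(Manifest0) + 1,
    Manifest1 =
        leveled_pmanifest:replace_manifest_entry(
            Manifest0, NextSQN, LevelIdx + 1, OverlappedEntries, NewEntries),
    leveled_pmanifest:remove_manifest_entry(
        Manifest1, NextSQN, LevelIdx, SrcEntry).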
-spec remove_manifest_entry( - manifest(), integer(), integer(), list()|manifest_entry()) -> manifest(). + manifest(), integer(), integer(), list() | manifest_entry() +) -> manifest(). %% @doc %% Remove a manifest entry (as it has been merged into the level below) remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> Levels = Manifest#manifest.levels, Level = array:get(LevelIdx, Levels), - {UpdBlooms, []} = + {UpdBlooms, []} = update_blooms(Entry, [], Manifest#manifest.blooms), UpdLevel = remove_entry(LevelIdx, Level, Entry), leveled_log:log(pc019, ["remove", LevelIdx, UpdLevel]), PendingDeletes = update_pendingdeletes( - ManSQN, Entry, Manifest#manifest.pending_deletes), + ManSQN, Entry, Manifest#manifest.pending_deletes + ), UpdLevels = array:set(LevelIdx, UpdLevel, Levels), case is_empty(LevelIdx, UpdLevel) of true -> @@ -454,7 +475,8 @@ remove_manifest_entry(Manifest, ManSQN, LevelIdx, Entry) -> end. -spec switch_manifest_entry( - manifest(), integer(), integer(), list()|manifest_entry()) -> manifest(). + manifest(), integer(), integer(), list() | manifest_entry() +) -> manifest(). %% @doc %% Switch a manifest etry from this level to the level below (i.e when there %% are no overlapping manifest entries in the level below) @@ -466,7 +488,8 @@ switch_manifest_entry(Manifest, ManSQN, SrcLevel, Entry) -> UpdLevel = remove_entry(SrcLevel, Level, Entry), UpdLevels = array:set(SrcLevel, UpdLevel, Levels), insert_manifest_entry( - Manifest#manifest{levels = UpdLevels}, ManSQN, SrcLevel + 1, Entry). + Manifest#manifest{levels = UpdLevels}, ManSQN, SrcLevel + 1, Entry + ). -spec get_manifest_sqn(manifest()) -> integer(). %% @doc @@ -475,8 +498,9 @@ get_manifest_sqn(Manifest) -> Manifest#manifest.manifest_sqn. -spec key_lookup( - manifest(), integer(), leveled_codec:ledger_key()) -> - false|manifest_owner(). + manifest(), integer(), leveled_codec:ledger_key() +) -> + false | manifest_owner(). %% @doc %% For a given key find which manifest entry covers that key at that level, %% returning false if there is no covering manifest entry at that level. @@ -486,16 +510,21 @@ key_lookup(Manifest, LevelIdx, Key) -> false; false -> key_lookup_level( - LevelIdx, array:get(LevelIdx, Manifest#manifest.levels), Key) + LevelIdx, array:get(LevelIdx, Manifest#manifest.levels), Key + ) end. -spec query_manifest( manifest(), leveled_codec:ledger_key(), - leveled_codec:ledger_key()) - -> list( - {lsm_level(), - list({next, manifest_entry(), leveled_codec:ledger_key()})}). + leveled_codec:ledger_key() +) -> + list( + { + lsm_level(), + list({next, manifest_entry(), leveled_codec:ledger_key()}) + } + ). query_manifest(Manifest, StartKey, EndKey) -> SetupFoldFun = fun(Level, Acc) -> @@ -503,17 +532,18 @@ query_manifest(Manifest, StartKey, EndKey) -> [] -> Acc; Pointers -> - [{Level, Pointers}|Acc] + [{Level, Pointers} | Acc] end end, lists:foldl(SetupFoldFun, [], lists:seq(0, ?MAX_LEVELS - 1)). -spec range_lookup( - manifest(), - integer(), - leveled_codec:ledger_key(), - leveled_codec:ledger_key()) - -> list({next, manifest_entry(), leveled_codec:ledger_key()}). + manifest(), + integer(), + leveled_codec:ledger_key(), + leveled_codec:ledger_key() +) -> + list({next, manifest_entry(), leveled_codec:ledger_key()}). %% @doc %% Return a list of manifest_entry pointers at this level which cover the %% key query range. @@ -525,10 +555,11 @@ range_lookup(Manifest, LevelIdx, StartKey, EndKey) -> range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun). 
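[Editor's note, not part of the patch.] query_manifest/3 and range_lookup/4 above return, per level, the {next, ManifestEntry, StartKey} pointers covering a query range; a fold such as the sketch below could seed the per-level iterator map that the penciller's keyfolder later expands through leveled_sst:sst_expandpointer. seed_iterator is a hypothetical helper, shown only to illustrate the shape of the returned data.

%% Illustrative only.
seed_iterator(Manifest, StartKey, EndKey) ->
    lists:foldl(
        fun({Level, Pointers}, IterAcc) -> maps:put(Level, Pointers, IterAcc) end,
        #{},
        leveled_pmanifest:query_manifest(Manifest, StartKey, EndKey)).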
-spec merge_lookup( - manifest(), - integer(), - leveled_codec:ledger_key(), - leveled_codec:ledger_key()) -> list({next, manifest_entry(), all}). + manifest(), + integer(), + leveled_codec:ledger_key(), + leveled_codec:ledger_key() +) -> list({next, manifest_entry(), all}). %% @doc %% Return a list of manifest_entry pointers at this level which cover the %% key query range, only all keys in the files should be included in the @@ -540,9 +571,9 @@ merge_lookup(Manifest, LevelIdx, StartKey, EndKey) -> end, range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun). - -spec mergefile_selector( - manifest(), integer(), selector_strategy()) -> manifest_entry(). + manifest(), integer(), selector_strategy() +) -> manifest_entry(). %% @doc %% An algorithm for discovering which files to merge .... %% We can find the most optimal file: @@ -560,17 +591,19 @@ mergefile_selector(Manifest, LevelIdx, _Strategy) when LevelIdx =< 1 -> mergefile_selector(Manifest, LevelIdx, random) -> Level = leveled_tree:to_list( - array:get(LevelIdx, Manifest#manifest.levels)), + array:get(LevelIdx, Manifest#manifest.levels) + ), {_SK, ME} = lists:nth(rand:uniform(length(Level)), Level), ME; mergefile_selector(Manifest, LevelIdx, {grooming, ScoringFun}) -> Level = leveled_tree:to_list( - array:get(LevelIdx, Manifest#manifest.levels)), + array:get(LevelIdx, Manifest#manifest.levels) + ), SelectorFun = fun(_I, Acc) -> {_SK, ME} = lists:nth(rand:uniform(length(Level)), Level), - [ME|Acc] + [ME | Acc] end, Sample = lists:usort(lists:foldl(SelectorFun, [], lists:seq(1, ?GROOM_SAMPLE))), @@ -578,7 +611,6 @@ mergefile_selector(Manifest, LevelIdx, {grooming, ScoringFun}) -> % multiple times. Level cannot be empty, as otherwise a merge would not % have been chosen at this level ScoringFun(Sample). - -spec merge_snapshot(manifest(), manifest()) -> manifest(). %% @doc @@ -589,7 +621,8 @@ mergefile_selector(Manifest, LevelIdx, {grooming, ScoringFun}) -> merge_snapshot(PencillerManifest, ClerkManifest) -> ClerkManifest#manifest{ snapshots = PencillerManifest#manifest.snapshots, - min_snapshot_sqn = PencillerManifest#manifest.min_snapshot_sqn}. + min_snapshot_sqn = PencillerManifest#manifest.min_snapshot_sqn + }. -spec add_snapshot(manifest(), pid(), integer()) -> manifest(). %% @doc @@ -600,19 +633,21 @@ merge_snapshot(PencillerManifest, ClerkManifest) -> %% clone is still active it may crash) add_snapshot(Manifest, Pid, Timeout) -> SnapEntry = {Pid, Manifest#manifest.manifest_sqn, seconds_now(), Timeout}, - SnapList0 = [SnapEntry|Manifest#manifest.snapshots], + SnapList0 = [SnapEntry | Manifest#manifest.snapshots], ManSQN = Manifest#manifest.manifest_sqn, case Manifest#manifest.min_snapshot_sqn of 0 -> Manifest#manifest{ - snapshots = SnapList0, min_snapshot_sqn = ManSQN}; + snapshots = SnapList0, min_snapshot_sqn = ManSQN + }; N -> N0 = min(N, ManSQN), Manifest#manifest{ - snapshots = SnapList0, min_snapshot_sqn = N0} + snapshots = SnapList0, min_snapshot_sqn = N0 + } end. --spec release_snapshot(manifest(), pid()|atom()) -> manifest(). +-spec release_snapshot(manifest(), pid() | atom()) -> manifest(). %% @doc %% When a clone is complete the release should be notified to the manifest. 
release_snapshot(Manifest, Pid) -> @@ -622,12 +657,12 @@ release_snapshot(Manifest, Pid) -> Pid -> {Acc, MinSQN, true}; _ -> - case seconds_now() > (ST + TO) of + case seconds_now() > (ST + TO) of true -> - leveled_log:log(p0038, [P, SQN, ST, TO]), + leveled_log:log(p0038, [P, SQN, ST, TO]), {Acc, MinSQN, Found}; false -> - {[{P, SQN, ST, TO}|Acc], min(SQN, MinSQN), Found} + {[{P, SQN, ST, TO} | Acc], min(SQN, MinSQN), Found} end end end, @@ -637,23 +672,22 @@ release_snapshot(Manifest, Pid) -> {[], infinity, false}, Manifest#manifest.snapshots ), - case Hit of + case Hit of false -> leveled_log:log(p0039, [Pid, length(SnapList0), MinSnapSQN]); true -> - ok + ok end, case SnapList0 of [] -> Manifest#manifest{snapshots = SnapList0, min_snapshot_sqn = 0}; - _ when is_integer(MinSnapSQN) -> + _ when is_integer(MinSnapSQN) -> leveled_log:log(p0004, [SnapList0]), Manifest#manifest{ snapshots = SnapList0, min_snapshot_sqn = MinSnapSQN } end. - %% @doc %% A SST file which is in the delete_pending state can check to see if it is %% ready to delete against the manifest. @@ -681,12 +715,13 @@ clear_pending(Manifest, [], true) -> release_snapshot(Manifest, ?PHANTOM_PID); clear_pending(Manifest, [], false) -> Manifest; -clear_pending(Manifest, [FN|RestFN], MaybeRelease) -> +clear_pending(Manifest, [FN | RestFN], MaybeRelease) -> PDs = dict:erase(FN, Manifest#manifest.pending_deletes), clear_pending( Manifest#manifest{pending_deletes = PDs}, RestFN, - MaybeRelease). + MaybeRelease + ). -spec check_for_work(manifest()) -> {list(), integer()}. %% @doc @@ -711,13 +746,13 @@ check_for_work(Manifest) -> S = size(LevelIdx, Level), case S > MaxCount of true -> - {[LevelIdx|AccL], AccC + S - MaxCount}; + {[LevelIdx | AccL], AccC + S - MaxCount}; false -> {AccL, AccC} end end end, - lists:foldr(CheckLevelFun, {[], 0}, ?LEVEL_SCALEFACTOR). + lists:foldr(CheckLevelFun, {[], 0}, ?LEVEL_SCALEFACTOR). -spec is_basement(manifest(), integer()) -> boolean(). %% @doc @@ -733,13 +768,12 @@ is_basement(Manifest, Level) -> levelzero_present(Manifest) -> not is_empty(0, array:get(0, Manifest#manifest.levels)). - -spec check_bloom(manifest(), pid(), {integer(), integer()}) -> boolean(). %% @doc %% Check to see if a hash is present in a manifest entry by using the exported %% bloom filter check_bloom(Manifest, FP, Hash) -> - case dict:find(FP, Manifest#manifest.blooms) of + case dict:find(FP, Manifest#manifest.blooms) of {ok, Bloom} when is_binary(Bloom) -> leveled_ebloom:check_hash(Hash, Bloom); _ -> @@ -772,7 +806,8 @@ get_sstpids(Manifest) -> ME = get_manifest_entry(MaybeME), ME#manifest_entry.owner end, - LevelAsList), + LevelAsList + ), Acc ++ Pids end, lists:foldl(FoldFun, [], lists:seq(0, Manifest#manifest.basement)). @@ -786,7 +821,8 @@ get_sstpids(Manifest) -> leveled_codec:object_key(), pid(), string(), - leveled_ebloom:bloom()|none) -> manifest_entry(). + leveled_ebloom:bloom() | none +) -> manifest_entry(). new_entry(StartKey, EndKey, Owner, FileName, Bloom) -> #manifest_entry{ start_key = StartKey, @@ -809,14 +845,15 @@ entry_endkey(ME) -> ME#manifest_entry.end_key. entry_owner(ME) -> ME#manifest_entry.owner. -spec entry_filename(manifest_entry()) -> string(). -entry_filename(#manifest_entry{filename = FN}) when ?IS_DEF(FN)-> FN. +entry_filename(#manifest_entry{filename = FN}) when ?IS_DEF(FN) -> FN. 
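[Editor's note, not part of the patch.] A small worked example of the capacity arithmetic behind ?LEVEL_SCALEFACTOR, which check_for_work/1 above folds over to find levels holding more files than their target. Assuming roughly 32K keys per SST file, as noted in the comment beside the define, the bounded levels 1 to 6 hold 4 + 16 + 64 + 384 + 2304 + 18432 = 21204 files, i.e. on the order of 690M keys before level 7, the unbounded basement, is reached - consistent with the "600M keys" figure in that comment.

%% Illustrative only - cumulative key capacity implied by the scale factors.
capacity_before_basement() ->
    KeysPerFile = 32768,
    FilesPerLevel = [4, 16, 64, 384, 2304, 18432],  % levels 1 to 6
    KeysPerFile * lists:sum(FilesPerLevel).         % ~694M keys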
%%%============================================================================ %%% Internal Functions %%%============================================================================ -spec get_manifest_entry( - {tuple(), manifest_entry()}|manifest_entry()) -> manifest_entry(). + {tuple(), manifest_entry()} | manifest_entry() +) -> manifest_entry(). %% @doc %% Manifest levels can have entries of two forms, use this if only interested %% in the latter form @@ -836,35 +873,41 @@ load_level(LevelIdx, Level, LoadFun, SQNFun) -> FN = ME#manifest_entry.filename, {P, Bloom} = LoadFun(FN, LevelIdx), SQN = SQNFun(P), - {[ME#manifest_entry{owner=P}|L_Out], + { + [ME#manifest_entry{owner = P} | L_Out], max(SQN, L_MaxSQN), - [FN|FileList], - [{P, Bloom}|BloomL]} + [FN | FileList], + [{P, Bloom} | BloomL] + } end, LowerLevelLoadFun = fun({EK, ME}, {L_Out, L_MaxSQN, FileList, BloomL}) -> FN = ME#manifest_entry.filename, {P, Bloom} = LoadFun(FN, LevelIdx), SQN = SQNFun(P), - {[{EK, ME#manifest_entry{owner=P}}|L_Out], + { + [{EK, ME#manifest_entry{owner = P}} | L_Out], max(SQN, L_MaxSQN), - [FN|FileList], - [{P, Bloom}|BloomL]} + [FN | FileList], + [{P, Bloom} | BloomL] + } end, case LevelIdx =< 1 of true -> lists:foldr(HigherLevelLoadFun, {[], 0, [], []}, Level); false -> - {L0, MaxSQN, Flist, UpdBloomL} = + {L0, MaxSQN, Flist, UpdBloomL} = lists:foldr( - LowerLevelLoadFun, - {[], 0, [], []}, + LowerLevelLoadFun, + {[], 0, [], []}, leveled_tree:to_list(Level) ), - {leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), - MaxSQN, + { + leveled_tree:from_orderedlist(L0, ?TREE_TYPE, ?TREE_WIDTH), + MaxSQN, Flist, - UpdBloomL} + UpdBloomL + } end. close_level(LevelIdx, Level, CloseEntryFun) when LevelIdx =< 1 -> @@ -935,9 +978,11 @@ measure_removals(Removals) -> end. remove_section(LevelIdx, Level, FirstEntry, SectionLength) -> - PredFun = pred_fun(LevelIdx, - FirstEntry#manifest_entry.start_key, - FirstEntry#manifest_entry.end_key), + PredFun = pred_fun( + LevelIdx, + FirstEntry#manifest_entry.start_key, + FirstEntry#manifest_entry.end_key + ), case LevelIdx =< 1 of true -> {LHS, RHS} = lists:splitwith(PredFun, Level), @@ -947,7 +992,8 @@ remove_section(LevelIdx, Level, FirstEntry, SectionLength) -> {LHS, RHS} = lists:splitwith(PredFun, leveled_tree:to_list(Level)), Post = lists:nthtail(SectionLength, RHS), leveled_tree:from_orderedlist( - lists:append([LHS, Post]), ?TREE_TYPE, ?TREE_WIDTH) + lists:append([LHS, Post]), ?TREE_TYPE, ?TREE_WIDTH + ) end. replace_entry(LevelIdx, Level, Removals, Additions) when LevelIdx =< 1 -> @@ -983,14 +1029,13 @@ replace_entry(LevelIdx, Level, Removals, Additions) -> end, UpdList = lists:append([LHS, lists:map(MapFun, Additions), Post]), leveled_tree:from_orderedlist(UpdList, ?TREE_TYPE, ?TREE_WIDTH). - update_pendingdeletes(ManSQN, Removals, PendingDeletes) -> DelFun = fun(E, Acc) -> dict:store(E#manifest_entry.filename, {ManSQN, E}, Acc) end, - Entries = + Entries = case is_list(Removals) of true -> Removals; @@ -1000,29 +1045,30 @@ update_pendingdeletes(ManSQN, Removals, PendingDeletes) -> lists:foldl(DelFun, PendingDeletes, Entries). -spec update_blooms( - list()|manifest_entry(), - list()|manifest_entry(), - blooms()) - -> {blooms(), list()}. + list() | manifest_entry(), + list() | manifest_entry(), + blooms() +) -> + {blooms(), list()}. %% @doc %% -%% The manifest is a Pid-> Bloom mappping for every Pid, and this needs to -%% be updated to represent the changes. 
However, the bloom would bloat out +%% The manifest is a Pid-> Bloom mappping for every Pid, and this needs to +%% be updated to represent the changes. However, the bloom would bloat out %% the stored manifest, so the bloom must be stripped from the manifest entry %% as part of this process update_blooms(Removals, Additions, Blooms) -> Additions0 = - case is_list(Additions) of + case is_list(Additions) of true -> Additions; false -> [Additions] end, - Removals0 = - case is_list(Removals) of + Removals0 = + case is_list(Removals) of true -> Removals; false -> [Removals] end, - RemFun = + RemFun = fun(R, BloomD) -> dict:erase(R#manifest_entry.owner, BloomD) end, @@ -1034,15 +1080,14 @@ update_blooms(Removals, Additions, Blooms) -> fun(A) -> A#manifest_entry{bloom = none} end, - + Blooms0 = lists:foldl(RemFun, Blooms, Removals0), Blooms1 = lists:foldl(AddFun, Blooms0, Additions0), {Blooms1, lists:map(StripFun, Additions0)}. - key_lookup_level(LevelIdx, [], _Key) when LevelIdx =< 1 -> false; -key_lookup_level(LevelIdx, [Entry|Rest], Key) when LevelIdx =< 1 -> +key_lookup_level(LevelIdx, [Entry | Rest], Key) when LevelIdx =< 1 -> case Entry#manifest_entry.end_key >= Key of true -> case Key >= Entry#manifest_entry.start_key of @@ -1066,9 +1111,8 @@ key_lookup_level(_LevelIdx, Level, Key) -> ME#manifest_entry.owner end. - range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) -> - Range = + Range = case LevelIdx > Manifest#manifest.basement of true -> []; @@ -1081,7 +1125,7 @@ range_lookup_int(Manifest, LevelIdx, StartKey, EndKey, MakePointerFun) -> ) end, lists:map(MakePointerFun, Range). - + range_lookup_level(LevelIdx, Level, QStartKey, QEndKey) when LevelIdx =< 1 -> BeforeFun = fun(M) -> @@ -1089,9 +1133,9 @@ range_lookup_level(LevelIdx, Level, QStartKey, QEndKey) when LevelIdx =< 1 -> end, NotAfterFun = fun(M) -> - not - leveled_codec:endkey_passed( - QEndKey, M#manifest_entry.start_key) + not leveled_codec:endkey_passed( + QEndKey, M#manifest_entry.start_key + ) end, {_Before, MaybeIn} = lists:splitwith(BeforeFun, Level), {In, _After} = lists:splitwith(NotAfterFun, MaybeIn), @@ -1126,16 +1170,16 @@ filepath(RootPath, manifest) -> MFP. filepath(RootPath, NewMSN, current_manifest) -> - filepath(RootPath, manifest) ++ "nonzero_" - ++ integer_to_list(NewMSN) ++ "." ++ ?MANIFEST_FILEX; + filepath(RootPath, manifest) ++ "nonzero_" ++ + integer_to_list(NewMSN) ++ "." ++ ?MANIFEST_FILEX; filepath(RootPath, NewMSN, pending_manifest) -> - filepath(RootPath, manifest) ++ "nonzero_" - ++ integer_to_list(NewMSN) ++ "." ++ ?PENDING_FILEX. + filepath(RootPath, manifest) ++ "nonzero_" ++ + integer_to_list(NewMSN) ++ "." ++ ?PENDING_FILEX. open_manifestfile(_RootPath, L) when L == [] orelse L == [0] -> leveled_log:log(p0013, []), new_manifest(); -open_manifestfile(RootPath, [TopManSQN|Rest]) -> +open_manifestfile(RootPath, [TopManSQN | Rest]) -> CurrManFile = filepath(RootPath, TopManSQN, current_manifest), {ok, FileBin} = file:read_file(CurrManFile), <> = FileBin, @@ -1168,64 +1212,63 @@ new_pending_deletions() -> dict:new(). -include_lib("eunit/include/eunit.hrl"). -initial_setup() -> +initial_setup() -> initial_setup(single_change). 
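[Editor's note, not part of the patch; this refers back to the update_blooms/3 doc earlier in this module.] The bloom is registered in a dict keyed by the owning pid - the shape check_bloom/3 expects - while the entry kept at the level (and later persisted) carries bloom = none so the saved manifest stays small. A hypothetical sketch of that split; the manifest_entry record must be in scope for it to compile.

%% Illustrative only.
register_and_strip(#manifest_entry{owner = P, bloom = B} = Entry, Blooms) ->
    {Entry#manifest_entry{bloom = none}, dict:store(P, B, Blooms)}.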
initial_setup(Changes) -> E1 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, - end_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, - filename="Z1", - owner=list_to_pid("<0.101.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, + end_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, + filename = "Z1", + owner = list_to_pid("<0.101.0>"), + bloom = none }, E2 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, - end_key={o, <<"Bucket1">>, <<"K71">>, null}, - filename="Z2", - owner=list_to_pid("<0.102.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, + end_key = {o, <<"Bucket1">>, <<"K71">>, null}, + filename = "Z2", + owner = list_to_pid("<0.102.0>"), + bloom = none }, E3 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K75">>, null}, - end_key={o, <<"Bucket1">>, <<"K993">>, null}, - filename="Z3", - owner=list_to_pid("<0.103.0>"), - bloom=none + start_key = {o, <<"Bucket1">>, <<"K75">>, null}, + end_key = {o, <<"Bucket1">>, <<"K993">>, null}, + filename = "Z3", + owner = list_to_pid("<0.103.0>"), + bloom = none }, E4 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, - end_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld7">>}, <<"K93">>}, - filename="Z4", - owner=list_to_pid("<0.104.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, + end_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld7">>}, <<"K93">>}, + filename = "Z4", + owner = list_to_pid("<0.104.0>"), + bloom = none }, E5 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld7">>}, <<"K97">>}, - end_key={o, <<"Bucket1">>, <<"K78">>, null}, - filename="Z5", - owner=list_to_pid("<0.105.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld7">>}, <<"K97">>}, + end_key = {o, <<"Bucket1">>, <<"K78">>, null}, + filename = "Z5", + owner = list_to_pid("<0.105.0>"), + bloom = none }, - E6 = + E6 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K81">>, null}, - end_key={o, <<"Bucket1">>, <<"K996">>, null}, - filename="Z6", - owner=list_to_pid("<0.106.0>"), - bloom=none + start_key = {o, <<"Bucket1">>, <<"K81">>, null}, + end_key = {o, <<"Bucket1">>, <<"K996">>, null}, + filename = "Z6", + owner = list_to_pid("<0.106.0>"), + bloom = none }, - initial_setup(Changes, E1, E2, E3, E4, E5, E6). - + initial_setup(Changes, E1, E2, E3, E4, E5, E6). initial_setup(single_change, E1, E2, E3, E4, E5, E6) -> Man0 = new_manifest(), - + Man1 = insert_manifest_entry(Man0, 1, 1, E1), Man2 = insert_manifest_entry(Man1, 1, 1, E2), Man3 = insert_manifest_entry(Man2, 1, 1, E3), @@ -1236,7 +1279,7 @@ initial_setup(single_change, E1, E2, E3, E4, E5, E6) -> {Man0, Man1, Man2, Man3, Man4, Man5, Man6}; initial_setup(multi_change, E1, E2, E3, E4, E5, E6) -> Man0 = new_manifest(), - + Man1 = insert_manifest_entry(Man0, 1, 1, E1), Man2 = insert_manifest_entry(Man1, 2, 1, E2), Man3 = insert_manifest_entry(Man2, 3, 1, E3), @@ -1246,76 +1289,75 @@ initial_setup(multi_change, E1, E2, E3, E4, E5, E6) -> ?assertMatch(Man6, insert_manifest_entry(Man6, 6, 2, [])), {Man0, Man1, Man2, Man3, Man4, Man5, Man6}. 
- changeup_setup(Man6) -> E1 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, - end_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, - filename="Z1", - owner=list_to_pid("<0.101.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, + end_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, + filename = "Z1", + owner = list_to_pid("<0.101.0>"), + bloom = none }, E2 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, - end_key={o, <<"Bucket1">>, <<"K71">>, null}, - filename="Z2", - owner=list_to_pid("<0.102.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, + end_key = {o, <<"Bucket1">>, <<"K71">>, null}, + filename = "Z2", + owner = list_to_pid("<0.102.0>"), + bloom = none }, E3 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K75">>, null}, - end_key={o, <<"Bucket1">>, <<"K993">>, null}, - filename="Z3", - owner=list_to_pid("<0.103.0>"), - bloom=none + start_key = {o, <<"Bucket1">>, <<"K75">>, null}, + end_key = {o, <<"Bucket1">>, <<"K993">>, null}, + filename = "Z3", + owner = list_to_pid("<0.103.0>"), + bloom = none }, - + E1_2 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld4">>}, <<"K8">>}, - end_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K62">>}, - owner=list_to_pid("<0.201.0>"), - filename="Y1", - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld4">>}, <<"K8">>}, + end_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K62">>}, + owner = list_to_pid("<0.201.0>"), + filename = "Y1", + bloom = none }, E2_2 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K67">>}, - end_key={o, <<"Bucket1">>, <<"K45">>, null}, - owner=list_to_pid("<0.202.0>"), - filename="Y2", - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K67">>}, + end_key = {o, <<"Bucket1">>, <<"K45">>, null}, + owner = list_to_pid("<0.202.0>"), + filename = "Y2", + bloom = none }, E3_2 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K47">>, null}, - end_key={o, <<"Bucket1">>, <<"K812">>, null}, - owner=list_to_pid("<0.203.0>"), - filename="Y3", - bloom=none + start_key = {o, <<"Bucket1">>, <<"K47">>, null}, + end_key = {o, <<"Bucket1">>, <<"K812">>, null}, + owner = list_to_pid("<0.203.0>"), + filename = "Y3", + bloom = none }, E4_2 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K815">>, null}, - end_key={o, <<"Bucket1">>, <<"K998">>, null}, - owner=list_to_pid("<0.204.0>"), - filename="Y4", - bloom=none + start_key = {o, <<"Bucket1">>, <<"K815">>, null}, + end_key = {o, <<"Bucket1">>, <<"K998">>, null}, + owner = list_to_pid("<0.204.0>"), + filename = "Y4", + bloom = none }, - + Man7 = remove_manifest_entry(Man6, 2, 1, E1), Man8 = remove_manifest_entry(Man7, 2, 1, E2), Man9 = remove_manifest_entry(Man8, 2, 1, E3), - + Man10 = insert_manifest_entry(Man9, 2, 1, E1_2), Man11 = insert_manifest_entry(Man10, 2, 1, E2_2), Man12 = insert_manifest_entry(Man11, 2, 1, E3_2), Man13 = insert_manifest_entry(Man12, 2, 1, E4_2), % remove_manifest_entry(Manifest, ManSQN, Level, Entry) - + {Man7, Man8, Man9, Man10, Man11, Man12, Man13}. random_select_test() -> @@ -1340,7 +1382,6 @@ manifest_gc_test() -> ?assertMatch(true, length(ManifestL) > ?MANIFESTS_TO_RETAIN), ?assertMatch(?MANIFESTS_TO_RETAIN, length(FNs)). 
- keylookup_manifest_test() -> {Man0, Man1, Man2, Man3, _Man4, _Man5, Man6} = initial_setup(), LK1_1 = {o, <<"Bucket1">>, <<"K711">>, null}, @@ -1348,13 +1389,13 @@ keylookup_manifest_test() -> LK1_3 = {o, <<"Bucket1">>, <<"K71">>, null}, LK1_4 = {o, <<"Bucket1">>, <<"K75">>, null}, LK1_5 = {o, <<"Bucket1">>, <<"K76">>, null}, - + ?assertMatch(false, key_lookup(Man0, 1, LK1_1)), ?assertMatch(false, key_lookup(Man1, 1, LK1_1)), ?assertMatch(false, key_lookup(Man2, 1, LK1_1)), ?assertMatch(false, key_lookup(Man3, 1, LK1_1)), ?assertMatch(false, key_lookup(Man6, 1, LK1_1)), - + PZ2 = list_to_pid("<0.102.0>"), PZ3 = list_to_pid("<0.103.0>"), PZ5 = list_to_pid("<0.105.0>"), @@ -1363,31 +1404,30 @@ keylookup_manifest_test() -> ?assertMatch(PZ2, key_lookup(Man6, 1, LK1_3)), ?assertMatch(PZ3, key_lookup(Man6, 1, LK1_4)), ?assertMatch(PZ3, key_lookup(Man6, 1, LK1_5)), - + ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_2)), ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_3)), ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_4)), ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_5)), - - {_Man7, _Man8, _Man9, _Man10, _Man11, _Man12, - Man13} = changeup_setup(Man6), - + + {_Man7, _Man8, _Man9, _Man10, _Man11, _Man12, Man13} = changeup_setup(Man6), + ?assertMatch(false, key_lookup(Man0, 1, LK1_1)), ?assertMatch(false, key_lookup(Man1, 1, LK1_1)), ?assertMatch(false, key_lookup(Man2, 1, LK1_1)), ?assertMatch(false, key_lookup(Man3, 1, LK1_1)), ?assertMatch(false, key_lookup(Man6, 1, LK1_1)), - + ?assertMatch(PZ2, key_lookup(Man6, 1, LK1_2)), ?assertMatch(PZ2, key_lookup(Man6, 1, LK1_3)), ?assertMatch(PZ3, key_lookup(Man6, 1, LK1_4)), ?assertMatch(PZ3, key_lookup(Man6, 1, LK1_5)), - + ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_2)), ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_3)), ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_4)), ?assertMatch(PZ5, key_lookup(Man6, 2, LK1_5)), - + PY3 = list_to_pid("<0.203.0>"), ?assertMatch(PY3, key_lookup(Man13, 1, LK1_4)), @@ -1398,19 +1438,19 @@ ext_keylookup_manifest_test() -> ok = leveled_penciller:clean_testdir(RP), {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(), save_manifest(Man6, RP), - + E7 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K997">>, null}, - end_key={o, <<"Bucket1">>, <<"K999">>, null}, - filename="Z7", - owner=list_to_pid("<0.107.0>") + start_key = {o, <<"Bucket1">>, <<"K997">>, null}, + end_key = {o, <<"Bucket1">>, <<"K999">>, null}, + filename = "Z7", + owner = list_to_pid("<0.107.0>") }, Man7 = insert_manifest_entry(Man6, 2, 2, E7), save_manifest(Man7, RP), ManOpen1 = open_manifest(RP), ?assertMatch(2, get_manifest_sqn(ManOpen1)), - + Man7FN = filepath(RP, 2, current_manifest), Man7FNAlt = filename:rootname(Man7FN) ++ ".pnd", {ok, BytesCopied} = file:copy(Man7FN, Man7FNAlt), @@ -1419,79 +1459,79 @@ ext_keylookup_manifest_test() -> RandPos = rand:uniform(bit_size(Bin) - 1), <> = Bin, Flipped = BitToFlip bxor 1, - ok = file:write_file(Man7FN, - <>), - + ok = file:write_file( + Man7FN, + <> + ), + ?assertMatch(2, get_manifest_sqn(Man7)), - + ManOpen2 = open_manifest(RP), ?assertMatch(1, get_manifest_sqn(ManOpen2)), - + E1 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, - end_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, - filename="Z1", - owner=list_to_pid("<0.101.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, + end_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, + filename = "Z1", + owner = list_to_pid("<0.101.0>"), + bloom = none }, E2 = 
#manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, - end_key={o, <<"Bucket1">>, <<"K71">>, null}, - filename="Z2", - owner=list_to_pid("<0.102.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, + end_key = {o, <<"Bucket1">>, <<"K71">>, null}, + filename = "Z2", + owner = list_to_pid("<0.102.0>"), + bloom = none }, E3 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K75">>, null}, - end_key={o, <<"Bucket1">>, <<"K993">>, null}, - filename="Z3", - owner=list_to_pid("<0.103.0>"), - bloom=none + start_key = {o, <<"Bucket1">>, <<"K75">>, null}, + end_key = {o, <<"Bucket1">>, <<"K993">>, null}, + filename = "Z3", + owner = list_to_pid("<0.103.0>"), + bloom = none }, - + E1_2 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld4">>}, <<"K8">>}, - end_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K62">>}, - owner=list_to_pid("<0.201.0>"), - filename="Y1", - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld4">>}, <<"K8">>}, + end_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K62">>}, + owner = list_to_pid("<0.201.0>"), + filename = "Y1", + bloom = none }, E2_2 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K67">>}, - end_key={o, <<"Bucket1">>, <<"K45">>, null}, - owner=list_to_pid("<0.202.0>"), - filename="Y2", - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K67">>}, + end_key = {o, <<"Bucket1">>, <<"K45">>, null}, + owner = list_to_pid("<0.202.0>"), + filename = "Y2", + bloom = none }, E3_2 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K47">>, null}, - end_key={o, <<"Bucket1">>, <<"K812">>, null}, - owner=list_to_pid("<0.203.0>"), - filename="Y3", - bloom=none + start_key = {o, <<"Bucket1">>, <<"K47">>, null}, + end_key = {o, <<"Bucket1">>, <<"K812">>, null}, + owner = list_to_pid("<0.203.0>"), + filename = "Y3", + bloom = none }, E4_2 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K815">>, null}, - end_key={o, <<"Bucket1">>, <<"K998">>, null}, - owner=list_to_pid("<0.104.0>"), - filename="Y4", - bloom=none + start_key = {o, <<"Bucket1">>, <<"K815">>, null}, + end_key = {o, <<"Bucket1">>, <<"K998">>, null}, + owner = list_to_pid("<0.104.0>"), + filename = "Y4", + bloom = none }, - + Man8 = replace_manifest_entry(ManOpen2, 2, 1, E1, E1_2), Man9 = remove_manifest_entry(Man8, 2, 1, [E2, E3]), Man10 = insert_manifest_entry(Man9, 2, 1, [E2_2, E3_2, E4_2]), ?assertMatch(2, get_manifest_sqn(Man10)), - + LK1_4 = {o, <<"Bucket1">>, <<"K75">>, null}, PY3 = list_to_pid("<0.203.0>"), @@ -1499,35 +1539,35 @@ ext_keylookup_manifest_test() -> ?assertMatch(PY3, key_lookup(Man10, 1, LK1_4)), ?assertMatch(PZ5, key_lookup(Man10, 2, LK1_4)), - + E5 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld7">>}, <<"K97">>}, - end_key={o, <<"Bucket1">>, <<"K78">>, null}, - filename="Z5", - owner=PZ5, - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld7">>}, <<"K97">>}, + end_key = {o, <<"Bucket1">>, <<"K78">>, null}, + filename = "Z5", + owner = PZ5, + bloom = none }, E6 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K81">>, null}, - end_key={o, <<"Bucket1">>, <<"K996">>, null}, - filename="Z6", - owner=list_to_pid("<0.106.0>"), - bloom=none + start_key = {o, <<"Bucket1">>, <<"K81">>, null}, + end_key = {o, <<"Bucket1">>, <<"K996">>, null}, + filename = "Z6", + owner = list_to_pid("<0.106.0>"), + bloom = none }, - + Man11 = remove_manifest_entry(Man10, 3, 2, [E5, E6]), ?assertMatch(3, 
get_manifest_sqn(Man11)), ?assertMatch(false, key_lookup(Man11, 2, LK1_4)), - + Man12 = replace_manifest_entry(Man11, 4, 2, E2_2, E5), ?assertMatch(4, get_manifest_sqn(Man12)), ?assertMatch(PZ5, key_lookup(Man12, 2, LK1_4)). rangequery_manifest_test() -> {_Man0, _Man1, _Man2, _Man3, _Man4, _Man5, Man6} = initial_setup(), - + PidMapFun = fun(Pointer) -> {next, ME, _SK} = Pointer, @@ -1541,7 +1581,7 @@ rangequery_manifest_test() -> PY1 = list_to_pid("<0.201.0>"), PY3 = list_to_pid("<0.203.0>"), PY4 = list_to_pid("<0.204.0>"), - + SK1 = {o, <<"Bucket1">>, <<"K711">>, null}, EK1 = {o, <<"Bucket1">>, <<"K999">>, null}, RL1_1 = lists:map(PidMapFun, range_lookup(Man6, 1, SK1, EK1)), @@ -1554,24 +1594,23 @@ rangequery_manifest_test() -> ?assertMatch([PZ1], RL2_1), RL2_2 = lists:map(PidMapFun, range_lookup(Man6, 2, SK2, EK2)), ?assertMatch([PZ5], RL2_2), - + SK3 = {o, <<"Bucket1">>, <<"K994">>, null}, EK3 = {o, <<"Bucket1">>, <<"K995">>, null}, RL3_1 = lists:map(PidMapFun, range_lookup(Man6, 1, SK3, EK3)), ?assertMatch([], RL3_1), RL3_2 = lists:map(PidMapFun, range_lookup(Man6, 2, SK3, EK3)), ?assertMatch([PZ6], RL3_2), - - {_Man7, _Man8, _Man9, _Man10, _Man11, _Man12, - Man13} = changeup_setup(Man6), - + + {_Man7, _Man8, _Man9, _Man10, _Man11, _Man12, Man13} = changeup_setup(Man6), + RL1_1A = lists:map(PidMapFun, range_lookup(Man6, 1, SK1, EK1)), ?assertMatch([PZ3], RL1_1A), RL2_1A = lists:map(PidMapFun, range_lookup(Man6, 1, SK2, EK2)), ?assertMatch([PZ1], RL2_1A), RL3_1A = lists:map(PidMapFun, range_lookup(Man6, 1, SK3, EK3)), ?assertMatch([], RL3_1A), - + RL1_1B = lists:map(PidMapFun, range_lookup(Man13, 1, SK1, EK1)), ?assertMatch([PY3, PY4], RL1_1B), RL2_1B = lists:map(PidMapFun, range_lookup(Man13, 1, SK2, EK2)), @@ -1582,12 +1621,13 @@ rangequery_manifest_test() -> levelzero_present_test() -> E0 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, - end_key={o, <<"Bucket1">>, <<"Key996">>, null}, - filename="Z0", - owner=list_to_pid("<0.101.0>"), - bloom=none}, - + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, + end_key = {o, <<"Bucket1">>, <<"Key996">>, null}, + filename = "Z0", + owner = list_to_pid("<0.101.0>"), + bloom = none + }, + Man0 = new_manifest(), ?assertMatch(false, levelzero_present(Man0)), % insert_manifest_entry(Manifest, ManSQN, Level, Entry) @@ -1610,29 +1650,29 @@ snapshot_release_test() -> Man6 = element(7, initial_setup()), E1 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, - end_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, - filename="Z1", - owner=list_to_pid("<0.101.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld1">>}, <<"K8">>}, + end_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K93">>}, + filename = "Z1", + owner = list_to_pid("<0.101.0>"), + bloom = none }, E2 = #manifest_entry{ - start_key={i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, - end_key={o, <<"Bucket1">>, <<"K71">>, null}, - filename="Z2", - owner=list_to_pid("<0.102.0>"), - bloom=none + start_key = {i, <<"Bucket1">>, {<<"Idx1">>, <<"Fld9">>}, <<"K97">>}, + end_key = {o, <<"Bucket1">>, <<"K71">>, null}, + filename = "Z2", + owner = list_to_pid("<0.102.0>"), + bloom = none }, E3 = #manifest_entry{ - start_key={o, <<"Bucket1">>, <<"K75">>, null}, - end_key={o, <<"Bucket1">>, <<"K993">>, null}, - filename="Z3", - owner=list_to_pid("<0.103.0>"), - bloom=none + start_key = {o, <<"Bucket1">>, <<"K75">>, null}, + end_key = {o, <<"Bucket1">>, <<"K993">>, null}, 
+ filename = "Z3", + owner = list_to_pid("<0.103.0>"), + bloom = none }, - + Man7 = add_snapshot(Man6, PidA1, 3600), Man8 = remove_manifest_entry(Man7, 2, 1, E1), Man9 = add_snapshot(Man8, PidA2, 3600), @@ -1640,34 +1680,33 @@ snapshot_release_test() -> Man11 = add_snapshot(Man10, PidA3, 3600), Man12 = remove_manifest_entry(Man11, 4, 1, E3), Man13 = add_snapshot(Man12, PidA4, 3600), - + ?assertMatch(false, element(1, ready_to_delete_combined(Man8, "Z1"))), ?assertMatch(false, element(1, ready_to_delete_combined(Man10, "Z2"))), ?assertMatch(false, element(1, ready_to_delete_combined(Man12, "Z3"))), - + Man14 = release_snapshot(Man13, PidA1), ?assertMatch(false, element(1, ready_to_delete_combined(Man14, "Z2"))), ?assertMatch(false, element(1, ready_to_delete_combined(Man14, "Z3"))), {Bool14, Man15} = ready_to_delete_combined(Man14, "Z1"), ?assertMatch(true, Bool14), - + %This doesn't change anything - released snaphsot not the min Man16 = release_snapshot(Man15, PidA4), ?assertMatch(false, element(1, ready_to_delete_combined(Man16, "Z2"))), ?assertMatch(false, element(1, ready_to_delete_combined(Man16, "Z3"))), - + Man17 = release_snapshot(Man16, PidA2), ?assertMatch(false, element(1, ready_to_delete_combined(Man17, "Z3"))), {Bool17, Man18} = ready_to_delete_combined(Man17, "Z2"), ?assertMatch(true, Bool17), - + Man19 = release_snapshot(Man18, PidA3), - + io:format("MinSnapSQN ~w~n", [Man19#manifest.min_snapshot_sqn]), - + {Bool19, _Man20} = ready_to_delete_combined(Man19, "Z3"), ?assertMatch(true, Bool19). - snapshot_timeout_test() -> PidA1 = spawn(fun() -> ok end), @@ -1683,78 +1722,72 @@ snapshot_timeout_test() -> ?assertMatch(0, length(Man10#manifest.snapshots)). potential_issue_test() -> - Manifest = + Manifest = {manifest, - {array,9,0,[], - { - [], - [ - {manifest_entry, - {o_rkv, <<"Bucket">>, <<"Key10">>, null}, - {o_rkv, <<"Bucket">>, <<"Key12949">>,null}, - list_to_pid("<0.313.0>"), - "./16_1_0.sst", - none - }, - {manifest_entry, - {o_rkv, <<"Bucket">>, <<"Key129490">>, null}, - {o_rkv, <<"Bucket">>, <<"Key158981">>, null}, - list_to_pid("<0.315.0>"), - "./16_1_1.sst", - none - }, - {manifest_entry, - {o_rkv, <<"Bucket">>, <<"Key158982">>, null}, - {o_rkv, <<"Bucket">>, <<"Key188472">>, null}, - list_to_pid("<0.316.0>"), - "./16_1_2.sst", - none - } - ], - { - idxt, - 1, + {array, 9, 0, [], { + [], + [ + {manifest_entry, + {o_rkv, <<"Bucket">>, <<"Key10">>, null}, + {o_rkv, <<"Bucket">>, <<"Key12949">>, null}, + list_to_pid("<0.313.0>"), "./16_1_0.sst", none}, + {manifest_entry, + {o_rkv, <<"Bucket">>, <<"Key129490">>, null}, + {o_rkv, <<"Bucket">>, <<"Key158981">>, null}, + list_to_pid("<0.315.0>"), "./16_1_1.sst", none}, + {manifest_entry, + {o_rkv, <<"Bucket">>, <<"Key158982">>, null}, + {o_rkv, <<"Bucket">>, <<"Key188472">>, null}, + list_to_pid("<0.316.0>"), "./16_1_2.sst", none} + ], { - [ - {{o_rkv, <<"Bucket1">>, <<"Key1">>, null}, - { - manifest_entry, - {o_rkv, <<"Bucket">>, <<"Key9083">>, null}, - {o_rkv, <<"Bucket1">>, <<"Key1">>, null}, - list_to_pid("<0.320.0>"), - "./16_1_6.sst", - none - } - } - ] + idxt, + 1, + { + { + [ + {{o_rkv, <<"Bucket1">>, <<"Key1">>, null}, { + manifest_entry, + {o_rkv, <<"Bucket">>, <<"Key9083">>, + null}, + {o_rkv, <<"Bucket1">>, <<"Key1">>, + null}, + list_to_pid("<0.320.0>"), + "./16_1_6.sst", + none + }} + ] + }, + {1, { + {o_rkv, <<"Bucket1">>, <<"Key1">>, null}, + 1, + nil, + nil + }} + } }, - {1, {{o_rkv, <<"Bucket1">> ,<<"Key1">> ,null},1,nil,nil}}}}, - {idxt,0,{{},{0,nil}}}, - {idxt,0,{{},{0,nil}}}, - {idxt,0,{{},{0,nil}}}, - 
{idxt,0,{{},{0,nil}}}, - {idxt,0,{{},{0,nil}}}, - {idxt,0,{{},{0,nil}}}, - []}}, - 19, - [], - 0, - new_pending_deletions(), - 2, - new_blooms()}, - Range1 = + {idxt, 0, {{}, {0, nil}}}, + {idxt, 0, {{}, {0, nil}}}, + {idxt, 0, {{}, {0, nil}}}, + {idxt, 0, {{}, {0, nil}}}, + {idxt, 0, {{}, {0, nil}}}, + {idxt, 0, {{}, {0, nil}}}, + [] + }}, + 19, [], 0, new_pending_deletions(), 2, new_blooms()}, + Range1 = range_lookup( - Manifest, - 1, - {o_rkv, <<"Bucket">>, null, null}, + Manifest, + 1, + {o_rkv, <<"Bucket">>, null, null}, {o_rkv, <<"Bucket">>, null, null} ), Range2 = range_lookup( - Manifest, - 2, - {o_rkv, <<"Bucket">>, null, null}, + Manifest, + 2, + {o_rkv, <<"Bucket">>, null, null}, {o_rkv, <<"Bucket">>, null, null} ), io:format("Range in Level 1 ~w~n", [Range1]), @@ -1762,5 +1795,4 @@ potential_issue_test() -> ?assertMatch(3, length(Range1)), ?assertMatch(1, length(Range2)). - -endif. diff --git a/src/leveled_pmem.erl b/src/leveled_pmem.erl index db97a998..f3fe9b9e 100644 --- a/src/leveled_pmem.erl +++ b/src/leveled_pmem.erl @@ -29,24 +29,25 @@ -include("leveled.hrl"). -export([ - prepare_for_index/2, - add_to_cache/5, - to_list/2, - check_levelzero/3, - check_levelzero/4, - merge_trees/4, - add_to_index/3, - new_index/0, - check_index/2, - cache_full/1 - ]). + prepare_for_index/2, + add_to_cache/5, + to_list/2, + check_levelzero/3, + check_levelzero/4, + merge_trees/4, + add_to_index/3, + new_index/0, + check_index/2, + cache_full/1 +]). % Test functions to ignore for equalizer - due to array issues -eqwalizer({nowarn_function, index_performance_test/0}). --define(MAX_CACHE_LINES, 31). % Must be less than 128 +% Must be less than 128 +-define(MAX_CACHE_LINES, 31). --type index_array() :: list(array:array(binary()))|none. +-type index_array() :: list(array:array(binary())) | none. -export_type([index_array/0]). @@ -61,7 +62,8 @@ cache_full(L0Cache) -> length(L0Cache) == ?MAX_CACHE_LINES. -spec prepare_for_index( - array:array(binary()), leveled_codec:segment_hash()) -> array:array(). + array:array(binary()), leveled_codec:segment_hash() +) -> array:array(). %% @doc %% Add the hash of a key to the index. This is 'prepared' in the sense that %% this index is not use until it is loaded into the main index. @@ -77,15 +79,18 @@ prepare_for_index(IndexArray, Hash) -> array:set(Slot, <>, IndexArray). -spec add_to_index( - array:array(binary()), index_array(), integer()) -> index_array(). + array:array(binary()), index_array(), integer() +) -> index_array(). %% @doc %% Expand the penciller's current index array with the details from a new %% ledger cache tree sent from the Bookie. The tree will have a cache slot %% which is the index of this ledger_cache in the list of the ledger_caches add_to_index( - LM1Array, L0Index, CacheSlot) - when CacheSlot < 128, L0Index =/= none -> - [LM1Array|L0Index]. + LM1Array, L0Index, CacheSlot +) when + CacheSlot < 128, L0Index =/= none +-> + [LM1Array | L0Index]. -spec new_index() -> array:array(binary()). %% @doc @@ -94,8 +99,8 @@ new_index() -> % eqwalizer:ignore - array does contain binary() array:new([{size, 256}, {default, <<>>}]). --spec check_index(leveled_codec:segment_hash(), index_array()) - -> list(non_neg_integer()). +-spec check_index(leveled_codec:segment_hash(), index_array()) -> + list(non_neg_integer()). 
%% @doc %% return a list of positions in the list of cache arrays that may contain the %% key associated with the hash being checked @@ -106,20 +111,22 @@ check_index(Hash, L0Index) when L0Index =/= none -> fun(A, {SlotC, PosList}) -> B = array:get(Slot, A), case find_pos(B, H0) of - true -> {SlotC + 1, [SlotC|PosList]}; + true -> {SlotC + 1, [SlotC | PosList]}; false -> {SlotC + 1, PosList} end end, {1, []}, - L0Index), - lists:reverse(Positions). + L0Index + ), + lists:reverse(Positions). -spec add_to_cache( integer(), {tuple(), integer(), integer()}, integer(), list(), - boolean()) -> {integer(), integer(), list()}|empty_push. + boolean() +) -> {integer(), integer(), list()} | empty_push. %% @doc %% The penciller's cache is a list of leveled_trees, this adds a new tree to %% that cache, providing an update to the approximate size of the cache and @@ -134,14 +141,13 @@ add_to_cache(L0Size, {LM1, MinSQN, MaxSQN}, LedgerSQN, TreeList, Writeable) -> {_, LM1Size} -> if MinSQN >= LedgerSQN -> - {MaxSQN, - L0Size + LM1Size, - [LM1|TreeList]} + {MaxSQN, L0Size + LM1Size, [LM1 | TreeList]} end end. -spec to_list( - integer(), fun((pos_integer()) -> leveled_tree:leveled_tree())) -> list(). + integer(), fun((pos_integer()) -> leveled_tree:leveled_tree()) +) -> list(). %% @doc %% The cache is a list of leveled_trees of length Slots. This will fetch %% each tree in turn by slot ID and then produce a merged/sorted output of @@ -152,18 +158,20 @@ add_to_cache(L0Size, {LM1, MinSQN, MaxSQN}, LedgerSQN, TreeList, Writeable) -> to_list(Slots, FetchFun) -> SW = os:timestamp(), SlotList = lists:seq(1, Slots), - FullList = lists:foldl(fun(Slot, Acc) -> - Tree = FetchFun(Slot), - L = leveled_tree:to_list(Tree), - lists:ukeymerge(1, Acc, L) - end, - [], - SlotList), + FullList = lists:foldl( + fun(Slot, Acc) -> + Tree = FetchFun(Slot), + L = leveled_tree:to_list(Tree), + lists:ukeymerge(1, Acc, L) + end, + [], + SlotList + ), leveled_log:log_timer(pm002, [length(FullList)], SW), FullList. --spec check_levelzero(tuple(), list(integer()), list()) - -> {boolean(), tuple|not_found}. +-spec check_levelzero(tuple(), list(integer()), list()) -> + {boolean(), tuple | not_found}. %% @doc %% Check for the presence of a given Key in the Level Zero cache, with the %% index array having been checked first for a list of potential positions @@ -173,8 +181,8 @@ to_list(Slots, FetchFun) -> check_levelzero(Key, PosList, TreeList) -> check_levelzero(Key, leveled_codec:segment_hash(Key), PosList, TreeList). --spec check_levelzero(tuple(), {integer(), integer()}, list(integer()), list()) - -> {boolean(), tuple|not_found}. +-spec check_levelzero(tuple(), {integer(), integer()}, list(integer()), list()) -> + {boolean(), tuple | not_found}. %% @doc %% Check for the presence of a given Key in the Level Zero cache, with the %% index array having been checked first for a list of potential positions @@ -197,15 +205,16 @@ merge_trees(StartKey, EndKey, TreeList, LevelMinus1) -> lists:foldl( fun(Tree, Acc) -> R = leveled_tree:match_range(StartKey, EndKey, Tree), - lists:ukeymerge(1, Acc, R) end, - [], - [LevelMinus1|TreeList]). + lists:ukeymerge(1, Acc, R) + end, + [], + [LevelMinus1 | TreeList] + ). 
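As a hedged illustration of how the checks above are intended to compose (only functions specified in this module are used; the helper name lookup_in_l0 and the cache values are hypothetical), a level-zero lookup first narrows the candidate cache positions via the index, then checks only those trees:

    %% Sketch: look up a key in the penciller's level-zero cache, using the
    %% index to limit which cache trees need to be searched.
    lookup_in_l0(Key, L0Index, TreeList) ->
        Hash = leveled_codec:segment_hash(Key),
        PosList = check_index(Hash, L0Index),
        %% Only the trees at the candidate positions are checked.
        check_levelzero(Key, Hash, PosList, TreeList).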
%%%============================================================================ %%% Internal Functions %%%============================================================================ - find_pos(<<>>, _Hash) -> false; find_pos(<>, Hash) -> @@ -213,7 +222,6 @@ find_pos(<>, Hash) -> find_pos(<<_Miss:24/integer, T/binary>>, Hash) -> find_pos(T, Hash). - split_hash({SegmentID, ExtraHash}) -> Slot = SegmentID band 255, H0 = (SegmentID bsr 8) bor (ExtraHash bsl 8), @@ -234,7 +242,7 @@ check_slotlist(Key, _Hash, CheckList, TreeList) -> {true, {Key, Value}} end end - end, + end, lists:foldl(SlotCheckFun, {false, not_found}, CheckList). %%%============================================================================ @@ -250,7 +258,7 @@ generate_randomkeys_aslist(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> 1, generate_randomkeys(Seqn, Count, [], BucketRangeLow, BucketRangeHigh) ). - + generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> KVL = generate_randomkeys(Seqn, Count, [], BucketRangeLow, BucketRangeHigh), @@ -261,23 +269,23 @@ generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> BNumber = lists:flatten( - io_lib:format("~4..0B", - [BucketLow + rand:uniform(BRange)])), + io_lib:format( + "~4..0B", + [BucketLow + rand:uniform(BRange)] + ) + ), KNumber = lists:flatten(io_lib:format("~4..0B", [rand:uniform(1000)])), {K, V} = { - {o, - list_to_binary("Bucket" ++ BNumber), - list_to_binary("Key" ++ KNumber), - null}, + {o, list_to_binary("Bucket" ++ BNumber), + list_to_binary("Key" ++ KNumber), null}, {Seqn, {active, infinity}, null} }, - generate_randomkeys(Seqn + 1, Count - 1, [{K, V}|Acc], BucketLow, BRange). - + generate_randomkeys(Seqn + 1, Count - 1, [{K, V} | Acc], BucketLow, BRange). 
compare_method_test() -> - R = + R = lists:foldl( fun(_X, {LedgerSQN, L0Size, L0TreeList}) -> LM1 = generate_randomkeys(LedgerSQN + 1, 2000, 1, 500), @@ -286,60 +294,63 @@ compare_method_test() -> {LM1, LedgerSQN + 1, LedgerSQN + 2000}, LedgerSQN, L0TreeList, - true) + true + ) end, {0, 0, []}, - lists:seq(1, 16)), + lists:seq(1, 16) + ), {SQN, Size, TreeList} = R, ?assertMatch(32000, SQN), ?assertMatch(true, Size =< 32000), - + TestList = leveled_tree:to_list(generate_randomkeys(1, 2000, 1, 800)), FindKeyFun = fun(Key) -> - fun(Tree, {Found, KV}) -> - case Found of - true -> - {true, KV}; - false -> - L0 = leveled_tree:match(Key, Tree), - case L0 of - none -> - {false, not_found}; - {value, Value} -> - {true, {Key, Value}} - end - end + fun(Tree, {Found, KV}) -> + case Found of + true -> + {true, KV}; + false -> + L0 = leveled_tree:match(Key, Tree), + case L0 of + none -> + {false, not_found}; + {value, Value} -> + {true, {Key, Value}} + end end - end, - + end + end, + S0 = lists:foldl( fun({Key, _V}, Acc) -> R0 = lists:foldl( - FindKeyFun(Key), {false, not_found}, TreeList), - [R0|Acc] + FindKeyFun(Key), {false, not_found}, TreeList + ), + [R0 | Acc] end, [], - TestList) - , - + TestList + ), + PosList = lists:seq(1, length(TreeList)), S1 = lists:foldl( fun({Key, _V}, Acc) -> R0 = check_levelzero(Key, PosList, TreeList), - [R0|Acc] + [R0 | Acc] end, - [], - TestList - ), - + [], + TestList + ), + ?assertMatch(S0, S1), - + StartKey = {o, <<"Bucket0100">>, null, null}, EndKey = {o, <<"Bucket0200">>, null, null}, SWa = os:timestamp(), @@ -351,7 +362,7 @@ compare_method_test() -> P = leveled_codec:endkey_passed(EndKey, K), case {K, P} of {K, false} when K >= StartKey -> - [{K, V}|Acc]; + [{K, V} | Acc]; _ -> Acc end @@ -366,11 +377,14 @@ compare_method_test() -> [timer:now_diff(os:timestamp(), SWa), Sz0] ), SWb = os:timestamp(), - Q1 = merge_trees(StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE)), + Q1 = merge_trees( + StartKey, EndKey, TreeList, leveled_tree:empty(?CACHE_TYPE) + ), Sz1 = length(Q1), io:format( "Merge method took ~w microseconds resulting in tree of size ~w~n", - [timer:now_diff(os:timestamp(), SWb), Sz1]), + [timer:now_diff(os:timestamp(), SWb), Sz1] + ), ?assertMatch(Sz0, Sz1). with_index_test_() -> @@ -387,19 +401,22 @@ with_index_test2() -> fun(_X, {{LedgerSQN, L0Size, L0TreeList}, L0Idx, SrcList}) -> LM1 = generate_randomkeys_aslist(LedgerSQN + 1, 2000, 1, 500), LM1Array = lists:foldl(IndexPrepareFun, new_index(), LM1), - LM1SL = leveled_tree:from_orderedlist(lists:ukeysort(1, LM1), ?CACHE_TYPE), + LM1SL = leveled_tree:from_orderedlist( + lists:ukeysort(1, LM1), ?CACHE_TYPE + ), UpdL0Index = add_to_index(LM1Array, L0Idx, length(L0TreeList) + 1), R = add_to_cache( - L0Size, + L0Size, {LM1SL, LedgerSQN + 1, LedgerSQN + 2000}, LedgerSQN, L0TreeList, - true), + true + ), {R, UpdL0Index, lists:ukeymerge(1, LM1, SrcList)} end, - + R0 = lists:foldl(LoadFun, {{0, 0, []}, [], []}, lists:seq(1, 16)), - + {{SQN, Size, TreeList}, L0Index, SrcKVL} = R0, ?assertMatch(32000, SQN), ?assertMatch(true, Size =< 32000), @@ -408,13 +425,14 @@ with_index_test2() -> fun({K, V}, {L0Idx, L0Cache}) -> H = leveled_codec:segment_hash(K), PosList = check_index(H, L0Idx), - ?assertMatch({true, {K, V}}, - check_slotlist(K, H, PosList, L0Cache)), + ?assertMatch( + {true, {K, V}}, + check_slotlist(K, H, PosList, L0Cache) + ), {L0Idx, L0Cache} end, - + _R1 = lists:foldl(CheckFun, {L0Index, TreeList}, SrcKVL). 
- index_performance_test() -> LM1 = generate_randomkeys_aslist(1, 2000, 1, 500), @@ -427,41 +445,46 @@ index_performance_test() -> lists:foldl( fun(H, A) -> prepare_for_index(A, H) end, new_index(), - HL1), + HL1 + ), io:format( - user, + user, "~nPrepare single index takes ~w microsec~n", - [timer:now_diff(os:timestamp(), SWP)]), - + [timer:now_diff(os:timestamp(), SWP)] + ), + SWL = os:timestamp(), - PMI1 = + PMI1 = lists:foldl( - fun(I, Idx) -> add_to_index(A1, Idx, I) end, [], lists:seq(1, 8)), + fun(I, Idx) -> add_to_index(A1, Idx, I) end, [], lists:seq(1, 8) + ), io:format( - user, + user, "Appending to array takes ~w microsec~n", - [timer:now_diff(os:timestamp(), SWL)]), - + [timer:now_diff(os:timestamp(), SWL)] + ), + SWC1 = os:timestamp(), R0 = lists:seq(1, 8), lists:foreach(fun(H) -> ?assertMatch(R0, check_index(H, PMI1)) end, HL1), io:format( - user, + user, "Checking 2000 matches in array at each level takes ~w microsec~n", - [timer:now_diff(os:timestamp(), SWC1)]), - + [timer:now_diff(os:timestamp(), SWC1)] + ), + SWC2 = os:timestamp(), - FPT = + FPT = lists:foldl( fun(H, FPC) -> FPC + length(check_index(H, PMI1)) end, 0, - HL2), + HL2 + ), io:format( - user, + user, "Checking 2000 misses in array at each level takes ~w microsec " ++ - "with ~w false positives~n", - [timer:now_diff(os:timestamp(), SWC2), FPT]). - - + "with ~w false positives~n", + [timer:now_diff(os:timestamp(), SWC2), FPT] + ). -endif. diff --git a/src/leveled_runner.erl b/src/leveled_runner.erl index de6ad80e..6519b0c1 100644 --- a/src/leveled_runner.erl +++ b/src/leveled_runner.erl @@ -1,67 +1,68 @@ %% -------- RUNNER --------- %% -%% A bookie's runner would traditionally allow remote actors to place bets -%% via the runner. In this case the runner will allow a remote actor to +%% A bookie's runner would traditionally allow remote actors to place bets +%% via the runner. In this case the runner will allow a remote actor to %% have query access to the ledger or journal. Runners provide a snapshot of -%% the book for querying the backend. +%% the book for querying the backend. %% -%% Runners implement the {async, Folder} within Riak backends - returning an -%% {async, Runner}. Runner is just a function that provides access to a +%% Runners implement the {async, Folder} within Riak backends - returning an +%% {async, Runner}. Runner is just a function that provides access to a %% snapshot of the database to allow for a particular query. The %% Runner may make the snapshot at the point it is called, or the snapshot can -%% be generated and encapsulated within the function (known as snap_prefold). +%% be generated and encapsulated within the function (known as snap_prefold). %% -%% Runners which view only the Ledger (the Penciller view of the state) may -%% have a CheckPresence boolean - which causes the function to perform a basic -%% check that the item is available in the Journal via the Inker as part of -%% the fold. This may be useful for anti-entropy folds +%% Runners which view only the Ledger (the Penciller view of the state) may +%% have a CheckPresence boolean - which causes the function to perform a basic +%% check that the item is available in the Journal via the Inker as part of +%% the fold. This may be useful for anti-entropy folds -module(leveled_runner). -include("leveled.hrl"). 
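To make the {async, Runner} convention described above concrete, here is a minimal, hypothetical caller of bucket_list/4 (defined below). The snapshot is only taken, and the after-fun only run, when the returned zero-arity fun is applied:

    %% Sketch (hypothetical caller): runners are zero-arity funs, so the work
    %% can be deferred or handed to another process before being executed.
    list_buckets(SnapFun, Tag) ->
        FoldBucketsFun = fun(B, Acc) -> [B | Acc] end,
        {async, Runner} = bucket_list(SnapFun, Tag, FoldBucketsFun, []),
        Runner().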
-export([ - bucket_sizestats/3, - bucket_list/4, - bucket_list/5, - index_query/3, - bucketkey_query/4, - bucketkey_query/6, - hashlist_query/3, - tictactree/5, - foldheads_allkeys/7, - foldobjects_allkeys/4, - foldheads_bybucket/8, - foldobjects_bybucket/4, - foldobjects_byindex/3 - ]). + bucket_sizestats/3, + bucket_list/4, + bucket_list/5, + index_query/3, + bucketkey_query/4, + bucketkey_query/6, + hashlist_query/3, + tictactree/5, + foldheads_allkeys/7, + foldobjects_allkeys/4, + foldheads_bybucket/8, + foldobjects_bybucket/4, + foldobjects_byindex/3 +]). -define(CHECKJOURNAL_PROB, 0.2). --type key_range() - :: {leveled_codec:query_key(), leveled_codec:query_key()}. +-type key_range() :: + {leveled_codec:query_key(), leveled_codec:query_key()}. -type foldacc() :: any(). - % Can't currently be specific about what an acc might be - --type fold_objects_fun() - :: fun((leveled_codec:key(), leveled_codec:key(), any(), foldacc()) - -> foldacc()). --type fold_keys_fun() - :: fun((leveled_codec:key(), leveled_codec:key(), foldacc()) - -> foldacc()). --type fold_buckets_fun() - :: fun((leveled_codec:key(), foldacc()) -> foldacc()). --type fold_filter_fun() - :: fun((leveled_codec:key(), leveled_codec:key()) -> accumulate|pass). - --type snap_fun() - :: fun(() -> {ok, pid(), pid()|null, fun(() -> ok)}). --type runner_fun() - :: fun(() -> foldacc()). --type objectacc_fun() - :: fun((leveled_codec:object_key(), any(), foldacc()) -> foldacc()). --type mp() - :: any(). +% Can't currently be specific about what an acc might be + +%% erlfmt:ignore - issues with editors when function definitions are split +-type fold_objects_fun() :: + fun((leveled_codec:key(), leveled_codec:key(), any(), foldacc()) + -> foldacc() + ). +-type fold_keys_fun() :: + fun((leveled_codec:key(), leveled_codec:key(), foldacc()) -> foldacc()). +-type fold_buckets_fun() :: + fun((leveled_codec:key(), foldacc()) -> foldacc()). +-type fold_filter_fun() :: + fun((leveled_codec:key(), leveled_codec:key()) -> accumulate | pass). + +-type snap_fun() :: + fun(() -> {ok, pid(), pid() | null, fun(() -> ok)}). +-type runner_fun() :: + fun(() -> foldacc()). +-type objectacc_fun() :: + fun((leveled_codec:object_key(), any(), foldacc()) -> foldacc()). +-type mp() :: + any(). -export_type([fold_keys_fun/0, mp/0]). @@ -69,56 +70,64 @@ %%% External functions %%%============================================================================ - --spec bucket_sizestats(snap_fun(),leveled_codec:key(), leveled_codec:tag()) - -> {async, runner_fun()}. +-spec bucket_sizestats(snap_fun(), leveled_codec:key(), leveled_codec:tag()) -> + {async, runner_fun()}. %% @doc %% Fold over a bucket accumulating the count of objects and their total sizes bucket_sizestats(SnapFun, Bucket, Tag) -> StartKey = leveled_codec:to_querykey(Bucket, null, Tag), EndKey = leveled_codec:to_querykey(Bucket, null, Tag), AccFun = accumulate_size(), - Runner = + Runner = fun() -> {ok, LedgerSnap, _JournalSnap, AfterFun} = SnapFun(), - Acc = + Acc = leveled_penciller:pcl_fetchkeys( - LedgerSnap, StartKey, EndKey, AccFun, {0, 0}, as_pcl), + LedgerSnap, StartKey, EndKey, AccFun, {0, 0}, as_pcl + ), AfterFun(), Acc end, {async, Runner}. --spec bucket_list(snap_fun(), - leveled_codec:tag(), - fold_buckets_fun(), foldacc()) -> {async, runner_fun()}. +-spec bucket_list( + snap_fun(), + leveled_codec:tag(), + fold_buckets_fun(), + foldacc() +) -> {async, runner_fun()}. 
%% @doc -%% List buckets for tag, assuming bucket names are all either binary, ascii -%% strings or integers +%% List buckets for tag, assuming bucket names are all either binary, ascii +%% strings or integers bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc) -> bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, -1). --spec bucket_list(snap_fun(), - leveled_codec:tag(), - fold_buckets_fun(), foldacc(), - integer()) -> {async, runner_fun()}. +-spec bucket_list( + snap_fun(), + leveled_codec:tag(), + fold_buckets_fun(), + foldacc(), + integer() +) -> {async, runner_fun()}. %% @doc %% set Max Buckets to -1 to list all buckets, otherwise will only return %% MaxBuckets (use 1 to confirm that there exists any bucket for a given Tag) bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) -> - Runner = + Runner = fun() -> {ok, LedgerSnapshot, _JournalSnapshot, AfterFun} = SnapFun(), - BucketAcc = + BucketAcc = get_nextbucket( - null, null, Tag, LedgerSnapshot, [], {0, MaxBuckets}), + null, null, Tag, LedgerSnapshot, [], {0, MaxBuckets} + ), FoldRunner = fun() -> lists:foldr( fun({B, _K}, Acc) -> FoldBucketsFun(B, Acc) end, InitAcc, - BucketAcc) - % Buckets in reverse alphabetical order so foldr + BucketAcc + ) + % Buckets in reverse alphabetical order so foldr end, % For this fold, the fold over the store is actually completed % before results are passed to the FoldBucketsFun to be @@ -130,22 +139,24 @@ bucket_list(SnapFun, Tag, FoldBucketsFun, InitAcc, MaxBuckets) -> -spec index_query( snap_fun(), - {leveled_codec:ledger_key(), leveled_codec:ledger_key(), - {boolean()|binary(), leveled_codec:term_expression()}}, - {fold_keys_fun(), foldacc()}) -> {async, runner_fun()}. + {leveled_codec:ledger_key(), leveled_codec:ledger_key(), { + boolean() | binary(), leveled_codec:term_expression() + }}, + {fold_keys_fun(), foldacc()} +) -> {async, runner_fun()}. %% @doc %% Secondary index query %% This has the special capability that it will expect a message to be thrown %% during the query - and handle this without crashing the penciller snapshot -%% This allows for this query to be used with a max_results check in the -%% applictaion - and to throw a stop message to be caught by the worker +%% This allows for this query to be used with a max_results check in the +%% applictaion - and to throw a stop message to be caught by the worker %% handling the runner. This behaviour will not prevent the snapshot from %% closing neatly, allowing delete_pending files to be cleared without waiting %% for a timeout index_query(SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT) -> {FoldKeysFun, InitAcc} = FoldAccT, - - Runner = + + Runner = fun() -> {ok, LedgerSnapshot, _JournalSnapshot, AfterFun} = SnapFun(), Folder = @@ -155,24 +166,31 @@ index_query(SnapFun, {StartKey, EndKey, TermHandling}, FoldAccT) -> EndKey, leveled_codec:accumulate_index(TermHandling, FoldKeysFun), InitAcc, - by_runner), + by_runner + ), wrap_runner(Folder, AfterFun) end, {async, Runner}. --spec bucketkey_query(snap_fun(), - leveled_codec:tag(), - leveled_codec:key()|null, - {leveled_codec:single_key()|null, leveled_codec:single_key()|null}, - {fold_keys_fun(), foldacc()}, - leveled_codec:term_expression()) - -> {async, runner_fun()}. +-spec bucketkey_query( + snap_fun(), + leveled_codec:tag(), + leveled_codec:key() | null, + {leveled_codec:single_key() | null, leveled_codec:single_key() | null}, + {fold_keys_fun(), foldacc()}, + leveled_codec:term_expression() +) -> + {async, runner_fun()}. 
%% @doc %% Fold over all keys in `KeyRange' under tag (restricted to a given bucket) -bucketkey_query(SnapFun, Tag, Bucket, - {StartKey, EndKey}, - {FoldKeysFun, InitAcc}, - TermRegex) -> +bucketkey_query( + SnapFun, + Tag, + Bucket, + {StartKey, EndKey}, + {FoldKeysFun, InitAcc}, + TermRegex +) -> SK = leveled_codec:to_querykey(Bucket, StartKey, Tag), EK = leveled_codec:to_querykey(Bucket, EndKey, Tag), AccFun = accumulate_keys(FoldKeysFun, TermRegex), @@ -181,47 +199,56 @@ bucketkey_query(SnapFun, Tag, Bucket, {ok, LedgerSnapshot, _JournalSnapshot, AfterFun} = SnapFun(), Folder = leveled_penciller:pcl_fetchkeys( - LedgerSnapshot, SK, EK, AccFun, InitAcc, by_runner), + LedgerSnapshot, SK, EK, AccFun, InitAcc, by_runner + ), wrap_runner(Folder, AfterFun) end, {async, Runner}. --spec bucketkey_query(snap_fun(), - leveled_codec:tag(), - leveled_codec:key()|null, - {fold_keys_fun(), foldacc()}) -> {async, runner_fun()}. +-spec bucketkey_query( + snap_fun(), + leveled_codec:tag(), + leveled_codec:key() | null, + {fold_keys_fun(), foldacc()} +) -> {async, runner_fun()}. %% @doc %% Fold over all keys under tag (potentially restricted to a given bucket) bucketkey_query(SnapFun, Tag, Bucket, FunAcc) -> bucketkey_query(SnapFun, Tag, Bucket, {null, null}, FunAcc, undefined). --spec hashlist_query(snap_fun(), - leveled_codec:tag(), - boolean()) -> {async, runner_fun()}. +-spec hashlist_query( + snap_fun(), + leveled_codec:tag(), + boolean() +) -> {async, runner_fun()}. %% @doc %% Fold over the keys under a given Tag accumulating the hashes hashlist_query(SnapFun, Tag, JournalCheck) -> StartKey = leveled_codec:to_querykey(null, null, Tag), EndKey = leveled_codec:to_querykey(null, null, Tag), - Runner = + Runner = fun() -> {ok, LedgerSnapshot, JournalSnapshot, AfterFun} = SnapFun(), - AccFun = accumulate_hashes(JournalCheck, JournalSnapshot), + AccFun = accumulate_hashes(JournalCheck, JournalSnapshot), Acc = leveled_penciller:pcl_fetchkeys( - LedgerSnapshot, StartKey, EndKey, AccFun, []), + LedgerSnapshot, StartKey, EndKey, AccFun, [] + ), AfterFun(), Acc end, {async, Runner}. -spec tictactree( - snap_fun(), + snap_fun(), {leveled_codec:tag(), leveled_codec:key(), tuple()}, - boolean(), leveled_tictac:tree_size(), fold_filter_fun()) - -> {async, runner_fun()}. + boolean(), + leveled_tictac:tree_size(), + fold_filter_fun() +) -> + {async, runner_fun()}. 
%% @doc -%% Return a merkle tree from the fold, directly accessing hashes cached in the +%% Return a merkle tree from the fold, directly accessing hashes cached in the %% metadata tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) -> % Journal check can be used for object key folds to confirm that the @@ -232,37 +259,42 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) -> {ok, LedgerSnap, JournalSnap, AfterFun} = SnapFun(), % The start key and end key will vary depending on whether the % fold is to fold over an index or a key range - EnsureKeyBinaryFun = - fun(K, T) -> - case is_binary(K) of + EnsureKeyBinaryFun = + fun(K, T) -> + case is_binary(K) of true -> {K, T}; false -> {leveled_util:t2b(K), T} - end + end end, {StartKey, EndKey, ExtractFun} = case Tag of ?IDX_TAG -> {IdxFld, StartIdx, EndIdx} = Query, KeyDefFun = fun leveled_codec:to_querykey/5, - {KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, StartIdx), + { + KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, StartIdx), KeyDefFun(Bucket, null, ?IDX_TAG, IdxFld, EndIdx), - EnsureKeyBinaryFun}; + EnsureKeyBinaryFun + }; _ -> {StartOKey, EndOKey} = Query, - {leveled_codec:to_querykey(Bucket, StartOKey, Tag), + { + leveled_codec:to_querykey(Bucket, StartOKey, Tag), leveled_codec:to_querykey(Bucket, EndOKey, Tag), - fun(K, H) -> + fun(K, H) -> V = {is_hash, H}, EnsureKeyBinaryFun(K, V) - end} + end + } end, - AccFun = + AccFun = accumulate_tree(Filter, JournalCheck, JournalSnap, ExtractFun), - Acc = + Acc = leveled_penciller:pcl_fetchkeys( - LedgerSnap, StartKey, EndKey, AccFun, Tree), + LedgerSnap, StartKey, EndKey, AccFun, Tree + ), AfterFun(), Acc end, @@ -270,44 +302,58 @@ tictactree(SnapFun, {Tag, Bucket, Query}, JournalCheck, TreeSize, Filter) -> -spec foldheads_allkeys( snap_fun(), - leveled_codec:tag(), - fold_objects_fun()|{fold_objects_fun(), foldacc()}, - boolean()|defer, - false|list(integer()), - false|leveled_codec:lastmod_range(), - false|pos_integer()) -> {async, runner_fun()}. + leveled_codec:tag(), + fold_objects_fun() | {fold_objects_fun(), foldacc()}, + boolean() | defer, + false | list(integer()), + false | leveled_codec:lastmod_range(), + false | pos_integer() +) -> {async, runner_fun()}. %% @doc -%% Fold over all heads in the store for a given tag - applying the passed +%% Fold over all heads in the store for a given tag - applying the passed %% function to each proxy object -foldheads_allkeys(SnapFun, Tag, FoldFun, JournalCheck, - SegmentList, LastModRange, MaxObjectCount) -> +foldheads_allkeys( + SnapFun, + Tag, + FoldFun, + JournalCheck, + SegmentList, + LastModRange, + MaxObjectCount +) -> StartKey = leveled_codec:to_querykey(null, null, Tag), EndKey = leveled_codec:to_querykey(null, null, Tag), - foldobjects(SnapFun, - Tag, - [{StartKey, EndKey}], - FoldFun, - {true, JournalCheck}, - SegmentList, - LastModRange, - MaxObjectCount). - --spec foldobjects_allkeys(snap_fun(), - leveled_codec:tag(), - fold_objects_fun()|{fold_objects_fun(), foldacc()}, - key_order|sqn_order) - -> {async, runner_fun()}. + foldobjects( + SnapFun, + Tag, + [{StartKey, EndKey}], + FoldFun, + {true, JournalCheck}, + SegmentList, + LastModRange, + MaxObjectCount + ). + +-spec foldobjects_allkeys( + snap_fun(), + leveled_codec:tag(), + fold_objects_fun() | {fold_objects_fun(), foldacc()}, + key_order | sqn_order +) -> + {async, runner_fun()}. 
%% @doc %% Fold over all objects for a given tag foldobjects_allkeys(SnapFun, Tag, FoldFun, key_order) -> StartKey = leveled_codec:to_querykey(null, null, Tag), EndKey = leveled_codec:to_querykey(null, null, Tag), - foldobjects(SnapFun, - Tag, - [{StartKey, EndKey}], - FoldFun, - false, - false); + foldobjects( + SnapFun, + Tag, + [{StartKey, EndKey}], + FoldFun, + false, + false + ); foldobjects_allkeys(SnapFun, Tag, FoldObjectsFun, sqn_order) -> % Fold over the journal in order of receipt {FoldFun, InitAcc} = @@ -320,14 +366,14 @@ foldobjects_allkeys(SnapFun, Tag, FoldObjectsFun, sqn_order) -> % no initial accumulator passed, and so should be just a list {FoldObjectsFun, []} end, - + FilterFun = fun(JKey, JVal, _Pos, Acc, ExtractFun) -> {SQN, InkTag, LedgerKey} = JKey, case {InkTag, leveled_codec:from_ledgerkey(Tag, LedgerKey)} of {?INKT_STND, {B, K}} -> % Ignore tombstones and non-matching Tags and Key changes - % objects. + % objects. {MinSQN, MaxSQN, BatchAcc} = Acc, case SQN of SQN when SQN < MinSQN -> @@ -342,120 +388,137 @@ foldobjects_allkeys(SnapFun, Tag, FoldObjectsFun, sqn_order) -> true ), { - case SQN of MaxSQN -> stop; _ -> loop end, - {MinSQN, MaxSQN, [{B, K, SQN, Obj}|BatchAcc]} + case SQN of + MaxSQN -> stop; + _ -> loop + end, + {MinSQN, MaxSQN, [{B, K, SQN, Obj} | BatchAcc]} } end; _ -> {loop, Acc} - end + end end, InitAccFun = fun(_FN, _SQN) -> [] end, Folder = fun() -> - {ok, LedgerSnapshot, JournalSnapshot, AfterFun} = + {ok, LedgerSnapshot, JournalSnapshot, AfterFun} = case SnapFun() of {ok, LS, JS, AF} when is_pid(JS) -> {ok, LS, JS, AF} end, {ok, JournalSQN} = leveled_inker:ink_getjournalsqn(JournalSnapshot), - IsValidFun = + IsValidFun = fun(Bucket, Key, SQN) -> LedgerKey = leveled_codec:to_objectkey(Bucket, Key, Tag), CheckSQN = leveled_penciller:pcl_checksequencenumber( - LedgerSnapshot, LedgerKey, SQN), + LedgerSnapshot, LedgerKey, SQN + ), % Need to check that we have not folded past the point % at which the snapshot was taken - (JournalSQN >= SQN) and (CheckSQN == current) + JournalSQN >= SQN andalso CheckSQN == current end, - BatchFoldFun = + BatchFoldFun = fun(BatchAcc, ObjAcc) -> - ObjFun = - fun({B, K, SQN, Obj}, Acc) -> + ObjFun = + fun({B, K, SQN, Obj}, Acc) -> case IsValidFun(B, K, SQN) of true -> FoldFun(B, K, Obj, Acc); false -> Acc - end + end end, leveled_log:log(r0001, [length(BatchAcc)]), lists:foldr(ObjFun, ObjAcc, BatchAcc) end, - - InkFolder = + + InkFolder = leveled_inker:ink_fold( - JournalSnapshot, + JournalSnapshot, 0, {FilterFun, InitAccFun, BatchFoldFun}, - InitAcc), - wrap_runner(InkFolder, AfterFun) + InitAcc + ), + wrap_runner(InkFolder, AfterFun) end, {async, Folder}. - --spec foldobjects_bybucket(snap_fun(), - leveled_codec:tag(), - list(key_range()), - fold_objects_fun()|{fold_objects_fun(), foldacc()}) - -> {async, runner_fun()}. +-spec foldobjects_bybucket( + snap_fun(), + leveled_codec:tag(), + list(key_range()), + fold_objects_fun() | {fold_objects_fun(), foldacc()} +) -> + {async, runner_fun()}. %% @doc %% Fold over all objects within a given key range in a bucket foldobjects_bybucket(SnapFun, Tag, KeyRanges, FoldFun) -> foldobjects( - SnapFun, Tag, KeyRanges, FoldFun, false, false). - --spec foldheads_bybucket(snap_fun(), - leveled_codec:tag(), - list(key_range()), - fold_objects_fun()|{fold_objects_fun(), foldacc()}, - boolean()|defer, - false|list(integer()), - false|leveled_codec:lastmod_range(), - false|pos_integer()) - -> {async, runner_fun()}. + SnapFun, Tag, KeyRanges, FoldFun, false, false + ). 
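Picking up the behaviour described for index_query above (the query tolerates a throw from the fold fun without crashing the penciller snapshot), a hypothetical worker could cap results as sketched here; the stop term {stop_fold, Acc} and the arity of the fold fun are illustrative assumptions, not a fixed protocol:

    %% Sketch (hypothetical): stop an index query early once Max keys have
    %% been accumulated, catching the partial accumulator around the runner.
    capped_index_query(SnapFun, Query, Max) ->
        FoldKeysFun =
            fun(_Bucket, _Key, Acc) when length(Acc) >= Max ->
                    throw({stop_fold, Acc});
               (_Bucket, Key, Acc) ->
                    [Key | Acc]
            end,
        {async, Runner} = index_query(SnapFun, Query, {FoldKeysFun, []}),
        try Runner() catch throw:{stop_fold, Partial} -> Partial end.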
+ +-spec foldheads_bybucket( + snap_fun(), + leveled_codec:tag(), + list(key_range()), + fold_objects_fun() | {fold_objects_fun(), foldacc()}, + boolean() | defer, + false | list(integer()), + false | leveled_codec:lastmod_range(), + false | pos_integer() +) -> + {async, runner_fun()}. %% @doc %% Fold over all object metadata within a given key range in a bucket -foldheads_bybucket(SnapFun, - Tag, - KeyRanges, - FoldFun, - JournalCheck, - SegmentList, LastModRange, MaxObjectCount) -> - foldobjects(SnapFun, - Tag, - KeyRanges, - FoldFun, - {true, JournalCheck}, - SegmentList, - LastModRange, - MaxObjectCount). - --spec foldobjects_byindex(snap_fun(), - tuple(), - fold_objects_fun()|{fold_objects_fun(), foldacc()}) - -> {async, runner_fun()}. +foldheads_bybucket( + SnapFun, + Tag, + KeyRanges, + FoldFun, + JournalCheck, + SegmentList, + LastModRange, + MaxObjectCount +) -> + foldobjects( + SnapFun, + Tag, + KeyRanges, + FoldFun, + {true, JournalCheck}, + SegmentList, + LastModRange, + MaxObjectCount + ). + +-spec foldobjects_byindex( + snap_fun(), + tuple(), + fold_objects_fun() | {fold_objects_fun(), foldacc()} +) -> + {async, runner_fun()}. %% @doc -%% Folds over an index, fetching the objects associated with the keys returned +%% Folds over an index, fetching the objects associated with the keys returned %% and passing those objects into the fold function foldobjects_byindex(SnapFun, {Tag, Bucket, Field, FromTerm, ToTerm}, FoldFun) -> StartKey = leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, Field, FromTerm), EndKey = leveled_codec:to_querykey(Bucket, null, ?IDX_TAG, Field, ToTerm), - foldobjects(SnapFun, - Tag, - [{StartKey, EndKey}], - FoldFun, - false, - false). - - + foldobjects( + SnapFun, + Tag, + [{StartKey, EndKey}], + FoldFun, + false, + false + ). %%%============================================================================ %%% Internal functions @@ -472,41 +535,55 @@ get_nextbucket(NextBucket, NextKey, Tag, LedgerSnapshot, BKList, {C, L}) -> end, R = leveled_penciller:pcl_fetchnextkey( - LedgerSnapshot, StartKey, EndKey, ExtractFun, null), + LedgerSnapshot, StartKey, EndKey, ExtractFun, null + ), case R of {1, null} -> - leveled_log:log(b0008,[]), + leveled_log:log(b0008, []), BKList; {0, {{B, K}, _V}} when is_binary(B); is_tuple(B) -> - leveled_log:log(b0009,[B]), + leveled_log:log(b0009, [B]), get_nextbucket( leveled_codec:next_key(B), null, Tag, LedgerSnapshot, - [{B, K}|BKList], + [{B, K} | BKList], {C + 1, L} ) end. - -spec foldobjects( snap_fun(), atom(), - list(), - fold_objects_fun()|{fold_objects_fun(), foldacc()}, - false|{true, boolean()}, false|list(integer())) - -> {async, runner_fun()}. + list(), + fold_objects_fun() | {fold_objects_fun(), foldacc()}, + false | {true, boolean()}, + false | list(integer()) +) -> + {async, runner_fun()}. foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) -> - foldobjects(SnapFun, Tag, KeyRanges, - FoldObjFun, DeferredFetch, SegmentList, false, false). - --spec foldobjects(snap_fun(), atom(), list(), - fold_objects_fun()|{fold_objects_fun(), foldacc()}, - false|{true, boolean()|defer}, - false|list(integer()), - false|leveled_codec:lastmod_range(), - false|pos_integer()) -> {async, runner_fun()}. + foldobjects( + SnapFun, + Tag, + KeyRanges, + FoldObjFun, + DeferredFetch, + SegmentList, + false, + false + ). 
+ +-spec foldobjects( + snap_fun(), + atom(), + list(), + fold_objects_fun() | {fold_objects_fun(), foldacc()}, + false | {true, boolean() | defer}, + false | list(integer()), + false | leveled_codec:lastmod_range(), + false | pos_integer() +) -> {async, runner_fun()}. %% @doc %% The object folder should be passed DeferredFetch. %% DeferredFetch can either be false (which will return to the fold function @@ -514,8 +591,16 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, SegmentList) -> %% will be created that if understood by the fold function will allow the fold %% function to work on the head of the object, and defer fetching the body in %% case such a fetch is unnecessary. -foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, - SegmentList, LastModRange, MaxObjectCount) -> +foldobjects( + SnapFun, + Tag, + KeyRanges, + FoldObjFun, + DeferredFetch, + SegmentList, + LastModRange, + MaxObjectCount +) -> {FoldFun, InitAcc} = case is_tuple(FoldObjFun) of true -> @@ -526,34 +611,35 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, % no initial accumulator passed, and so should be just a list {FoldObjFun, []} end, - {LimitByCount, InitAcc0} = + {LimitByCount, InitAcc0} = case MaxObjectCount of false -> {false, InitAcc}; MOC when is_integer(MOC) -> {true, {MOC, InitAcc}} end, - + Folder = fun() -> {ok, LedgerSnapshot, JournalSnapshot, AfterFun} = SnapFun(), AccFun = accumulate_objects( - FoldFun, JournalSnapshot, Tag, DeferredFetch), - FoldFunGen = + FoldFun, JournalSnapshot, Tag, DeferredFetch + ), + FoldFunGen = fun({StartKey, EndKey}, FoldAcc) -> leveled_penciller:pcl_fetchkeysbysegment( LedgerSnapshot, - StartKey, - EndKey, - AccFun, - FoldAcc, - SegmentList, - LastModRange, - LimitByCount - ) + StartKey, + EndKey, + AccFun, + FoldAcc, + SegmentList, + LastModRange, + LimitByCount + ) end, - ListFoldFun = + ListFoldFun = fun(KeyRange, Acc) -> Folder = FoldFunGen(KeyRange, Acc), Folder() @@ -564,7 +650,6 @@ foldobjects(SnapFun, Tag, KeyRanges, FoldObjFun, DeferredFetch, end, {async, Folder}. - accumulate_size() -> AccFun = fun(Key, Value, {Size, Count}) -> @@ -575,7 +660,7 @@ accumulate_size() -> accumulate_hashes(JournalCheck, InkerClone) -> AddKeyFun = fun(B, K, H, Acc) -> - [{B, K, H}|Acc] + [{B, K, H} | Acc] end, get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun). @@ -611,15 +696,20 @@ get_hashaccumulator(JournalCheck, InkerClone, AddKeyFun) -> AccFun. -spec accumulate_objects - (fold_objects_fun(), + ( + fold_objects_fun(), pid(), leveled_head:object_tag(), - false|{true, boolean()|defer}) - -> objectacc_fun(); - (fold_objects_fun(), - null, leveled_head:headonly_tag(), - {true, false}) - -> objectacc_fun(). + false | {true, boolean() | defer} + ) -> + objectacc_fun(); + ( + fold_objects_fun(), + null, + leveled_head:headonly_tag(), + {true, false} + ) -> + objectacc_fun(). 
accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) -> AccFun = fun(LK, V, Acc) -> @@ -646,13 +736,14 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) -> {true, false} when Tag == ?HEAD_TAG -> FoldObjectsFun(B, K, MD, Acc); {true, JournalCheck} when is_tuple(MD) -> - ProxyObj = + ProxyObj = leveled_codec:return_proxy(Tag, MD, InkerClone, JK), case {JournalCheck, InkerClone} of {true, InkerClone} when is_pid(InkerClone) -> InJournal = leveled_inker:ink_keycheck( - InkerClone, LK, SQN), + InkerClone, LK, SQN + ), case InJournal of probably -> FoldObjectsFun(B, K, ProxyObj, Acc); @@ -674,7 +765,6 @@ accumulate_objects(FoldObjectsFun, InkerClone, Tag, DeferredFetch) -> end, AccFun. - check_presence(Key, Value, InkerClone) -> {LedgerKey, SQN} = leveled_codec:strip_to_keyseqonly({Key, Value}), case leveled_inker:ink_keycheck(InkerClone, LedgerKey, SQN) of @@ -708,13 +798,15 @@ accumulate_keys(FoldKeysFun, TermRegex) -> %% It is assumed this is only used at present by index queries and key folds, %% but the wrap could be applied more generally with further work wrap_runner(FoldAction, AfterAction) -> - try FoldAction() - catch throw:Throw -> - throw(Throw) - after AfterAction() + try + FoldAction() + catch + throw:Throw -> + throw(Throw) + after + AfterAction() end. - %%%============================================================================ %%% Test %%%============================================================================ @@ -742,7 +834,7 @@ throw_test() -> fun() -> error end, - ?assertMatch({ok, ['1']}, wrap_runner(CompletedFolder, AfterAction)), + ?assertMatch({ok, ['1']}, wrap_runner(CompletedFolder, AfterAction)), ?assertException( throw, stop_fold, wrap_runner(StoppedFolder, AfterAction) ). @@ -750,6 +842,3 @@ throw_test() -> -endif. -endif. - - - diff --git a/src/leveled_setop.erl b/src/leveled_setop.erl index b4d70212..141182ce 100644 --- a/src/leveled_setop.erl +++ b/src/leveled_setop.erl @@ -1,23 +1,21 @@ %% -------- Set Operations --------- %% -%% Support for set operations (i.e on sets of keys) within leveled +%% Support for set operations (i.e on sets of keys) within leveled %% -module(leveled_setop). -export([generate_setop_function/1]). - %%%============================================================================ %%% External API %%%============================================================================ -spec generate_setop_function( - string()) -> - fun((#{non_neg_integer() => sets:set(binary())}) - -> sets:set(binary()) - )| - {error, term()}. + string() +) -> + fun((#{non_neg_integer() => sets:set(binary())}) -> sets:set(binary())) + | {error, term()}. 
generate_setop_function(EvalString) -> try {ok, ParsedEval} = generate_setop_expression(EvalString), @@ -45,18 +43,21 @@ apply_setop({setop, SetOp}, SetList) -> apply_setop({set_id, _, SetID}, SetList) -> get_set(SetID, SetList); apply_setop( - {SetFunctionName, {set_id, _, SetIDa}, {set_id, _, SetIDb}}, - SetList) -> + {SetFunctionName, {set_id, _, SetIDa}, {set_id, _, SetIDb}}, + SetList +) -> SetFunction = set_function(SetFunctionName), SetFunction(get_set(SetIDa, SetList), get_set(SetIDb, SetList)); apply_setop( - {SetFunctionName, {set_id, _, SetIDa}, Condition}, - SetList) -> + {SetFunctionName, {set_id, _, SetIDa}, Condition}, + SetList +) -> SetFunction = set_function(SetFunctionName), SetFunction(get_set(SetIDa, SetList), apply_setop(Condition, SetList)); apply_setop( - {SetFunctionName, Condition, {set_id, _, SetIDb}}, - SetList) -> + {SetFunctionName, Condition, {set_id, _, SetIDb}}, + SetList +) -> SetFunction = set_function(SetFunctionName), SetFunction(apply_setop(Condition, SetList), get_set(SetIDb, SetList)); apply_setop({SetFunctionName, ConditionA, ConditionB}, SetList) -> @@ -77,7 +78,6 @@ set_function('SUBTRACT') -> get_set(SetID, SetMap) -> maps:get(SetID, SetMap, sets:new()). - %%%============================================================================ %%% Test %%%============================================================================ @@ -102,7 +102,6 @@ parse_error_test() -> ?assertMatch({error, _E3}, generate_setop_function(Q3)), ?assertMatch({error, _E4}, generate_setop_function(Q4)). - parser_formal_test() -> Q1 = "($1 INTERSECT $2) UNION $3", Q2 = "($1 INTERSECT $2) UNION ($3 INTERSECT $4)", @@ -124,33 +123,33 @@ parser_tester(Q1, Q2, Q3, Q4) -> R1 = lists:sort( - sets:to_list(F1(#{1 => S1, 2 => S2, 3 => S3}) - ) - ), + sets:to_list(F1(#{1 => S1, 2 => S2, 3 => S3})) + ), R2 = lists:sort( - sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3, 4 => S4}) - ) - ), + sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3, 4 => S4})) + ), R3 = lists:sort( - sets:to_list(F3(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5}) - ) - ), - R4 = + sets:to_list(F3(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5})) + ), + R4 = lists:sort( - sets:to_list(F4(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5}) - ) - ), + sets:to_list(F4(#{1 => S1, 2 => S2, 3 => S3, 4 => S4, 5 => S5})) + ), ?assertMatch( - [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K8">>, <<"K9">>], R1), + [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K8">>, <<"K9">>], R1 + ), ?assertMatch( - [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K9">>], R2), + [<<"K3">>, <<"K4">>, <<"K5">>, <<"K7">>, <<"K9">>], R2 + ), ?assertMatch( - [<<"K3">>, <<"K7">>, <<"K9">>], R3), + [<<"K3">>, <<"K7">>, <<"K9">>], R3 + ), ?assertMatch( - [<<"K3">>, <<"K8">>], R4). + [<<"K3">>, <<"K8">>], R4 + ). minimal_test() -> S1 = sets:from_list([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>]), @@ -160,11 +159,10 @@ minimal_test() -> S2 = sets:from_list([<<"K3">>, <<"K4">>, <<"K5">>, <<"K6">>, <<"K7">>]), S3 = sets:from_list([<<"K1">>, <<"K2">>]), F2 = generate_setop_function_noerror("$1 INTERSECT ($2 UNION $3)"), - R2 = lists:sort(sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3}))), + R2 = lists:sort(sets:to_list(F2(#{1 => S1, 2 => S2, 3 => S3}))), ?assertMatch([<<"K1">>, <<"K2">>, <<"K3">>, <<"K4">>, <<"K5">>], R2), F3 = generate_setop_function_noerror("$1 INTERSECT ($2 UNION $2)"), - R3 = lists:sort(sets:to_list(F3(#{1 => S1, 2 => S2}))), + R3 = lists:sort(sets:to_list(F3(#{1 => S1, 2 => S2}))), ?assertMatch([<<"K3">>, <<"K4">>, <<"K5">>], R3). - --endif. 
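As a usage sketch of the public entry point exercised by the tests above: the expression string is one taken from the tests, the wrapper name is hypothetical, and each SetN is assumed to be a sets:set(binary()).

    %% Sketch only - illustrative wrapper around leveled_setop:generate_setop_function/1.
    eval_setop_example(Set1, Set2, Set3) ->
        case leveled_setop:generate_setop_function("($1 INTERSECT $2) UNION $3") of
            {error, Reason} ->
                %% The expression string failed to parse
                {error, Reason};
            EvalFun when is_function(EvalFun, 1) ->
                %% Apply the generated function to a map of set id => set
                EvalFun(#{1 => Set1, 2 => Set2, 3 => Set3})
        end.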
\ No newline at end of file +-endif. diff --git a/src/leveled_sst.erl b/src/leveled_sst.erl index e035cc7a..93663572 100644 --- a/src/leveled_sst.erl +++ b/src/leveled_sst.erl @@ -66,15 +66,18 @@ % Test functions to ignore for equalizer -eqwalizer({nowarn_function, fetch_status_test/0}). --define(LOOK_SLOTSIZE, 128). % Maximum of 128 --define(LOOK_BLOCKSIZE, {24, 32}). % 4x + y = ?LOOK_SLOTSIZE +% Maximum of 128 +-define(LOOK_SLOTSIZE, 128). +% 4x + y = ?LOOK_SLOTSIZE +-define(LOOK_BLOCKSIZE, {24, 32}). -define(NOLOOK_SLOTSIZE, 256). --define(NOLOOK_BLOCKSIZE, {56, 32}). % 4x + y = ?NOLOOK_SLOTSIZE +% 4x + y = ?NOLOOK_SLOTSIZE +-define(NOLOOK_BLOCKSIZE, {56, 32}). -define(COMPRESSION_FACTOR, 1). - % When using native compression - how hard should the compression code - % try to reduce the size of the compressed output. 1 Is to imply minimal - % effort, 6 is default in OTP: - % https://www.erlang.org/doc/man/erlang.html#term_to_binary-2 +% When using native compression - how hard should the compression code +% try to reduce the size of the compressed output. 1 Is to imply minimal +% effort, 6 is default in OTP: +% https://www.erlang.org/doc/man/erlang.html#term_to_binary-2 -define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_FACTOR}]). -define(MERGE_SCANWIDTH, 16). -define(DISCARD_EXT, ".discarded"). @@ -100,33 +103,39 @@ -define(START_OPTS, [{hibernate_after, ?HIBERNATE_TIMEOUT}]). --export([init/1, - callback_mode/0, - terminate/3, - code_change/4, - format_status/1]). +-export([ + init/1, + callback_mode/0, + terminate/3, + code_change/4, + format_status/1 +]). %% states --export([starting/3, - reader/3, - delete_pending/3]). - --export([sst_new/6, - sst_newmerge/8, - sst_newlevelzero/7, - sst_open/4, - sst_get/2, - sst_get/3, - sst_getsqn/3, - sst_expandpointer/5, - sst_getmaxsequencenumber/1, - sst_setfordelete/2, - sst_clear/1, - sst_checkready/1, - sst_switchlevels/2, - sst_deleteconfirmed/1, - sst_gettombcount/1, - sst_close/1]). +-export([ + starting/3, + reader/3, + delete_pending/3 +]). + +-export([ + sst_new/6, + sst_newmerge/8, + sst_newlevelzero/7, + sst_open/4, + sst_get/2, + sst_get/3, + sst_getsqn/3, + sst_expandpointer/5, + sst_getmaxsequencenumber/1, + sst_setfordelete/2, + sst_clear/1, + sst_checkready/1, + sst_switchlevels/2, + sst_deleteconfirmed/1, + sst_gettombcount/1, + sst_close/1 +]). -export([sst_newmerge/9]). @@ -136,123 +145,122 @@ -export([hmac/1, filterby_midblock/2]). --record(slot_index_value, - {slot_id :: integer(), - start_position :: integer(), - length :: integer()}). - --record(summary, - {first_key :: tuple(), - last_key :: tuple(), - index :: tuple() | undefined, - size :: integer(), - max_sqn :: integer()}). - %% DO NOT CHANGE - %% The summary record is persisted as part of the file format - %% Any change to this record will mean the change cannot be rolled back - --type slot_index_value() - :: #slot_index_value{}. --type press_method() - :: lz4|native|zstd|none. --type block_version() - :: 0|1. --type block_method() - :: {block_version(), press_method()}. --type range_endpoint() - :: all|leveled_codec:ledger_key(). --type slot_pointer() - :: {pointer, - pid(), slot_index_value(), range_endpoint(), range_endpoint()}. --type sst_pointer() +-record(slot_index_value, { + slot_id :: integer(), + start_position :: integer(), + length :: integer() +}). + +-record(summary, { + first_key :: tuple(), + last_key :: tuple(), + index :: tuple() | undefined, + size :: integer(), + max_sqn :: integer() +}). 
+%% DO NOT CHANGE +%% The summary record is persisted as part of the file format +%% Any change to this record will mean the change cannot be rolled back + +-type slot_index_value() :: + #slot_index_value{}. +-type press_method() :: + lz4 | native | zstd | none. +-type block_version() :: + 0 | 1. +-type block_method() :: + {block_version(), press_method()}. +-type range_endpoint() :: + all | leveled_codec:ledger_key(). +-type slot_pointer() :: + {pointer, pid(), slot_index_value(), range_endpoint(), range_endpoint()}. +-type sst_pointer() :: % Used in sst_new - :: {next, - leveled_pmanifest:manifest_entry(), + {next, leveled_pmanifest:manifest_entry(), range_endpoint()}. +-type sst_closed_pointer() :: + % used in expand_list_by_pointer + % (close point is added by maybe_expand_pointer + {next, leveled_pmanifest:manifest_entry(), range_endpoint(), range_endpoint()}. --type sst_closed_pointer() - % used in expand_list_by_pointer - % (close point is added by maybe_expand_pointer - :: {next, - leveled_pmanifest:manifest_entry(), - range_endpoint(), - range_endpoint()}. --type expandable_pointer() - :: slot_pointer()|sst_pointer()|sst_closed_pointer(). --type maybe_expanded_pointer() - :: leveled_codec:ledger_kv()|expandable_pointer(). +-type expandable_pointer() :: + slot_pointer() | sst_pointer() | sst_closed_pointer(). +-type maybe_expanded_pointer() :: + leveled_codec:ledger_kv() | expandable_pointer(). -type expanded_slot() :: {binary(), non_neg_integer(), range_endpoint(), range_endpoint()}. --type tuned_seglist() - :: false | list(non_neg_integer()). --type sst_options() - :: #sst_options{}. --type binary_slot() - :: { +-type tuned_seglist() :: + false | list(non_neg_integer()). +-type sst_options() :: + #sst_options{}. +-type binary_slot() :: + { binary(), binary(), list(leveled_codec:segment_hash()), leveled_codec:ledger_key() }. --type sst_summary() - :: #summary{}. --type blockindex_cache() - :: {non_neg_integer(), array:array(), non_neg_integer()}. --type fetch_cache() - :: array:array()|no_cache. --type cache_size() - :: no_cache|4|32|64. --type cache_hash() - :: no_cache|non_neg_integer(). --type summary_filter() - :: fun((leveled_codec:ledger_key()) -> any()). --type segment_check_fun() - :: non_neg_integer() - | {non_neg_integer(), non_neg_integer(), - fun((non_neg_integer()) -> boolean())} +-type sst_summary() :: + #summary{}. +-type blockindex_cache() :: + {non_neg_integer(), array:array(), non_neg_integer()}. +-type fetch_cache() :: + array:array() | no_cache. +-type cache_size() :: + no_cache | 4 | 32 | 64. +-type cache_hash() :: + no_cache | non_neg_integer(). +-type summary_filter() :: + fun((leveled_codec:ledger_key()) -> any()). +%% erlfmt:ignore - issues with editors when function definitions are split +-type segment_check_fun() :: + non_neg_integer() + | + { + non_neg_integer(), + non_neg_integer(), + fun((non_neg_integer()) -> boolean()) + } | false. --type fetch_levelzero_fun() - :: fun((pos_integer(), leveled_penciller:levelzero_returnfun()) -> ok). --type extract_hash() :: non_neg_integer()|no_lookup. - --record(read_state, - { - handle :: file:io_device(), - blockindex_cache :: blockindex_cache()|redacted, - fetch_cache :: fetch_cache()|redacted, - level :: leveled_pmanifest:lsm_level(), - filter_fun :: summary_filter() - } -). +-type fetch_levelzero_fun() :: + fun((pos_integer(), leveled_penciller:levelzero_returnfun()) -> ok). +-type extract_hash() :: non_neg_integer() | no_lookup. 
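Of the types above, block_method() is the pairing introduced by this change: a block layout version alongside the compression method applied to the blocks. A purely illustrative sketch of its shape (the particular value pairings below are hypothetical, not defaults):

    %% Sketch only - hypothetical block_method() values.
    LegacyBlocks = {0, native},   %% block version 0, native (term_to_binary) compression
    VersionedBlocks = {1, zstd},  %% block version 1, zstd compression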
+ +-record(read_state, { + handle :: file:io_device(), + blockindex_cache :: blockindex_cache() | redacted, + fetch_cache :: fetch_cache() | redacted, + level :: leveled_pmanifest:lsm_level(), + filter_fun :: summary_filter() +}). -type read_state() :: #read_state{}. --record(state, - { - summary, - penciller :: pid() | undefined | false, - root_path, - filename, - read_state :: read_state() | undefined, - block_method = {0, native} :: block_method(), - index_moddate = ?INDEX_MODDATE :: boolean(), - starting_pid :: pid()|undefined, - new_slots :: list()|undefined, - deferred_startup_tuple :: tuple()|undefined, - tomb_count = not_counted - :: non_neg_integer()|not_counted, - high_modified_date :: non_neg_integer()|undefined, - monitor = {no_monitor, 0} :: leveled_monitor:monitor() - } -). - --record(build_timings, - {slot_hashlist = 0 :: integer(), - slot_serialise = 0 :: integer(), - slot_finish = 0 :: integer(), - fold_toslot = 0 :: integer(), - last_timestamp = os:timestamp() :: erlang:timestamp()}). - --type build_timings() :: no_timing|#build_timings{}. +-record(state, { + summary, + penciller :: pid() | undefined | false, + root_path, + filename, + read_state :: read_state() | undefined, + block_method = {0, native} :: block_method(), + index_moddate = ?INDEX_MODDATE :: boolean(), + starting_pid :: pid() | undefined, + new_slots :: list() | undefined, + deferred_startup_tuple :: tuple() | undefined, + tomb_count = not_counted :: + non_neg_integer() | not_counted, + high_modified_date :: non_neg_integer() | undefined, + monitor = {no_monitor, 0} :: leveled_monitor:monitor() +}). + +-record(build_timings, { + slot_hashlist = 0 :: integer(), + slot_serialise = 0 :: integer(), + slot_finish = 0 :: integer(), + fold_toslot = 0 :: integer(), + last_timestamp = os:timestamp() :: erlang:timestamp() +}). + +-type build_timings() :: no_timing | #build_timings{}. -export_type( [ @@ -274,11 +282,10 @@ %%%============================================================================ -spec sst_open( - string(), string(), sst_options(), leveled_pmanifest:lsm_level()) - -> - {ok, pid(), - {leveled_codec:object_key(), leveled_codec:object_key()}, - binary()}. + string(), string(), sst_options(), leveled_pmanifest:lsm_level() +) -> + {ok, pid(), {leveled_codec:object_key(), leveled_codec:object_key()}, + binary()}. %% @doc %% Open an SST file at a given path and filename. The first and last keys %% are returned in response to the request - so that those keys can be used @@ -289,58 +296,69 @@ %% The filename should include the file extension. sst_open(RootPath, Filename, OptsSST, Level) -> {ok, Pid} = gen_statem:start_link(?MODULE, [], ?START_OPTS), - case gen_statem:call( - Pid, {sst_open, RootPath, Filename, OptsSST, Level}, infinity) of + case + gen_statem:call( + Pid, {sst_open, RootPath, Filename, OptsSST, Level}, infinity + ) + of {ok, {SK, EK}, Bloom} -> {ok, Pid, {SK, EK}, Bloom} end. --spec sst_new(string(), string(), leveled_pmanifest:lsm_level(), - list(leveled_codec:ledger_kv()), - integer(), sst_options()) - -> {ok, pid(), - {leveled_codec:object_key(), - leveled_codec:object_key()}, - binary()}. +-spec sst_new( + string(), + string(), + leveled_pmanifest:lsm_level(), + list(leveled_codec:ledger_kv()), + integer(), + sst_options() +) -> + {ok, pid(), {leveled_codec:object_key(), leveled_codec:object_key()}, + binary()}. %% @doc %% Start a new SST file at the assigned level passing in a list of Key, Value %% pairs. 
This should not be used for basement levels or unexpanded Key/Value %% lists as merge_lists will not be called. sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST) -> sst_new( - RootPath, Filename, Level, KVList, MaxSQN, OptsSST, ?INDEX_MODDATE). + RootPath, Filename, Level, KVList, MaxSQN, OptsSST, ?INDEX_MODDATE + ). sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> {ok, Pid} = gen_statem:start_link(?MODULE, [], ?START_OPTS), OptsSST0 = update_options(OptsSST, Level), - {[], [], SlotList, FK, _CountOfTombs} = + {[], [], SlotList, FK, _CountOfTombs} = merge_lists(KVList, OptsSST0, IndexModDate), - case gen_statem:call(Pid, {sst_new, - RootPath, - Filename, - Level, - {SlotList, FK}, - MaxSQN, - OptsSST0, - IndexModDate, - not_counted, - self()}, - infinity) of + case + gen_statem:call( + Pid, + {sst_new, RootPath, Filename, Level, {SlotList, FK}, MaxSQN, + OptsSST0, IndexModDate, not_counted, self()}, + infinity + ) + of {ok, {SK, EK}, Bloom} -> {ok, Pid, {SK, EK}, Bloom} end. --spec sst_newmerge(string(), string(), - list(maybe_expanded_pointer()), - list(maybe_expanded_pointer()), - boolean(), leveled_pmanifest:lsm_level(), - integer(), sst_options()) - -> empty|{ok, pid(), - {{list(leveled_codec:ledger_kv()), - list(leveled_codec:ledger_kv())}, - leveled_codec:object_key(), - leveled_codec:object_key()}, - binary()}. +-spec sst_newmerge( + string(), + string(), + list(maybe_expanded_pointer()), + list(maybe_expanded_pointer()), + boolean(), + leveled_pmanifest:lsm_level(), + integer(), + sst_options() +) -> + empty + | {ok, pid(), + { + {list(leveled_codec:ledger_kv()), list(leveled_codec:ledger_kv())}, + leveled_codec:object_key(), + leveled_codec:object_key() + }, + binary()}. %% @doc %% Start a new SST file at the assigned level passing in a two lists of %% {Key, Value} pairs to be merged. The merge_lists function will use the @@ -352,18 +370,41 @@ sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, IndexModDate) -> %% be that the merge_lists returns nothing (for example when a basement file is %% all tombstones) - and the atom empty is returned in this case so that the %% file is not added to the manifest. -sst_newmerge(RootPath, Filename, - KVL1, KVL2, IsBasement, Level, - MaxSQN, OptsSST) when Level > 0 -> - sst_newmerge(RootPath, Filename, - KVL1, KVL2, IsBasement, Level, - MaxSQN, OptsSST, ?INDEX_MODDATE). - -sst_newmerge(RootPath, Filename, - KVL1, KVL2, IsBasement, Level, - MaxSQN, OptsSST, IndexModDate) -> +sst_newmerge( + RootPath, + Filename, + KVL1, + KVL2, + IsBasement, + Level, + MaxSQN, + OptsSST +) when Level > 0 -> + sst_newmerge( + RootPath, + Filename, + KVL1, + KVL2, + IsBasement, + Level, + MaxSQN, + OptsSST, + ?INDEX_MODDATE + ). + +sst_newmerge( + RootPath, + Filename, + KVL1, + KVL2, + IsBasement, + Level, + MaxSQN, + OptsSST, + IndexModDate +) -> OptsSST0 = update_options(OptsSST, Level), - {Rem1, Rem2, SlotList, FK, CountOfTombs} = + {Rem1, Rem2, SlotList, FK, CountOfTombs} = merge_lists( KVL1, KVL2, @@ -376,48 +417,41 @@ sst_newmerge(RootPath, Filename, empty; _ -> {ok, Pid} = gen_statem:start_link(?MODULE, [], ?START_OPTS), - {ok, {SK, EK}, Bloom} = + {ok, {SK, EK}, Bloom} = gen_statem:call( Pid, - {sst_new, - RootPath, - Filename, - Level, - {SlotList, FK}, - MaxSQN, - OptsSST0, - IndexModDate, - CountOfTombs, - self() - }, - infinity), + {sst_new, RootPath, Filename, Level, {SlotList, FK}, MaxSQN, + OptsSST0, IndexModDate, CountOfTombs, self()}, + infinity + ), {ok, Pid, {{Rem1, Rem2}, SK, EK}, Bloom} end. 
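As a usage sketch of the merge entry point described above: the wrapper name is hypothetical, SSTOpts is assumed to be an #sst_options{} record, and KVL1/KVL2 are the two input lists of keys, values and pointers. The caller is expected to handle both the empty result and the remainder lists returned alongside the new process.

    %% Sketch only - merging two lists into a new file at level 2.
    merge_to_level2(RootPath, FileName, KVL1, KVL2, MaxSQN, SSTOpts) ->
        case leveled_sst:sst_newmerge(
                RootPath, FileName, KVL1, KVL2, false, 2, MaxSQN, SSTOpts) of
            empty ->
                %% Nothing survived the merge, so no file is created and
                %% nothing should be added to the manifest
                empty;
            {ok, Pid, {{Rem1, Rem2}, StartKey, EndKey}, Bloom} ->
                %% Rem1/Rem2 are the unmerged tails of the two input lists,
                %% to be carried forward into the next merge
                {ok, Pid, {{Rem1, Rem2}, StartKey, EndKey}, Bloom}
        end.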
-spec sst_newlevelzero( - string(), string(), + string(), + string(), integer(), - fetch_levelzero_fun()|list(), - pid()|undefined, + fetch_levelzero_fun() | list(), + pid() | undefined, integer(), - sst_options()) -> {ok, pid(), noreply}. + sst_options() +) -> {ok, pid(), noreply}. %% @doc %% Start a new file at level zero. At this level the file size is not fixed - %% it will be as big as the input. Also the KVList is not passed in, it is %% fetched slot by slot using the FetchFun sst_newlevelzero( - RootPath, Filename, Slots, Fetcher, Penciller, MaxSQN, OptsSST) -> + RootPath, Filename, Slots, Fetcher, Penciller, MaxSQN, OptsSST +) -> OptsSST0 = update_options(OptsSST, 0), {ok, Pid} = gen_statem:start_link(?MODULE, [], ?START_OPTS), %% Initiate the file into the "starting" state - ok = gen_statem:call(Pid, {sst_newlevelzero, - RootPath, - Filename, - Penciller, - MaxSQN, - OptsSST0, - ?INDEX_MODDATE}, - infinity), + ok = gen_statem:call( + Pid, + {sst_newlevelzero, RootPath, Filename, Penciller, MaxSQN, OptsSST0, + ?INDEX_MODDATE}, + infinity + ), ok = case Fetcher of SlotList when is_list(SlotList) -> @@ -427,10 +461,10 @@ sst_newlevelzero( end, {ok, Pid, noreply}. - -spec sst_get( - pid(), leveled_codec:object_key()) -> - leveled_codec:ledger_kv()|not_present. + pid(), leveled_codec:object_key() +) -> + leveled_codec:ledger_kv() | not_present. %% @doc %% Return a Key, Value pair matching a Key or not_present if the Key is not in %% the store. The segment_hash function is used to accelerate the seeking of @@ -439,8 +473,9 @@ sst_get(Pid, LedgerKey) -> sst_get(Pid, LedgerKey, leveled_codec:segment_hash(LedgerKey)). -spec sst_get( - pid(), leveled_codec:object_key(), leveled_codec:segment_hash()) - -> leveled_codec:ledger_kv()|not_present. + pid(), leveled_codec:object_key(), leveled_codec:segment_hash() +) -> + leveled_codec:ledger_kv() | not_present. %% @doc %% Return a Key, Value pair matching a Key or not_present if the Key is not in %% the store (with the magic hash precalculated). @@ -448,8 +483,9 @@ sst_get(Pid, LedgerKey, Hash) -> gen_statem:call(Pid, {get_kv, LedgerKey, Hash, undefined}, infinity). -spec sst_getsqn( - pid(), leveled_codec:object_key(), leveled_codec:segment_hash()) - -> leveled_codec:sqn()|not_present. + pid(), leveled_codec:object_key(), leveled_codec:segment_hash() +) -> + leveled_codec:sqn() | not_present. %% @doc %% Return a SQN for the key or not_present if the key is not in %% the store (with the magic hash precalculated). @@ -467,7 +503,8 @@ sst_getmaxsequencenumber(Pid) -> list(expandable_pointer()), pos_integer(), segment_check_fun(), - non_neg_integer()) -> list(maybe_expanded_pointer()). + non_neg_integer() +) -> list(maybe_expanded_pointer()). %% @doc %% Expand out a list of pointer to return a list of Keys and Values with a %% tail of pointers (once the ScanWidth has been satisfied). @@ -476,9 +513,10 @@ sst_getmaxsequencenumber(Pid) -> %% or sst_getfilteredrange depending on the nature of the pointer. sst_expandpointer(Pointer, MorePointers, ScanWidth, SegChecker, LowLastMod) -> expand_list_by_pointer( - Pointer, MorePointers, ScanWidth, SegChecker, LowLastMod). + Pointer, MorePointers, ScanWidth, SegChecker, LowLastMod + ). --spec sst_setfordelete(pid(), pid()|false) -> ok. +-spec sst_setfordelete(pid(), pid() | false) -> ok. %% @doc %% If the SST is no longer in use in the active ledger it can be set for %% delete. 
Once set for delete it will poll the Penciller pid to see if @@ -488,7 +526,7 @@ sst_expandpointer(Pointer, MorePointers, ScanWidth, SegChecker, LowLastMod) -> sst_setfordelete(Pid, Penciller) -> gen_statem:call(Pid, {set_for_delete, Penciller}, infinity). --spec sst_gettombcount(pid()) -> non_neg_integer()|not_counted. +-spec sst_gettombcount(pid()) -> non_neg_integer() | not_counted. %% @doc %% Get the count of tombstones in this SST file, returning not_counted if this %% file was created with a version which did not support tombstone counting, or @@ -511,7 +549,7 @@ sst_clear(Pid) -> sst_deleteconfirmed(Pid) -> gen_statem:cast(Pid, close). --spec sst_checkready(pid()) -> +-spec sst_checkready(pid()) -> {ok, string(), leveled_codec:object_key(), leveled_codec:object_key()}. %% @doc %% If a file has been set to be built, check that it has been built. Returns @@ -544,34 +582,32 @@ callback_mode() -> init([]) -> {ok, starting, #state{}}. -starting({call, From}, - {sst_open, RootPath, Filename, OptsSST, Level}, - State) -> +starting( + {call, From}, + {sst_open, RootPath, Filename, OptsSST, Level}, + State +) -> leveled_log:save(OptsSST#sst_options.log_options), Monitor = OptsSST#sst_options.monitor, {UpdState, Bloom} = read_file( Filename, - State#state{root_path=RootPath}, + State#state{root_path = RootPath}, OptsSST#sst_options.pagecache_level >= Level, undefined, Level ), Summary = UpdState#state.summary, - {next_state, - reader, - UpdState#state{monitor = Monitor}, - [{reply, From, - {ok, - {Summary#summary.first_key, Summary#summary.last_key}, - Bloom} - }]}; -starting({call, From}, - {sst_new, - RootPath, Filename, Level, - {SlotList, FirstKey}, MaxSQN, - OptsSST, IdxModDate, CountOfTombs, StartingPID}, - State) -> + {next_state, reader, UpdState#state{monitor = Monitor}, [ + {reply, From, + {ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom}} + ]}; +starting( + {call, From}, + {sst_new, RootPath, Filename, Level, {SlotList, FirstKey}, MaxSQN, OptsSST, + IdxModDate, CountOfTombs, StartingPID}, + State +) -> SW = os:timestamp(), leveled_log:save(OptsSST#sst_options.log_options), Monitor = OptsSST#sst_options.monitor, @@ -584,10 +620,12 @@ starting({call, From}, BlockEntries, new_blockindex_cache(Length), undefined, - IdxModDate), + IdxModDate + ), SummaryBin = build_table_summary( - SlotIndex, Level, FirstKey, Length, MaxSQN, Bloom, CountOfTombs), + SlotIndex, Level, FirstKey, Length, MaxSQN, Bloom, CountOfTombs + ), ActualFilename = write_file( RootPath, @@ -601,43 +639,47 @@ starting({call, From}, {UpdState, Bloom} = read_file( ActualFilename, - State#state{root_path=RootPath}, + State#state{root_path = RootPath}, OptsSST#sst_options.pagecache_level >= Level, BlockIndexCache, Level ), Summary = UpdState#state.summary, leveled_log:log_timer( - sst08, [ActualFilename, Level, Summary#summary.max_sqn], SW), + sst08, [ActualFilename, Level, Summary#summary.max_sqn], SW + ), erlang:send_after(?STARTUP_TIMEOUT, self(), start_complete), - {next_state, - reader, + {next_state, reader, UpdState#state{ high_modified_date = HighModDate, starting_pid = StartingPID, - monitor = Monitor}, - [{reply, - From, - {ok, {Summary#summary.first_key, Summary#summary.last_key}, Bloom} - }]}; -starting({call, From}, {sst_newlevelzero, RootPath, Filename, - Penciller, MaxSQN, - OptsSST, IdxModDate}, State) -> + monitor = Monitor + }, + [ + {reply, From, + {ok, {Summary#summary.first_key, Summary#summary.last_key}, + Bloom}} + ]}; +starting( + {call, From}, + {sst_newlevelzero, RootPath, 
Filename, Penciller, MaxSQN, OptsSST, + IdxModDate}, + State +) -> DeferredStartupTuple = - {RootPath, Filename, Penciller, MaxSQN, OptsSST, - IdxModDate}, + {RootPath, Filename, Penciller, MaxSQN, OptsSST, IdxModDate}, {next_state, starting, State#state{ - deferred_startup_tuple = DeferredStartupTuple}, + deferred_startup_tuple = DeferredStartupTuple + }, [{reply, From, ok}]}; starting({call, From}, close, State) -> %% No file should have been created, so nothing to close. {stop_and_reply, normal, [{reply, From, ok}], State}; - starting(cast, {complete_l0startup, Slots}, State) -> - {keep_state, - State#state{new_slots = Slots}, - [{next_event, cast, complete_l0startup}]}; + {keep_state, State#state{new_slots = Slots}, [ + {next_event, cast, complete_l0startup} + ]}; starting(cast, complete_l0startup, State) -> {RootPath, Filename, Penciller, MaxSQN, OptsSST, IdxModDate} = State#state.deferred_startup_tuple, @@ -647,7 +689,7 @@ starting(cast, complete_l0startup, State) -> leveled_log:save(OptsSST#sst_options.log_options), Monitor = OptsSST#sst_options.monitor, PressMethod = OptsSST#sst_options.press_method, - BlockVersion= OptsSST#sst_options.block_version, + BlockVersion = OptsSST#sst_options.block_version, FetchFun = fun(Slot) -> lists:nth(Slot, FetchedSlots) end, KVList = leveled_pmem:to_list(length(FetchedSlots), FetchFun), Time0 = timer:now_diff(os:timestamp(), SW0), @@ -658,20 +700,22 @@ starting(cast, complete_l0startup, State) -> Time1 = timer:now_diff(os:timestamp(), SW1), SW2 = os:timestamp(), - {SlotCount, SlotIndex, BlockEntries, SlotsBin,Bloom} = + {SlotCount, SlotIndex, BlockEntries, SlotsBin, Bloom} = build_all_slots(SlotList), {_, BlockIndexCache, HighModDate} = update_blockindex_cache( BlockEntries, new_blockindex_cache(SlotCount), undefined, - IdxModDate), + IdxModDate + ), Time2 = timer:now_diff(os:timestamp(), SW2), SW3 = os:timestamp(), SummaryBin = build_table_summary( - SlotIndex, 0, FirstKey, SlotCount, MaxSQN, Bloom, not_counted), + SlotIndex, 0, FirstKey, SlotCount, MaxSQN, Bloom, not_counted + ), Time3 = timer:now_diff(os:timestamp(), SW3), SW4 = os:timestamp(), @@ -689,9 +733,11 @@ starting(cast, complete_l0startup, State) -> read_file( ActualFilename, State#state{ - root_path=RootPath, - new_slots=undefined, % Important to empty this from state - deferred_startup_tuple=undefined}, + root_path = RootPath, + % Important to empty this from state + new_slots = undefined, + deferred_startup_tuple = undefined + }, true, BlockIndexCache, 0 @@ -700,7 +746,8 @@ starting(cast, complete_l0startup, State) -> Time4 = timer:now_diff(os:timestamp(), SW4), leveled_log:log_timer( - sst08, [ActualFilename, 0, Summary#summary.max_sqn], SW0), + sst08, [ActualFilename, 0, Summary#summary.max_sqn], SW0 + ), leveled_log:log(sst11, [Time0, Time1, Time2, Time3, Time4]), case Penciller of @@ -712,19 +759,19 @@ starting(cast, complete_l0startup, State) -> UpdState#state.filename, Summary#summary.first_key, Summary#summary.last_key, - Bloom), + Bloom + ), ok end, - {next_state, - reader, - UpdState#state{ - high_modified_date = HighModDate, - monitor = Monitor}}; + {next_state, reader, UpdState#state{ + high_modified_date = HighModDate, + monitor = Monitor + }}; starting(cast, {sst_returnslot, FetchedSlot, FetchFun, SlotCount}, State) -> FetchedSlots = case {FetchedSlot, State#state.new_slots} of {FS, PreviousSlots} when FS =/= none, is_list(PreviousSlots) -> - [FetchedSlot|PreviousSlots]; + [FetchedSlot | PreviousSlots]; _ -> [] end, @@ -733,23 +780,26 @@ starting(cast, {sst_returnslot, 
FetchedSlot, FetchFun, SlotCount}, State) -> {keep_state, %% Reverse the slots so that they are back in the expected %% order - State#state{new_slots = lists:reverse(FetchedSlots)}, - [{next_event, cast, complete_l0startup}]}; + State#state{new_slots = lists:reverse(FetchedSlots)}, [ + {next_event, cast, complete_l0startup} + ]}; false -> Self = self(), ReturnFun = fun(NextSlot) -> gen_statem:cast( - Self, {sst_returnslot, NextSlot, FetchFun, SlotCount}) + Self, {sst_returnslot, NextSlot, FetchFun, SlotCount} + ) end, FetchFun(length(FetchedSlots) + 1, ReturnFun), - {keep_state, - State#state{new_slots = FetchedSlots}} + {keep_state, State#state{new_slots = FetchedSlots}} end. -reader({call, From}, - {get_kv, LedgerKey, Hash, Filter}, - State = #state{read_state = RS}) when ?IS_DEF(RS)-> +reader( + {call, From}, + {get_kv, LedgerKey, Hash, Filter}, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> % Get a KV value and potentially take sample timings Monitor = case Filter of @@ -758,7 +808,7 @@ reader({call, From}, _ -> {no_monitor, 0} end, - FetchResult = + FetchResult = fetch( LedgerKey, Hash, @@ -771,7 +821,8 @@ reader({call, From}, RS#read_state.fetch_cache, RS#read_state.handle, RS#read_state.level, - Monitor), + Monitor + ), FilterFun = case Filter of undefined -> fun(KV) -> KV end; @@ -782,23 +833,26 @@ reader({call, From}, {keep_state_and_data, [{reply, From, FilterFun(KV)}]}; {KV, no_update, no_update, FC} -> RS0 = RS#read_state{fetch_cache = FC}, - {keep_state, - State#state{read_state = RS0}, - [{reply, From, FilterFun(KV)}]}; + {keep_state, State#state{read_state = RS0}, [ + {reply, From, FilterFun(KV)} + ]}; {KV, BIC, undefined, no_update} when BIC =/= no_update -> RS0 = RS#read_state{blockindex_cache = BIC}, - {keep_state, - State#state{read_state = RS0}, - [{reply, From, FilterFun(KV)}]}; + {keep_state, State#state{read_state = RS0}, [ + {reply, From, FilterFun(KV)} + ]}; {KV, BIC, HMD, no_update} when BIC =/= no_update, HMD =/= no_update -> RS0 = RS#read_state{blockindex_cache = BIC}, {keep_state, - State#state{read_state = RS0, high_modified_date = HMD}, - [hibernate, {reply, From, FilterFun(KV)}]} + State#state{read_state = RS0, high_modified_date = HMD}, [ + hibernate, {reply, From, FilterFun(KV)} + ]} end; -reader({call, From}, - {fetch_range, StartKey, EndKey, LowLastMod}, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> +reader( + {call, From}, + {fetch_range, StartKey, EndKey, LowLastMod}, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> SlotsToPoint = fetch_range( StartKey, @@ -808,12 +862,15 @@ reader({call, From}, check_modified( State#state.high_modified_date, LowLastMod, - State#state.index_moddate) - ), + State#state.index_moddate + ) + ), {keep_state_and_data, [{reply, From, SlotsToPoint}]}; -reader({call, From}, - {get_slots, SlotList, SegChecker, LowLastMod}, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> +reader( + {call, From}, + {get_slots, SlotList, SegChecker, LowLastMod}, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> BlockMethod = State#state.block_method, IdxModDate = State#state.index_moddate, {NeedBlockIdx, SlotBins} = @@ -822,41 +879,40 @@ reader({call, From}, SlotList, {SegChecker, LowLastMod, RS#read_state.blockindex_cache}, State#state.block_method, - State#state.index_moddate), - {keep_state_and_data, - [{reply, From, {NeedBlockIdx, SlotBins, BlockMethod, IdxModDate}}]}; + State#state.index_moddate + ), + {keep_state_and_data, [ + {reply, From, {NeedBlockIdx, SlotBins, BlockMethod, IdxModDate}} + ]}; 
reader({call, From}, get_maxsequencenumber, State) -> Summary = State#state.summary, - {keep_state_and_data, - [{reply, From, Summary#summary.max_sqn}]}; + {keep_state_and_data, [{reply, From, Summary#summary.max_sqn}]}; reader({call, From}, {set_for_delete, Penciller}, State) -> leveled_log:log(sst06, [State#state.filename]), - {next_state, - delete_pending, - State#state{penciller=Penciller}, - [{reply, From,ok}, ?DELETE_TIMEOUT]}; + {next_state, delete_pending, State#state{penciller = Penciller}, [ + {reply, From, ok}, ?DELETE_TIMEOUT + ]}; reader({call, From}, background_complete, State) -> Summary = State#state.summary, - {keep_state_and_data, - [{reply, - From, - {ok, - State#state.filename, - Summary#summary.first_key, - Summary#summary.last_key} - }]}; + {keep_state_and_data, [ + {reply, From, + {ok, State#state.filename, Summary#summary.first_key, + Summary#summary.last_key}} + ]}; reader({call, From}, get_tomb_count, State) -> - {keep_state_and_data, - [{reply, From, State#state.tomb_count}]}; -reader({call, From}, - close, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> + {keep_state_and_data, [{reply, From, State#state.tomb_count}]}; +reader( + {call, From}, + close, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> ok = file:close(RS#read_state.handle), {stop_and_reply, normal, [{reply, From, ok}], State}; - -reader(cast, - {switch_levels, NewLevel}, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> +reader( + cast, + {switch_levels, NewLevel}, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> {keep_state, State#state{ read_state = @@ -875,9 +931,10 @@ reader(info, bic_complete, State) -> leveled_log:log(sst14, [State#state.filename]), {keep_state_and_data, [hibernate]}; reader( - info, - start_complete, - #state{starting_pid = StartingPid}) when ?IS_DEF(StartingPid) -> + info, + start_complete, + #state{starting_pid = StartingPid} +) when ?IS_DEF(StartingPid) -> % The SST file will be started by a clerk, but the clerk may be shut down % prior to the manifest being updated about the existence of this SST file. % If there is no activity after startup, check the clerk is still alive and @@ -892,13 +949,15 @@ reader( {stop, normal} end. 
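The get_kv path handled by the reader state above is what backs the sst_get API. A minimal point-lookup sketch follows; the wrapper name is hypothetical, Pid is assumed to be an open leveled_sst process, and the ledger key shape shown is illustrative only.

    %% Sketch only - point lookup against an open SST process.
    lookup_example(Pid) ->
        LK = {o, <<"Bucket">>, <<"Key1">>, null},
        case leveled_sst:sst_get(Pid, LK) of
            not_present ->
                not_found;
            {LK, Value} ->
                Value
        end.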
- -delete_pending({call, From}, - {get_kv, LedgerKey, Hash, Filter}, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> - {KeyValue, _BIC, _HMD, _FC} = +delete_pending( + {call, From}, + {get_kv, LedgerKey, Hash, Filter}, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> + {KeyValue, _BIC, _HMD, _FC} = fetch( - LedgerKey, Hash, + LedgerKey, + Hash, State#state.summary, State#state.block_method, State#state.high_modified_date, @@ -908,7 +967,8 @@ delete_pending({call, From}, RS#read_state.fetch_cache, RS#read_state.handle, RS#read_state.level, - {no_monitor, 0}), + {no_monitor, 0} + ), Result = case Filter of undefined -> @@ -918,9 +978,10 @@ delete_pending({call, From}, end, {keep_state_and_data, [{reply, From, Result}, ?DELETE_TIMEOUT]}; delete_pending( - {call, From}, - {fetch_range, StartKey, EndKey, LowLastMod}, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> + {call, From}, + {fetch_range, StartKey, EndKey, LowLastMod}, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> SlotsToPoint = fetch_range( StartKey, @@ -930,13 +991,15 @@ delete_pending( check_modified( State#state.high_modified_date, LowLastMod, - State#state.index_moddate) - ), + State#state.index_moddate + ) + ), {keep_state_and_data, [{reply, From, SlotsToPoint}, ?DELETE_TIMEOUT]}; delete_pending( - {call, From}, - {get_slots, SlotList, SegChecker, LowLastMod}, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> + {call, From}, + {get_slots, SlotList, SegChecker, LowLastMod}, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> BlockMethod = State#state.block_method, IdxModDate = State#state.index_moddate, {_NeedBlockIdx, SlotBins} = @@ -945,31 +1008,36 @@ delete_pending( SlotList, {SegChecker, LowLastMod, RS#read_state.blockindex_cache}, BlockMethod, - IdxModDate), - {keep_state_and_data, - [{reply, From, {false, SlotBins, BlockMethod, IdxModDate}}, - ?DELETE_TIMEOUT]}; + IdxModDate + ), + {keep_state_and_data, [ + {reply, From, {false, SlotBins, BlockMethod, IdxModDate}}, + ?DELETE_TIMEOUT + ]}; delete_pending( - {call, From}, - close, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> + {call, From}, + close, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> leveled_log:log(sst07, [State#state.filename]), ok = file:close(RS#read_state.handle), - ok = + ok = file:delete( - filename:join(State#state.root_path, State#state.filename)), + filename:join(State#state.root_path, State#state.filename) + ), {stop_and_reply, normal, [{reply, From, ok}], State}; delete_pending( - cast, - close, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> + cast, + close, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> leveled_log:log(sst07, [State#state.filename]), ok = file:close(RS#read_state.handle), - ok = + ok = file:delete( - filename:join(State#state.root_path, State#state.filename)), + filename:join(State#state.root_path, State#state.filename) + ), {stop, normal, State}; - delete_pending(info, _Event, _State) -> % Ignore messages when pending delete. The message may have interrupted % the delete timeout, so timeout straight away @@ -987,23 +1055,23 @@ delete_pending(timeout, _, State) -> {keep_state_and_data, [rand:uniform(10) * ?DELETE_TIMEOUT]}. 
handle_update_blockindex_cache( - BIC, - State = #state{read_state = RS}) when ?IS_DEF(RS) -> + BIC, + State = #state{read_state = RS} +) when ?IS_DEF(RS) -> {NeedBlockIdx, BlockIndexCache, HighModDate} = update_blockindex_cache( BIC, RS#read_state.blockindex_cache, State#state.high_modified_date, - State#state.index_moddate), + State#state.index_moddate + ), case NeedBlockIdx of true -> - {keep_state, - State#state{ - read_state = - RS#read_state{blockindex_cache = BlockIndexCache}, - high_modified_date = HighModDate - } - }; + {keep_state, State#state{ + read_state = + RS#read_state{blockindex_cache = BlockIndexCache}, + high_modified_date = HighModDate + }}; false -> keep_state_and_data end. @@ -1016,7 +1084,6 @@ terminate(Reason, _StateName, State) -> code_change(_OldVsn, StateName, State, _Extra) -> {ok, StateName, State}. - format_status(Status) -> case {maps:get(reason, Status, normal), maps:get(state, Status)} of {terminate, State = #state{read_state = RS}} when ?IS_DEF(RS) -> @@ -1028,14 +1095,13 @@ format_status(Status) -> blockindex_cache = redacted, fetch_cache = redacted } - }, + }, Status ); _ -> Status end. - %%%============================================================================ %%% External Functions %%%============================================================================ @@ -1043,7 +1109,8 @@ format_status(Status) -> -spec expand_list_by_pointer( expandable_pointer(), list(expandable_pointer()), - pos_integer()) -> list(maybe_expanded_pointer()). + pos_integer() +) -> list(maybe_expanded_pointer()). %% @doc %% Expand a list of pointers, maybe ending up with a list of keys and values %% with a tail of pointers @@ -1060,26 +1127,31 @@ expand_list_by_pointer(Pointer, Tail, Width) -> list(expandable_pointer()), pos_integer(), segment_check_fun(), - non_neg_integer()) -> list(maybe_expanded_pointer()). + non_neg_integer() +) -> list(maybe_expanded_pointer()). %% @doc %% With filters (as described in expand_list_by_pointer/3 expand_list_by_pointer( - {pointer, SSTPid, Slot, StartKey, EndKey}, - Tail, Width, SegChecker, LowLastMod) -> + {pointer, SSTPid, Slot, StartKey, EndKey}, + Tail, + Width, + SegChecker, + LowLastMod +) -> {PotentialPointers, Remainder} = lists:split(min(Width - 1, length(Tail)), Tail), {LocalPointers, OtherPointers} = split_localpointers(SSTPid, PotentialPointers), sst_getfilteredslots( SSTPid, - [{pointer, SSTPid, Slot, StartKey, EndKey}|LocalPointers], + [{pointer, SSTPid, Slot, StartKey, EndKey} | LocalPointers], SegChecker, LowLastMod, OtherPointers ++ Remainder ); expand_list_by_pointer( - {next, ME, StartKey, EndKey}, Tail, _Width, _SegChecker, LowLastMod - ) -> + {next, ME, StartKey, EndKey}, Tail, _Width, _SegChecker, LowLastMod +) -> % The first pointer is a pointer to a file - expand_list_by_pointer will % in this case convert this into list of pointers within that SST file % i.e. of the form {pointer, SSTPid, Slot, StartKey, EndKey} @@ -1090,48 +1162,53 @@ expand_list_by_pointer( ExpPointer = sst_getfilteredrange(SSTPid, StartKey, EndKey, LowLastMod), ExpPointer ++ Tail. +%% erlfmt:ignore-begin - issues with editors when function definitions are split -spec split_localpointers( - pid(), list(expandable_pointer())) -> - {list(slot_pointer()), list(expandable_pointer())}. + pid(), list(expandable_pointer()) +) -> + {list(slot_pointer()), list(expandable_pointer())}. 
split_localpointers(LocalPid, PotentialPointers) -> lists:partition( fun({pointer, PID, _S, _SK, _EK}) when PID == LocalPid -> - true; - (_) -> - false + true; + (_) -> + false end, PotentialPointers ). +%% erlfmt:ignore-end -spec sst_getfilteredrange( pid(), range_endpoint(), range_endpoint(), - non_neg_integer()) -> list(slot_pointer()). + non_neg_integer() +) -> list(slot_pointer()). %% @doc %% Get a list of slot_pointers that contain the information to look into those %% slots to find the actual {K, V} pairs between the range endpoints. %% Expanding these slot_pointers can be done using sst_getfilteredslots/5 -%% +%% %% Use segment_checker/1 to produce a segment_check_fun if the hashes of the %% keys to be found are known. The LowLastMod integer will skip any blocks %% where all keys were modified before thta date. sst_getfilteredrange(Pid, StartKey, EndKey, LowLastMod) -> gen_statem:call( - Pid, {fetch_range, StartKey, EndKey, LowLastMod}, infinity). - + Pid, {fetch_range, StartKey, EndKey, LowLastMod}, infinity + ). -spec sst_getfilteredslots( pid(), list(slot_pointer()), segment_check_fun(), non_neg_integer(), - list(expandable_pointer())) -> list(leveled_codec:ledger_kv()). + list(expandable_pointer()) +) -> list(leveled_codec:ledger_kv()). %% @doc %% Get a list of slots by their ID. The slot will be converted from the binary %% to term form outside of the FSM loop, unless a segment_check_fun is passed, %% and this process has cached the index to be used by the segment_check_fun, -%% and in this case the list of Slotbins will include the actual {K, V} pairs. +%% and in this case the list of Slotbins will include the actual {K, V} pairs. %% %% Use segment_checker/1 to produce a segment_check_fun if the hashes of the %% keys to be found are known. The LowLastMod integer will skip any blocks @@ -1141,10 +1218,12 @@ sst_getfilteredrange(Pid, StartKey, EndKey, LowLastMod) -> sst_getfilteredslots(Pid, SlotList, SegChecker, LowLastMod, Pointers) -> {NeedBlockIdx, SlotBins, PressMethod, IdxModDate} = gen_statem:call( - Pid, {get_slots, SlotList, SegChecker, LowLastMod}, infinity), + Pid, {get_slots, SlotList, SegChecker, LowLastMod}, infinity + ), {L, BIC} = binaryslot_reader( - SlotBins, PressMethod, IdxModDate, SegChecker, Pointers), + SlotBins, PressMethod, IdxModDate, SegChecker, Pointers + ), case NeedBlockIdx of true -> erlang:send(Pid, {update_blockindex_cache, BIC}); @@ -1154,7 +1233,8 @@ sst_getfilteredslots(Pid, SlotList, SegChecker, LowLastMod, Pointers) -> L. -spec find_pos( - binary(), segment_check_fun()) -> list(non_neg_integer()). + binary(), segment_check_fun() +) -> list(non_neg_integer()). %% @doc %% Find a list of positions where there is an element with a matching segment %% ID to the expected segments (which can either be a single segment, a list of @@ -1166,36 +1246,44 @@ find_pos(Bin, {Min, Max, CheckFun}) -> find_posmlt(Bin, Min, Max, CheckFun, [], 0). find_posint(<>, H, PosList, Count) -> - find_posint(T, H, [Count|PosList], Count + 1); -find_posint(<>, H, PosList, Count) - when Miss >= ?MIN_HASH -> + find_posint(T, H, [Count | PosList], Count + 1); +find_posint(<>, H, PosList, Count) when + Miss >= ?MIN_HASH +-> find_posint(T, H, PosList, Count + 1); find_posint(<>, H, PosList, Count) when NHC < 128 -> find_posint(T, H, PosList, Count + NHC + 1); find_posint(_BinRem, _H, PosList, _Count) -> lists:reverse(PosList). 
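The filtered range and slot calls above are normally reached through pointer expansion rather than called directly. A sketch of expanding a file pointer with no segment filtering and no low last-modified date; the wrapper name is hypothetical and ManifestEntry, StartKey and EndKey are assumed inputs.

    %% Sketch only - a scan width of 16, a SegChecker of false (no segment
    %% filtering) and a LowLastMod of 0 (no modified-date filtering).
    expand_example(ManifestEntry, StartKey, EndKey) ->
        FilePointer = {next, ManifestEntry, StartKey, EndKey},
        %% Returns {K, V} pairs, possibly ending in a tail of
        %% {pointer, Pid, Slot, SK, EK} slot pointers still to be expanded
        leveled_sst:sst_expandpointer(FilePointer, [], 16, false, 0).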
-find_posmlt(<>, Min, Max, CheckFun, PosList, Count) - when H >= Min, H =< Max -> +find_posmlt(<>, Min, Max, CheckFun, PosList, Count) when + H >= Min, H =< Max +-> case CheckFun(H) of true -> - find_posmlt(T, Min, Max, CheckFun, [Count|PosList], Count + 1); + find_posmlt(T, Min, Max, CheckFun, [Count | PosList], Count + 1); false -> find_posmlt(T, Min, Max, CheckFun, PosList, Count + 1) end; -find_posmlt(<>, Min, Max, CheckFun, PosList, Count) - when Miss >= ?MIN_HASH -> +find_posmlt( + <>, Min, Max, CheckFun, PosList, Count +) when + Miss >= ?MIN_HASH +-> find_posmlt(T, Min, Max, CheckFun, PosList, Count + 1); -find_posmlt(<>, Min, Max, CheckFun, PosList, Count) - when NHC < 128 -> +find_posmlt( + <>, Min, Max, CheckFun, PosList, Count +) when + NHC < 128 +-> find_posmlt(T, Min, Max, CheckFun, PosList, Count + NHC + 1); find_posmlt(_BinRem, _Min, _Max, _CheckFun, PosList, _Count) -> lists:reverse(PosList). - -spec segment_checker( - non_neg_integer()| list(non_neg_integer())| false) - -> segment_check_fun(). + non_neg_integer() | list(non_neg_integer()) | false +) -> + segment_check_fun(). segment_checker(Hash) when is_integer(Hash) -> Hash; segment_checker(HashList) when is_list(HashList) -> @@ -1216,21 +1304,21 @@ segment_checker(HashList) when is_list(HashList) -> segment_checker(false) -> false. --spec sqn_only(leveled_codec:ledger_kv()|not_present) - -> leveled_codec:sqn()|not_present. +-spec sqn_only(leveled_codec:ledger_kv() | not_present) -> + leveled_codec:sqn() | not_present. sqn_only(not_present) -> not_present; sqn_only(KV) -> leveled_codec:strip_to_seqonly(KV). -spec extract_hash( - leveled_codec:segment_hash()) -> extract_hash(). + leveled_codec:segment_hash() +) -> extract_hash(). extract_hash({SegHash, _ExtraHash}) when is_integer(SegHash) -> tune_hash(SegHash); extract_hash(NotHash) -> NotHash. - -spec new_cache(leveled_pmanifest:lsm_level()) -> fetch_cache(). new_cache(Level) -> case cache_size(Level) of @@ -1269,27 +1357,31 @@ cache_size(_LowerLevel) -> -spec get_from_fetchcache( cache_hash(), - fetch_cache()) -> none|leveled_codec:ledger_kv(). + fetch_cache() +) -> none | leveled_codec:ledger_kv(). get_from_fetchcache(_CacheHash, no_cache) -> none; get_from_fetchcache(CacheHash, Cache) when is_integer(CacheHash) -> % When defining array can use array:array/1 but this will lead to type % issues when using array:new - % eqwalizer:ignore + % eqwalizer:ignore array:get(CacheHash, Cache). -spec add_to_cache( - non_neg_integer()|no_cache, + non_neg_integer() | no_cache, leveled_codec:ledger_kv(), - fetch_cache()) -> fetch_cache(). + fetch_cache() +) -> fetch_cache(). add_to_cache( - CacheHash, KV, FetchCache) - when is_integer(CacheHash), FetchCache =/= no_cache -> + CacheHash, KV, FetchCache +) when + is_integer(CacheHash), FetchCache =/= no_cache +-> array:set(CacheHash, KV, FetchCache); add_to_cache(_CacheHash, _KV, _FetchCache) -> no_cache. --spec get_from_blockcache(pos_integer(), blockindex_cache()) -> binary()|none. +-spec get_from_blockcache(pos_integer(), blockindex_cache()) -> binary() | none. get_from_blockcache(SlotID, BIC) when is_integer(SlotID) -> case array:get(SlotID - 1, element(2, BIC)) of CBI when is_binary(CBI) -> @@ -1299,8 +1391,9 @@ get_from_blockcache(SlotID, BIC) when is_integer(SlotID) -> end. -spec add_to_blockcache( - pos_integer(), blockindex_cache(), binary(), non_neg_integer()|false) -> - blockindex_cache(). + pos_integer(), blockindex_cache(), binary(), non_neg_integer() | false +) -> + blockindex_cache(). 
add_to_blockcache(SlotID, {Cnt, Cache, HighLMD}, Block, LMD) -> { Cnt + 1, @@ -1337,12 +1430,10 @@ tune_seglist(SegList) -> false end. - %%%============================================================================ %%% Internal Functions %%%============================================================================ - -spec update_options(sst_options(), non_neg_integer()) -> sst_options(). update_options(OptsSST, Level) -> CompressLevel = OptsSST#sst_options.press_level, @@ -1352,8 +1443,11 @@ update_options(OptsSST, Level) -> maxslots_level(Level, OptsSST#sst_options.max_sstslots), OptsSST#sst_options{press_method = PressMethod0, max_sstslots = MaxSlots0}. +%% erlfmt:ignore - issues with editors when function definitions are split -spec updatebic_foldfun(boolean()) -> - fun(({integer(), binary()|none}, blockindex_cache()) -> blockindex_cache()). + fun(({integer(), binary() | none}, blockindex_cache()) + -> blockindex_cache() + ). updatebic_foldfun(HMDRequired) -> fun(CacheEntry, BIC) -> case CacheEntry of @@ -1368,7 +1462,7 @@ updatebic_foldfun(HMDRequired) -> ExtractLMD; false -> false - end, + end, add_to_blockcache(ID, BIC, Header, LMD); _ -> BIC @@ -1379,11 +1473,12 @@ updatebic_foldfun(HMDRequired) -> end. -spec update_blockindex_cache( - list({integer(), binary()|none}), - blockindex_cache(), - non_neg_integer()|undefined, - boolean()) -> - {boolean(), blockindex_cache(), non_neg_integer()|undefined}. + list({integer(), binary() | none}), + blockindex_cache(), + non_neg_integer() | undefined, + boolean() +) -> + {boolean(), blockindex_cache(), non_neg_integer() | undefined}. update_blockindex_cache(Entries, BIC, HighModDate, IdxModDate) -> case {element(1, BIC), size_of_blockcache(BIC)} of {N, N} -> @@ -1408,11 +1503,14 @@ update_blockindex_cache(Entries, BIC, HighModDate, IdxModDate) -> end end. --spec check_modified(non_neg_integer()|undefined, - non_neg_integer(), - boolean()) -> boolean(). -check_modified(HighLastModifiedInSST, LowModDate, true) - when is_integer(HighLastModifiedInSST) -> +-spec check_modified( + non_neg_integer() | undefined, + non_neg_integer(), + boolean() +) -> boolean(). +check_modified(HighLastModifiedInSST, LowModDate, true) when + is_integer(HighLastModifiedInSST) +-> LowModDate =< HighLastModifiedInSST; check_modified(_, _, _) -> true. @@ -1422,26 +1520,39 @@ check_modified(_, _, _) -> leveled_codec:segment_hash(), sst_summary(), block_method(), - non_neg_integer()|undefined, + non_neg_integer() | undefined, boolean(), summary_filter(), blockindex_cache(), fetch_cache(), file:fd(), leveled_pmanifest:lsm_level(), - leveled_monitor:monitor()) - -> {not_present|leveled_codec:ledger_kv(), - blockindex_cache()|no_update, - non_neg_integer()|undefined|no_update, - fetch_cache()|no_update}. + leveled_monitor:monitor() +) -> + { + not_present | leveled_codec:ledger_kv(), + blockindex_cache() | no_update, + non_neg_integer() | undefined | no_update, + fetch_cache() | no_update + }. %% @doc %% Fetch a key from the store, potentially taking timings. Result should be %% not_present if the key is not in the store. 
-fetch(LedgerKey, Hash, - Summary, - BlockMethod, HighModDate, IndexModDate, FilterFun, BIC, FetchCache, - Handle, Level, Monitor) -> +fetch( + LedgerKey, + Hash, + Summary, + BlockMethod, + HighModDate, + IndexModDate, + FilterFun, + BIC, + FetchCache, + Handle, + Level, + Monitor +) -> SW0 = leveled_monitor:maybe_time(Monitor), Slot = lookup_slot(LedgerKey, Summary#summary.index, FilterFun), @@ -1453,17 +1564,21 @@ fetch(LedgerKey, Hash, SlotBin = read_slot(Handle, Slot), {Result, Header} = binaryslot_get( - SlotBin, LedgerKey, Hash, BlockMethod, IndexModDate), + SlotBin, LedgerKey, Hash, BlockMethod, IndexModDate + ), {_UpdateState, BIC0, HMD0} = update_blockindex_cache( - [{SlotID, Header}], BIC, HighModDate, IndexModDate), + [{SlotID, Header}], BIC, HighModDate, IndexModDate + ), case Result of not_present -> maybelog_fetch_timing( - Monitor, Level, not_found, SW0); + Monitor, Level, not_found, SW0 + ); _ -> maybelog_fetch_timing( - Monitor, Level, slot_noncachedblock, SW0) + Monitor, Level, slot_noncachedblock, SW0 + ) end, {Result, BIC0, HMD0, no_update}; {BlockLengths, _LMD, PosBin} -> @@ -1481,7 +1596,8 @@ fetch(LedgerKey, Hash, case get_from_fetchcache(CacheHash, FetchCache) of {LedgerKey, V} -> maybelog_fetch_timing( - Monitor, Level, fetch_cache, SW0), + Monitor, Level, fetch_cache, SW0 + ), {{LedgerKey, V}, no_update, no_update, no_update}; _ -> StartPos = Slot#slot_index_value.start_position, @@ -1498,16 +1614,20 @@ fetch(LedgerKey, Hash, case Result of not_present -> maybelog_fetch_timing( - Monitor, Level, not_found, SW0), - {not_present, - no_update, no_update, no_update}; + Monitor, Level, not_found, SW0 + ), + {not_present, no_update, no_update, + no_update}; {LK, LV} -> FetchCache0 = add_to_cache( - CacheHash, {LK, LV}, FetchCache), + CacheHash, {LK, LV}, FetchCache + ), maybelog_fetch_timing( - Monitor, Level, slot_cachedblock, SW0), - {{LK, LV}, + Monitor, Level, slot_cachedblock, SW0 + ), + { + {LK, LV}, no_update, no_update, FetchCache0 @@ -1517,53 +1637,64 @@ fetch(LedgerKey, Hash, end end. - -spec fetch_range( range_endpoint(), range_endpoint(), sst_summary(), summary_filter(), - boolean()) -> list(slot_pointer()). + boolean() +) -> list(slot_pointer()). %% @doc %% Fetch pointers to the slots the SST file covered by a given key range. fetch_range(StartKey, EndKey, Summary, FilterFun, true) -> {Slots, RTrim} = lookup_slots(StartKey, EndKey, Summary#summary.index, FilterFun), Self = self(), - case {Slots, if not RTrim -> all; RTrim -> EndKey end} of + case + {Slots, + if + not RTrim -> all; + RTrim -> EndKey + end} + of {[Slot], LastKey} -> [{pointer, Self, Slot, StartKey, LastKey}]; - {[Hd|Rest], all} -> + {[Hd | Rest], all} -> RightPointers = lists:map( fun(S) -> {pointer, Self, S, all, all} end, - Rest), - [{pointer, Self, Hd, StartKey, all}|RightPointers]; - {[Hd|Rest], LastKey} -> + Rest + ), + [{pointer, Self, Hd, StartKey, all} | RightPointers]; + {[Hd | Rest], LastKey} -> {MidSlots, [Last]} = lists:split(length(Rest) - 1, Rest), MidSlotPointers = lists:map( fun(S) -> {pointer, Self, S, all, all} end, - MidSlots), - [{pointer, Self, Hd, StartKey, all}|MidSlotPointers] - ++ [{pointer, Self, Last, all, LastKey}] + MidSlots + ), + [{pointer, Self, Hd, StartKey, all} | MidSlotPointers] ++ + [{pointer, Self, Last, all, LastKey}] end; fetch_range(_StartKey, _EndKey, _Summary, _FilterFun, false) -> %% This has been pre-checked to be uninteresting (i.e. due to modified date) []. 
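For illustration, a range that spans three slots (with a right-trimmed end key) produces pointers of the following shapes: only the first and last pointers carry the real endpoints, while fully covered middle slots use the atom all on both sides. Pid and the slot_index_value records here are hypothetical.

    %% Illustrative result shape only.
    [{pointer, Pid, FirstSlot, StartKey, all},
     {pointer, Pid, MiddleSlot, all, all},
     {pointer, Pid, LastSlot, all, EndKey}]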
-spec compress_level( - non_neg_integer(), non_neg_integer(), press_method()) -> press_method(). + non_neg_integer(), non_neg_integer(), press_method() +) -> press_method(). %% @doc %% Disable compression at higher levels for improved performance compress_level( - Level, LevelToCompress, _PressMethod) when Level < LevelToCompress -> + Level, LevelToCompress, _PressMethod +) when Level < LevelToCompress -> none; compress_level(_Level, _LevelToCompress, PressMethod) -> PressMethod. -spec maxslots_level( - leveled_pmanifest:lsm_level(), pos_integer()|infinity) -> pos_integer()|infinity. + leveled_pmanifest:lsm_level(), pos_integer() | infinity +) -> pos_integer() | infinity. maxslots_level(_Level, infinity) -> infinity; maxslots_level(Level, MaxSlotCount) when Level < ?DOUBLESIZE_LEVEL -> @@ -1571,23 +1702,31 @@ maxslots_level(Level, MaxSlotCount) when Level < ?DOUBLESIZE_LEVEL -> maxslots_level(_Level, MaxSlotCount) -> 2 * MaxSlotCount. -write_file(RootPath, Filename, SummaryBin, SlotsBin, - BlockMethod, IdxModDate, CountOfTombs) -> +write_file( + RootPath, + Filename, + SummaryBin, + SlotsBin, + BlockMethod, + IdxModDate, + CountOfTombs +) -> SummaryLength = byte_size(SummaryBin), SlotsLength = byte_size(SlotsBin), {PendingName, FinalName} = generate_filenames(Filename), FileVersion = gen_fileversion(BlockMethod, IdxModDate, CountOfTombs), case filelib:is_file(filename:join(RootPath, FinalName)) of true -> - AltName = filename:join(RootPath, filename:basename(FinalName)) - ++ ?DISCARD_EXT, + AltName = + filename:join(RootPath, filename:basename(FinalName)) ++ + ?DISCARD_EXT, leveled_log:log(sst05, [FinalName, AltName]), ok = file:rename(filename:join(RootPath, FinalName), AltName); false -> ok end, - ok = + ok = leveled_util:safe_rename( filename:join(RootPath, PendingName), filename:join(RootPath, FinalName), @@ -1606,16 +1745,19 @@ read_file(Filename, State, LoadPageCache, BIC, Level) -> {Handle, FileVersion, SummaryBin} = open_reader( filename:join(State#state.root_path, Filename), - LoadPageCache), + LoadPageCache + ), UpdState0 = imp_fileversion(FileVersion, State), {Summary, Bloom, SlotList, TombCount} = read_table_summary(SummaryBin, UpdState0#state.tomb_count), {SlotIndex, FilterFun} = from_list( - SlotList, Summary#summary.first_key, Summary#summary.last_key), + SlotList, Summary#summary.first_key, Summary#summary.last_key + ), UpdSummary = Summary#summary{index = SlotIndex}, leveled_log:log( - sst03, [Filename, Summary#summary.size, Summary#summary.max_sqn]), + sst03, [Filename, Summary#summary.size, Summary#summary.max_sqn] + ), ReadState = #read_state{ handle = Handle, @@ -1631,20 +1773,22 @@ read_file(Filename, State, LoadPageCache, BIC, Level) -> filter_fun = FilterFun }, - {UpdState0#state{ + { + UpdState0#state{ summary = UpdSummary, filename = Filename, tomb_count = TombCount, read_state = ReadState }, - Bloom}. + Bloom + }. 
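read_file/5 above is the internal engine behind the sst_open/4 API. Reopening a persisted file might look like the sketch below; the wrapper name, path and filename are hypothetical, and SSTOpts is assumed to be an #sst_options{} record.

    %% Sketch only - reopen an existing file at level 3.
    open_example(SSTOpts) ->
        {ok, Pid, {FirstKey, LastKey}, Bloom} =
            leveled_sst:sst_open("ledger/ledger_files", "leveled_1.sst", SSTOpts, 3),
        %% FirstKey/LastKey can populate the manifest entry for the file;
        %% Bloom is the file's bloom filter in binary form
        {Pid, FirstKey, LastKey, Bloom}.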
gen_fileversion({BlockVersion, PressMethod}, IdxModDate, CountOfTombs) -> - % Native or none can be treated the same once written, as reader - % does not need to know as compression info will be in header of the + % Native or none can be treated the same once written, as reader + % does not need to know as compression info will be in header of the % block - Bit1 = - case PressMethod of + Bit1 = + case PressMethod of lz4 -> 1; native -> 0; none -> 0; @@ -1657,7 +1801,7 @@ gen_fileversion({BlockVersion, PressMethod}, IdxModDate, CountOfTombs) -> false -> 0 end, - Bit3 = + Bit3 = case CountOfTombs of not_counted -> 0; @@ -1670,7 +1814,7 @@ gen_fileversion({BlockVersion, PressMethod}, IdxModDate, CountOfTombs) -> 8; _ -> 0 - end, + end, Bit5 = case BlockVersion of 0 -> @@ -1681,8 +1825,8 @@ gen_fileversion({BlockVersion, PressMethod}, IdxModDate, CountOfTombs) -> Bit1 + Bit2 + Bit3 + Bit4 + Bit5. imp_fileversion(VersionInt, State) -> - CompressionMethod0 = - case VersionInt band 1 of + CompressionMethod0 = + case VersionInt band 1 of 0 -> native; 1 -> @@ -1722,9 +1866,8 @@ imp_fileversion(VersionInt, State) -> open_reader(Filename, LoadPageCache) -> {ok, Handle} = file:open(Filename, [binary, raw, read]), {ok, Lengths} = file:pread(Handle, 0, 9), - <> = Lengths, + <> = + Lengths, case LoadPageCache of true -> file:advise(Handle, 9, SlotsLength, will_need); @@ -1735,17 +1878,20 @@ open_reader(Filename, LoadPageCache) -> {Handle, FileVersion, SummaryBin}. build_table_summary( - SlotIndex, _Level, FirstKey, SlotCount, MaxSQN, Bloom, CountOfTombs) -> - [{LastKey, _LastV}|_Rest] = SlotIndex, + SlotIndex, _Level, FirstKey, SlotCount, MaxSQN, Bloom, CountOfTombs +) -> + [{LastKey, _LastV} | _Rest] = SlotIndex, Summary = #summary{ first_key = FirstKey, last_key = LastKey, size = SlotCount, - max_sqn = MaxSQN}, + max_sqn = MaxSQN + }, SummBin0 = term_to_binary( - {Summary, Bloom, lists:reverse(SlotIndex)}, ?BINARY_SETTINGS), + {Summary, Bloom, lists:reverse(SlotIndex)}, ?BINARY_SETTINGS + ), SummBin = case CountOfTombs of not_counted -> @@ -1756,11 +1902,13 @@ build_table_summary( SummCRC = hmac(SummBin), <>. --spec read_table_summary(binary(), not_counted|non_neg_integer()) -> - {sst_summary(), - leveled_ebloom:bloom(), - list(tuple()), - not_counted|non_neg_integer()}. +-spec read_table_summary(binary(), not_counted | non_neg_integer()) -> + { + sst_summary(), + leveled_ebloom:bloom(), + list(tuple()), + not_counted | non_neg_integer() + }. %% @doc %% Read the table summary - format varies depending on file version (presence %% of tomb count) @@ -1773,40 +1921,54 @@ read_table_summary(BinWithCheck, TombCount) -> case TombCount of not_counted -> erlang:append_element( - binary_to_term(SummBin), not_counted); + binary_to_term(SummBin), not_counted + ); _ -> <> = SummBin, erlang:append_element(binary_to_term(SummBin0), I) end end. - build_all_slots(SlotList) -> SlotCount = length(SlotList), {SlotIndex, BlockIndex, SlotsBin, HashLists} = build_all_slots( - SlotList, 9, 1, [], [], <<>>, []), + SlotList, 9, 1, [], [], <<>>, [] + ), Bloom = leveled_ebloom:create_bloom(HashLists), {SlotCount, SlotIndex, BlockIndex, SlotsBin, Bloom}. 
build_all_slots( - [], - _Pos, _SlotID, SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists) -> + [], + _Pos, + _SlotID, + SlotIdxAcc, + BlockIdxAcc, + SlotBinAcc, + HashLists +) -> {SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists}; build_all_slots( - [SlotD|Rest], - Pos, SlotID, SlotIdxAcc, BlockIdxAcc, SlotBinAcc, HashLists) -> + [SlotD | Rest], + Pos, + SlotID, + SlotIdxAcc, + BlockIdxAcc, + SlotBinAcc, + HashLists +) -> {BlockIdx, SlotBin, HashList, LastKey} = SlotD, Length = byte_size(SlotBin), SlotIndexV = #slot_index_value{ - slot_id = SlotID, start_position = Pos, length = Length}, + slot_id = SlotID, start_position = Pos, length = Length + }, build_all_slots( Rest, Pos + Length, SlotID + 1, - [{LastKey, SlotIndexV}|SlotIdxAcc], - [{SlotID, BlockIdx}|BlockIdxAcc], + [{LastKey, SlotIndexV} | SlotIdxAcc], + [{SlotID, BlockIdx} | BlockIdxAcc], <>, lists:append(HashList, HashLists) ). @@ -1816,16 +1978,20 @@ generate_filenames(RootFilename) -> Components = filename:split(RootFilename), case Ext of [] -> - {filename:join(Components) ++ ".pnd", - filename:join(Components) ++ ".sst"}; + { + filename:join(Components) ++ ".pnd", + filename:join(Components) ++ ".sst" + }; Ext -> DN = filename:dirname(RootFilename), FP_NOEXT = filename:basename(RootFilename, Ext), - {filename:join(DN, FP_NOEXT) ++ ".pnd", - filename:join(DN, FP_NOEXT) ++ ".sst"} + { + filename:join(DN, FP_NOEXT) ++ ".pnd", + filename:join(DN, FP_NOEXT) ++ ".sst" + } end. --spec hmac(binary()|integer()) -> integer(). +-spec hmac(binary() | integer()) -> integer(). %% @doc %% Perform a CRC check on an input hmac(Bin) when is_binary(Bin) -> @@ -1846,17 +2012,27 @@ from_list(SlotList, FirstKey, LastKey) -> FilterFun = get_filterfun(FirstKey, LastKey), FilteredList = lists:map(fun({K, S}) -> {FilterFun(K), S} end, SlotList), - {leveled_tree:from_orderedlist(FilteredList, ?TREE_TYPE, ?TREE_SIZE), - FilterFun}. + { + leveled_tree:from_orderedlist(FilteredList, ?TREE_TYPE, ?TREE_SIZE), + FilterFun + }. +%% erlfmt:ignore - issues with editors when function definitions are split -spec get_filterfun( - leveled_codec:ledger_key(), leveled_codec:ledger_key()) -> - fun((leveled_codec:ledger_key()) - -> leveled_codec:ledger_key()|leveled_codec:slimmed_key()). + leveled_codec:ledger_key(), leveled_codec:ledger_key() +) -> + fun((leveled_codec:ledger_key()) + -> leveled_codec:ledger_key() | leveled_codec:slimmed_key() + ). get_filterfun( - {?IDX_TAG, B, {Field, FT}, FK}, {?IDX_TAG, B, {Field, LT}, LK}) - when is_binary(Field), - is_binary(FT), is_binary(FK), is_binary(LT), is_binary(LK) -> + {?IDX_TAG, B, {Field, FT}, FK}, {?IDX_TAG, B, {Field, LT}, LK} +) when + is_binary(Field), + is_binary(FT), + is_binary(FK), + is_binary(LT), + is_binary(LK) +-> case {binary:longest_common_prefix([FT, LT]), byte_size(FT)} of {N, M} when N > 0, M >= N -> <> = FT, @@ -1865,15 +2041,16 @@ get_filterfun( fun term_filter/1 end; get_filterfun( - {Tag, B, FK, null}, {Tag, B, LK, null}) - when is_binary(FK), is_binary(LK) -> + {Tag, B, FK, null}, {Tag, B, LK, null} +) when + is_binary(FK), is_binary(LK) +-> case {binary:longest_common_prefix([FK, LK]), byte_size(FK)} of {N, M} when N > 0, M >= N -> <> = FK, key_prefix_filter(N, Prefix); _ -> fun key_filter/1 - end; get_filterfun(_FirstKey, _LastKey) -> fun null_filter/1. @@ -1885,12 +2062,13 @@ null_filter(Key) -> Key. key_filter({_Tag, _Bucket, Key, null}) -> Key. -spec term_filter(leveled_codec:ledger_key()) -> leveled_codec:slimmed_key(). 
-term_filter({_Tag, _Bucket, {_Field, Term}, Key}) -> +term_filter({_Tag, _Bucket, {_Field, Term}, Key}) -> {Term, Key}. -spec key_prefix_filter( - pos_integer(), binary()) -> - fun((leveled_codec:ledger_key()) -> leveled_codec:slimmed_key()). + pos_integer(), binary() +) -> + fun((leveled_codec:ledger_key()) -> leveled_codec:slimmed_key()). key_prefix_filter(N, Prefix) -> fun({_Tag, _Bucket, Key, null}) -> case Key of @@ -1902,8 +2080,9 @@ key_prefix_filter(N, Prefix) -> end. -spec term_prefix_filter( - pos_integer(), binary()) -> - fun((leveled_codec:ledger_key()) -> leveled_codec:slimmed_key()). + pos_integer(), binary() +) -> + fun((leveled_codec:ledger_key()) -> leveled_codec:slimmed_key()). term_prefix_filter(N, Prefix) -> fun({_Tag, _Bucket, {_Field, Term}, Key}) -> case Term of @@ -1948,11 +2127,13 @@ lookup_slots(StartKey, EndKey, Tree, FilterFun) -> FilteredStartKey, FilteredEndKey, Tree, - StartKeyFun), + StartKeyFun + ), {EK, _EndSlot} = lists:last(SlotList), - {lists:map(MapFun, SlotList), - leveled_codec:endkey_passed(FilteredEndKey, EK)}. - + { + lists:map(MapFun, SlotList), + leveled_codec:endkey_passed(FilteredEndKey, EK) + }. %%%============================================================================ %%% Slot Implementation @@ -1978,24 +2159,29 @@ lookup_slots(StartKey, EndKey, Tree, FilterFun) -> %% The binary index is cacheable and doubles as a not_present filter, as it is %% based on a 15-bit hash. - -spec accumulate_positions( - list(leveled_codec:ledger_kv()), - {binary(), - non_neg_integer(), - list(leveled_codec:segment_hash()), - leveled_codec:last_moddate()}) -> - {binary(), - non_neg_integer(), - list(leveled_codec:segment_hash()), - non_neg_integer()}. + list(leveled_codec:ledger_kv()), + { + binary(), + non_neg_integer(), + list(leveled_codec:segment_hash()), + leveled_codec:last_moddate() + } +) -> + { + binary(), + non_neg_integer(), + list(leveled_codec:segment_hash()), + non_neg_integer() + }. %% @doc %% Fold function use to accumulate the position information needed to %% populate the summary of the slot accumulate_positions( - [], {PosBin, NoHashCount, HashAcc, LMDAcc}) when is_integer(LMDAcc) -> + [], {PosBin, NoHashCount, HashAcc, LMDAcc} +) when is_integer(LMDAcc) -> {PosBin, NoHashCount, HashAcc, LMDAcc}; -accumulate_positions([{K, V}|T], {PosBin, NoHashCount, HashAcc, LMDAcc}) -> +accumulate_positions([{K, V} | T], {PosBin, NoHashCount, HashAcc, LMDAcc}) -> {_SQN, H1, LMD} = leveled_codec:strip_to_indexdetails({K, V}), LMDAcc0 = take_max_lastmoddate(LMD, LMDAcc), case extract_hash(H1) of @@ -2004,10 +2190,12 @@ accumulate_positions([{K, V}|T], {PosBin, NoHashCount, HashAcc, LMDAcc}) -> 0 -> accumulate_positions( T, - {<>, + { + <>, 0, - [H1|HashAcc], - LMDAcc0} + [H1 | HashAcc], + LMDAcc0 + } ); N when N =< 128 -> % The No Hash Count is an integer between 0 and 127 @@ -2015,20 +2203,24 @@ accumulate_positions([{K, V}|T], {PosBin, NoHashCount, HashAcc, LMDAcc}) -> NHC = N - 1, accumulate_positions( T, - {<>, + { + <>, 0, - [H1|HashAcc], - LMDAcc0}) + [H1 | HashAcc], + LMDAcc0 + } + ) end; _ -> accumulate_positions( - T, {PosBin, NoHashCount + 1, HashAcc, LMDAcc0}) + T, {PosBin, NoHashCount + 1, HashAcc, LMDAcc0} + ) end. - -spec take_max_lastmoddate( - leveled_codec:last_moddate(), leveled_codec:last_moddate()) - -> leveled_codec:last_moddate(). + leveled_codec:last_moddate(), leveled_codec:last_moddate() +) -> + leveled_codec:last_moddate(). %% @doc %% Get the last modified date. 
If no Last Modified Date on any object, can't %% add the accelerator and should check each object in turn @@ -2039,15 +2231,17 @@ take_max_lastmoddate(LMD, LMDAcc) -> -spec generate_binary_slot( leveled_codec:maybe_lookup(), - {forward|reverse, list(leveled_codec:ledger_kv())}, + {forward | reverse, list(leveled_codec:ledger_kv())}, block_method(), boolean(), - build_timings()) -> {binary_slot(), build_timings()}. + build_timings() +) -> {binary_slot(), build_timings()}. %% @doc %% Generate the serialised slot to be used when storing this sublist of keys %% and values generate_binary_slot( - Lookup, {DR, KVL0}, BlockMethod, IndexModDate, BuildTimings0) -> + Lookup, {DR, KVL0}, BlockMethod, IndexModDate, BuildTimings0 +) -> % The slot should be received reversed - get last key before flipping % accumulate_positions/2 should use the reversed KVL for efficiency {KVL, KVLr} = @@ -2196,29 +2390,30 @@ generate_binary_slot( {{Header, SlotBin, HashL, LastKey}, BuildTimings3}. - -spec check_blocks_allkeys( list(integer()), - binary()|{file:io_device(), integer()}, + binary() | {file:io_device(), integer()}, binary(), integer(), block_method(), boolean(), - list()) -> - list(leveled_codec:ledger_kv()). + list() +) -> + list(leveled_codec:ledger_kv()). %% @doc %% Acc should start as not_present if LedgerKey is a key, and a list if %% LedgerKey is false check_blocks_allkeys([], _BP, _BLs, _PBL, _BM, _IMD, Acc) -> lists:reverse(Acc); check_blocks_allkeys( - [Pos|Rest], - BlockPointer, - BlockLengths, - PosBinLength, - BlockMethod, - IdxModDate, - Acc) -> + [Pos | Rest], + BlockPointer, + BlockLengths, + PosBinLength, + BlockMethod, + IdxModDate, + Acc +) -> {BlockNumber, BlockPos} = revert_position(Pos), BlockBin = read_block( @@ -2237,40 +2432,43 @@ check_blocks_allkeys( PosBinLength, BlockMethod, IdxModDate, - [{K, V}|Acc] + [{K, V} | Acc] ) end. -spec check_blocks_matchkey( list(integer()), - binary()|{file:io_device(), integer()}, + binary() | {file:io_device(), integer()}, binary(), integer(), leveled_codec:ledger_key(), block_method(), - boolean()) -> - not_present|leveled_codec:ledger_kv(). + boolean() +) -> + not_present | leveled_codec:ledger_kv(). %% @doc %% Acc should start as not_present if LedgerKey is a key, and a list if %% LedgerKey is false check_blocks_matchkey([], _BP, _BLs, _PBL, _LKTC, _PM, _IMD) -> not_present; check_blocks_matchkey( - [Pos|Rest], - BlockPointer, - BlockLengths, - PosBinLength, - LedgerKeyToCheck, - BlockMethod, - IdxModDate) -> + [Pos | Rest], + BlockPointer, + BlockLengths, + PosBinLength, + LedgerKeyToCheck, + BlockMethod, + IdxModDate +) -> {BlockNumber, BlockPos} = revert_position(Pos), BlockBin = - read_block(BlockPointer, - BlockLengths, - PosBinLength, - BlockNumber, - additional_offset(IdxModDate) - ), + read_block( + BlockPointer, + BlockLengths, + PosBinLength, + BlockNumber, + additional_offset(IdxModDate) + ), CheckResult = spawn_check_block(BlockPos, BlockBin, BlockMethod), case {CheckResult, LedgerKeyToCheck} of {{K, V}, K} -> @@ -2285,10 +2483,10 @@ check_blocks_matchkey( BlockMethod, IdxModDate ) -end. + end. --spec spawn_check_block(non_neg_integer(), binary(), block_method()) - -> not_present|leveled_codec:ledger_kv(). +-spec spawn_check_block(non_neg_integer(), binary(), block_method()) -> + not_present | leveled_codec:ledger_kv(). 
spawn_check_block(BlockPos, BlockBin, BlockMethod) -> Parent = self(), Pid = @@ -2297,7 +2495,9 @@ spawn_check_block(BlockPos, BlockBin, BlockMethod) -> check_block(Parent, BlockPos, BlockBin, BlockMethod) end ), - receive {checked_block, Pid, R} -> R end. + receive + {checked_block, Pid, R} -> R + end. check_block(From, BlockPos, BlockBin, BlockMethod) -> R = leveled_sstblock:get_nth(BlockPos, BlockBin, BlockMethod), @@ -2312,13 +2512,13 @@ additional_offset(true) -> additional_offset(false) -> ?BLOCK_LENGTHS_LENGTH + 4 + 4 + 4. - read_block({Handle, StartPos}, BlockLengths, PosBinLength, BlockID, AO) -> {Offset, Length} = block_offsetandlength(BlockLengths, BlockID), {ok, BlockBin} = file:pread( Handle, - StartPos + Offset + PosBinLength + AO, Length + StartPos + Offset + PosBinLength + AO, + Length ), BlockBin; read_block(SlotBin, BlockLengths, PosBinLength, BlockID, AO) -> @@ -2337,20 +2537,35 @@ read_slot(Handle, Slot) -> SlotBin. -spec pointer_mapfun( - slot_pointer()) -> - {non_neg_integer(), non_neg_integer(), non_neg_integer(), - range_endpoint(), range_endpoint()}. + slot_pointer() +) -> + { + non_neg_integer(), + non_neg_integer(), + non_neg_integer(), + range_endpoint(), + range_endpoint() + }. pointer_mapfun({pointer, _Pid, Slot, SK, EK}) -> - {Slot#slot_index_value.start_position, + { + Slot#slot_index_value.start_position, Slot#slot_index_value.length, Slot#slot_index_value.slot_id, SK, - EK}. + EK + }. +%% erlfmt:ignore - issues with editors when function definitions are split -type slotbin_fun() :: - fun(({non_neg_integer(), non_neg_integer(), non_neg_integer(), - range_endpoint(), range_endpoint()}) - -> expanded_slot() + fun(( + { + non_neg_integer(), + non_neg_integer(), + non_neg_integer(), + range_endpoint(), + range_endpoint() + } + ) -> expanded_slot() ). -spec binarysplit_mapfun(binary(), integer()) -> slotbin_fun(). @@ -2365,12 +2580,13 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> end. -spec read_slots( - file:io_device(), - list(), - {segment_check_fun(), non_neg_integer(), blockindex_cache()}, - block_method(), - boolean()) - -> {boolean(), list(expanded_slot()|leveled_codec:ledger_kv())}. + file:io_device(), + list(), + {segment_check_fun(), non_neg_integer(), blockindex_cache()}, + block_method(), + boolean() +) -> + {boolean(), list(expanded_slot() | leveled_codec:ledger_kv())}. %% @doc %% Reading slots is generally unfiltered, but in the special case when %% querting across slots when only matching segment IDs are required the @@ -2380,13 +2596,23 @@ binarysplit_mapfun(MultiSlotBin, StartPos) -> %% any key comparison between levels should allow for a non-matching key to %% be considered as superior to a matching key - as otherwise a matching key %% may be intermittently removed from the result set -read_slots(Handle, SlotList, {false, 0, _BlockIndexCache}, - _BlockMethod, _IdxModDate) -> +read_slots( + Handle, + SlotList, + {false, 0, _BlockIndexCache}, + _BlockMethod, + _IdxModDate +) -> % No list of segments passed or useful Low LastModified Date % Just read slots in SlotList {false, read_slotlist(SlotList, Handle)}; -read_slots(Handle, SlotList, {SegChecker, LowLastMod, BlockIndexCache}, - BlockMethod, IdxModDate) -> +read_slots( + Handle, + SlotList, + {SegChecker, LowLastMod, BlockIndexCache}, + BlockMethod, + IdxModDate +) -> % Potentially need to check the low last modified date, and also the % segment_check_fun against the index. 
If the index is cached, return the % KV pairs at this point, otherwise return the slot pointer so that the @@ -2404,8 +2630,9 @@ read_slots(Handle, SlotList, {SegChecker, LowLastMod, BlockIndexCache}, { true, append( - read_slotlist([Pointer], Handle), Acc) - }; + read_slotlist([Pointer], Handle), Acc + ) + }; {BlockLengths, LMD, BlockIdx} -> % If there is a BlockIndex cached then we can use it to % check to see if any of the expected segments are @@ -2439,7 +2666,8 @@ read_slots(Handle, SlotList, {SegChecker, LowLastMod, BlockIndexCache}, BlockMethod, IdxModDate, SegChecker, - {SK, EK}), + {SK, EK} + ), {NeededBlockIdx, append(TrimmedKVL, Acc)} end end @@ -2447,19 +2675,25 @@ read_slots(Handle, SlotList, {SegChecker, LowLastMod, BlockIndexCache}, end, lists:foldr(BinMapFun, {false, []}, SlotList). - -spec checkblocks_segandrange( - binary(), - binary()|{file:io_device(), integer()}, - binary(), - block_method(), - boolean(), - segment_check_fun(), - {range_endpoint(), range_endpoint()}) - -> list(leveled_codec:ledger_kv()). + binary(), + binary() | {file:io_device(), integer()}, + binary(), + block_method(), + boolean(), + segment_check_fun(), + {range_endpoint(), range_endpoint()} +) -> + list(leveled_codec:ledger_kv()). checkblocks_segandrange( - BlockIdx, SlotOrHandle, BlockLengths, - BlockMethod, IdxModDate, SegChecker, {StartKey, EndKey}) -> + BlockIdx, + SlotOrHandle, + BlockLengths, + BlockMethod, + IdxModDate, + SegChecker, + {StartKey, EndKey} +) -> PositionList = find_pos(BlockIdx, SegChecker), KVL = check_blocks_allkeys( @@ -2473,20 +2707,19 @@ checkblocks_segandrange( ), in_range(KVL, StartKey, EndKey). - read_slotlist(SlotList, Handle) -> LengthList = lists:map(fun pointer_mapfun/1, SlotList), {MultiSlotBin, StartPos} = read_length_list(Handle, LengthList), lists:map(binarysplit_mapfun(MultiSlotBin, StartPos), LengthList). - -spec binaryslot_reader( list(expanded_slot()), block_method(), boolean(), segment_check_fun(), - list(expandable_pointer())) - -> {list({tuple(), tuple()}), list({integer(), binary()})}. + list(expandable_pointer()) +) -> + {list({tuple(), tuple()}), list({integer(), binary()})}. %% @doc %% Read the binary slots converting them to {K, V} pairs if they were not %% already {K, V} pairs. If they are already {K, V} pairs it is assumed @@ -2499,7 +2732,8 @@ read_slotlist(SlotList, Handle) -> %% endpoints of the block are outside of the range, and leaving blocks already %% proven to be outside of the range unopened. binaryslot_reader( - SlotBinsToFetch, BlockMethod, IdxModDate, SegChecker, SlotsToPoint) -> + SlotBinsToFetch, BlockMethod, IdxModDate, SegChecker, SlotsToPoint +) -> % Two accumulators are added. % One to collect the list of keys and values found in the binary slots % (subject to range filtering if the slot is still deserialised at this @@ -2521,8 +2755,13 @@ binaryslot_reader( binaryslot_reader([], _BlockMethod, _IdxModDate, _SegChecker, Acc, BIAcc) -> {Acc, BIAcc}; binaryslot_reader( - [{SlotBin, ID, SK, EK}|Tail], - BlockMethod, IdxModDate, SegChecker, Acc, BIAcc) -> + [{SlotBin, ID, SK, EK} | Tail], + BlockMethod, + IdxModDate, + SegChecker, + Acc, + BIAcc +) -> % The start key and end key here, may not the start key and end key the % application passed into the query. 
If the slot is known to lie entirely % inside the range, on either of both sides, the SK and EK may be @@ -2538,27 +2777,28 @@ binaryslot_reader( IdxModDate, SegChecker, TrimmedL, - [{ID, BICache}|BIAcc] + [{ID, BICache} | BIAcc] ); -binaryslot_reader([{K, V}|Tail], BM, IMD, SC, Acc, BIAcc) -> - binaryslot_reader(Tail, BM, IMD, SC, [{K, V}|Acc], BIAcc). - +binaryslot_reader([{K, V} | Tail], BM, IMD, SC, Acc, BIAcc) -> + binaryslot_reader(Tail, BM, IMD, SC, [{K, V} | Acc], BIAcc). read_length_list(Handle, LengthList) -> StartPos = element(1, lists:nth(1, LengthList)), - EndPos = element(1, lists:last(LengthList)) - + element(2, lists:last(LengthList)), + EndPos = + element(1, lists:last(LengthList)) + + element(2, lists:last(LengthList)), {ok, MultiSlotBin} = file:pread(Handle, StartPos, EndPos - StartPos), {MultiSlotBin, StartPos}. - -spec extract_header( - binary()|none, boolean()) -> {binary(), non_neg_integer(), binary()}|none. + binary() | none, boolean() +) -> {binary(), non_neg_integer(), binary()} | none. %% @doc %% Helper for extracting the binaries from the header ignoring the missing LMD %% if LMD is not indexed extract_header(none, _IdxModDate) -> - none; % used when the block cache has returned none + % used when the block cache has returned none + none; extract_header(Header, true) -> BL = ?BLOCK_LENGTHS_LENGTH, <> = Header, @@ -2573,7 +2813,8 @@ extract_header(Header, false) -> leveled_codec:ledger_key(), leveled_codec:segment_hash(), block_method(), - boolean()) -> {not_present|leveled_codec:ledger_kv(), binary()|none}. + boolean() +) -> {not_present | leveled_codec:ledger_kv(), binary() | none}. binaryslot_get(FullBin, Key, Hash, BlockMethod, IdxModDate) -> case crc_check_slot(FullBin) of {Header, Blocks} -> @@ -2584,32 +2825,29 @@ binaryslot_get(FullBin, Key, Hash, BlockMethod, IdxModDate) -> HashExtract when is_integer(HashExtract) -> find_pos(PosBinIndex, segment_checker(HashExtract)) end, - {fetch_value(PosList, BlockLengths, Blocks, Key, BlockMethod), - Header}; + { + fetch_value(PosList, BlockLengths, Blocks, Key, BlockMethod), + Header + }; crc_wonky -> {not_present, none} end. -spec binaryslot_tolist( - binary(), - block_method(), - boolean(), - list(leveled_codec:ledger_kv()|expandable_pointer())) - -> list(leveled_codec:ledger_kv()|expandable_pointer()). + binary(), + block_method(), + boolean(), + list(leveled_codec:ledger_kv() | expandable_pointer()) +) -> + list(leveled_codec:ledger_kv() | expandable_pointer()). binaryslot_tolist(FullBin, BlockMethod, IdxModDate, InitAcc) -> case crc_check_slot(FullBin) of {Header, Blocks} -> {BlockLengths, _LMD, _PosBinIndex} = extract_header(Header, IdxModDate), - <> = BlockLengths, - <> = Blocks, lists:foldl( fun(B, Acc) -> @@ -2623,24 +2861,27 @@ binaryslot_tolist(FullBin, BlockMethod, IdxModDate, InitAcc) -> end. -spec binaryslot_trimmed( - binary(), - range_endpoint(), - range_endpoint(), - block_method(), - boolean(), - segment_check_fun(), - list(leveled_codec:ledger_kv()|expandable_pointer()) - ) -> - {list(leveled_codec:ledger_kv()|expandable_pointer()), - list({integer(), binary()})|none}. + binary(), + range_endpoint(), + range_endpoint(), + block_method(), + boolean(), + segment_check_fun(), + list(leveled_codec:ledger_kv() | expandable_pointer()) +) -> + { + list(leveled_codec:ledger_kv() | expandable_pointer()), + list({integer(), binary()}) | none + }. 
%% @doc %% Must return a trimmed and reversed list of results in the range binaryslot_trimmed( - FullBin, all, all, BlockMethod, IdxModDate, false, Acc) -> + FullBin, all, all, BlockMethod, IdxModDate, false, Acc +) -> {binaryslot_tolist(FullBin, BlockMethod, IdxModDate, Acc), none}; binaryslot_trimmed( - FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker, Acc - ) -> + FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker, Acc +) -> case {crc_check_slot(FullBin), SegmentChecker} of % Get a trimmed list of keys in the slot based on the range, trying % to minimise the number of blocks which are deserialised by @@ -2648,19 +2889,20 @@ binaryslot_trimmed( {{Header, Blocks}, false} -> {BlockLengths, _LMD, _PosBinIndex} = extract_header(Header, IdxModDate), - <> = BlockLengths, - <> = Blocks, TrimmedKVL = blocks_required( {StartKey, EndKey}, - Block1, Block2, MidBlock, Block4, Block5, - BlockMethod), + Block1, + Block2, + MidBlock, + Block4, + Block5, + BlockMethod + ), {append(TrimmedKVL, Acc), none}; {{Header, _Blocks}, SegmentChecker} -> {BlockLengths, _LMD, BlockIdx} = @@ -2673,18 +2915,25 @@ binaryslot_trimmed( BlockMethod, IdxModDate, SegmentChecker, - {StartKey, EndKey}), + {StartKey, EndKey} + ), {append(TrimmedKVL, Acc), Header}; {crc_wonky, _} -> {Acc, none} end. -spec blocks_required( - {range_endpoint(), range_endpoint()}, - binary(), binary(), binary(), binary(), binary(), - block_method()) -> list(leveled_codec:ledger_kv()). + {range_endpoint(), range_endpoint()}, + binary(), + binary(), + binary(), + binary(), + binary(), + block_method() +) -> list(leveled_codec:ledger_kv()). blocks_required( - {StartKey, EndKey}, B1, B2, MidBlock, B4, B5, BlockMethod) -> + {StartKey, EndKey}, B1, B2, MidBlock, B4, B5, BlockMethod +) -> {Top, Tail, MidBlockFetchFun} = leveled_sstblock:get_topandtail(MidBlock, BlockMethod), case filterby_midblock({Top, Tail}, {StartKey, EndKey}) of @@ -2703,14 +2952,16 @@ blocks_required( append( in_range( get_lefthand_blocks( - B1, B2, BlockMethod, StartKey, EndKey), + B1, B2, BlockMethod, StartKey, EndKey + ), StartKey, all ), MidBlockFetchFun(all), in_range( get_righthand_blocks( - B4, B5, BlockMethod, StartKey, EndKey), + B4, B5, BlockMethod, StartKey, EndKey + ), all, EndKey ) @@ -2718,7 +2969,8 @@ blocks_required( lt_mid -> in_range( get_lefthand_blocks( - B1, B2, BlockMethod, StartKey, EndKey), + B1, B2, BlockMethod, StartKey, EndKey + ), StartKey, EndKey ); @@ -2726,7 +2978,8 @@ blocks_required( in_range( append( get_lefthand_blocks( - B1, B2, BlockMethod, StartKey, EndKey), + B1, B2, BlockMethod, StartKey, EndKey + ), MidBlockFetchFun(all) ), StartKey, @@ -2739,7 +2992,8 @@ blocks_required( append( MidBlockFetchFun(all), get_righthand_blocks( - B4, B5, BlockMethod, all, EndKey) + B4, B5, BlockMethod, all, EndKey + ) ), StartKey, EndKey @@ -2747,7 +3001,8 @@ blocks_required( gt_mid -> in_range( get_righthand_blocks( - B4, B5, BlockMethod, StartKey, EndKey), + B4, B5, BlockMethod, StartKey, EndKey + ), StartKey, EndKey ) @@ -2796,10 +3051,12 @@ get_righthand_blocks(B4, B5, BlockMethod, StartKey, EndKey) -> filterby_midblock({not_present, not_present}, _RangeKeys) -> empty; filterby_midblock( - {_MidFirst, MidLast}, {StartKey, _EndKey}) when StartKey > MidLast -> + {_MidFirst, MidLast}, {StartKey, _EndKey} +) when StartKey > MidLast -> gt_mid; filterby_midblock( - {MidFirst, MidLast}, {StartKey, EndKey}) when StartKey >= MidFirst -> + {MidFirst, MidLast}, {StartKey, EndKey} +) when StartKey >= MidFirst -> case 
leveled_codec:endkey_passed(EndKey, MidLast) of true -> mid_only; @@ -2824,7 +3081,7 @@ this_leftblock_required(_, all) -> true; this_leftblock_required({Top, _Tail}, EndKey) -> not leveled_codec:endkey_passed(EndKey, Top). - + this_rightblock_required({not_present, not_present}, _StartKey) -> true; this_rightblock_required(_, all) -> @@ -2845,9 +3102,10 @@ next_block_required({_FK, LK}, EndKey) -> not leveled_codec:endkey_passed(EndKey, LK). -spec in_range( - list(leveled_codec:ledger_kv()), - range_endpoint(), - range_endpoint()) -> list(leveled_codec:ledger_kv()). + list(leveled_codec:ledger_kv()), + range_endpoint(), + range_endpoint() +) -> list(leveled_codec:ledger_kv()). %% @doc %% Is the ledger key in the range. in_range(KVL, all, all) -> @@ -2864,8 +3122,9 @@ in_range(KVL, SK, EK) -> before_end(KVL, EK, Acc) when length(KVL) > ?MAX_AHEAD -> SkipCheck = - leveled_codec:endkey_passed( - EK, element(1, lists:nth(?CHECK_AHEAD, KVL))), + leveled_codec:endkey_passed( + EK, element(1, lists:nth(?CHECK_AHEAD, KVL)) + ), case SkipCheck of true -> append( @@ -2900,15 +3159,12 @@ after_start(KVL, SK) when length(KVL) > ?MAX_AHEAD -> after_start(KVL, SK) -> lists:dropwhile(fun({K, _V}) -> K < SK end, KVL). - crc_check_slot(FullBin) -> - <> = FullBin, + <> = + FullBin, PosBL0 = min(PosBL, byte_size(FullBin) - 12), - % If the position has been bit-flipped to beyond the maximum possible - % length, use the maximum possible length + % If the position has been bit-flipped to beyond the maximum possible + % length, use the maximum possible length <> = Rest, case {hmac(Header), hmac(PosBL0)} of {CRC32H, CRC32PBL} -> @@ -2927,23 +3183,15 @@ block_offsetandlength(BlockLengths, BlockID) -> <> = BlockLengths, {B1L, B2L}; 3 -> - <> = BlockLengths, + <> = + BlockLengths, {B1L + B2L, B3L}; 4 -> - <> = BlockLengths, {B1L + B2L + B3L, B4L}; 5 -> - <> = BlockLengths, {B1L + B2L + B3L + B4L, B5L} end. @@ -2953,15 +3201,16 @@ block_offsetandlength(BlockLengths, BlockID) -> binary(), binary(), leveled_codec:ledger_key(), - block_method()) -> not_present|leveled_codec:ledger_kv(). + block_method() +) -> not_present | leveled_codec:ledger_kv(). fetch_value([], _BlockLengths, _Blocks, _Key, _BlockMethod) -> not_present; -fetch_value([Pos|Rest], BlockLengths, Blocks, Key, BlockMethod) -> +fetch_value([Pos | Rest], BlockLengths, Blocks, Key, BlockMethod) -> {BlockNumber, BlockPos} = revert_position(Pos), {Offset, Length} = block_offsetandlength(BlockLengths, BlockNumber), <<_Pre:Offset/binary, Block:Length/binary, _Rest/binary>> = Blocks, R = leveled_sstblock:get_nth(BlockPos, Block, BlockMethod), - case R of + case R of {K, V} when K == Key -> {K, V}; _ -> @@ -2979,8 +3228,10 @@ revert_position(Pos) -> {3, ((Pos - 2 * SideBlockSize) rem MidBlockSize) + 1}; false -> TailPos = Pos - 2 * SideBlockSize - MidBlockSize, - {(TailPos div SideBlockSize) + 4, - (TailPos rem SideBlockSize) + 1} + { + (TailPos div SideBlockSize) + 4, + (TailPos rem SideBlockSize) + 1 + } end end. 
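block_offsetandlength/2 above locates a block by summing the lengths of the blocks that precede it, and revert_position/1 performs the reverse mapping from a flat position to a {BlockNumber, PositionInBlock} pair. A small arithmetic sketch of the offset rule, using illustrative block lengths rather than real slot data:

%% Hedged sketch - lengths are made up; the result shape matches
%% what block_offsetandlength/2 returns for block 4
Lengths = [120, 110, 130, 100, 90],
OffsetOfBlock4 = lists:sum(lists:sublist(Lengths, 3)),
{360, 100} = {OffsetOfBlock4, lists:nth(4, Lengths)}.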
@@ -2988,7 +3239,6 @@ revert_position(Pos) -> %%% Optimised list functions %%%============================================================================ - %% @doc See eunit test append_performance_test_/0 and also %% https://github.com/erlang/otp/pull/8743 %% On OTP 26.2.1 - @@ -3045,9 +3295,14 @@ append(L1, L2, L3, L4) -> %% there are matching keys then the highest sequence number must be chosen and %% any lower sequence numbers should be compacted out of existence --spec merge_lists(list(), sst_options(), boolean()) - -> {list(), list(), list(binary_slot()), - tuple()|null, non_neg_integer()|not_counted}. +-spec merge_lists(list(), sst_options(), boolean()) -> + { + list(), + list(), + list(binary_slot()), + tuple() | null, + non_neg_integer() | not_counted + }. %% @doc %% %% Merge from a single list (i.e. at Level 0) @@ -3057,7 +3312,8 @@ merge_lists(KVList1, SSTOpts, IdxModDate) -> [], [], split_lists( - KVList1,[], + KVList1, + [], SlotCount, { SSTOpts#sst_options.block_version, @@ -3074,32 +3330,39 @@ split_lists([], SlotLists, 0, _BlockMethod, _IdxModDate) -> split_lists(LastPuff, SlotLists, 0, BlockMethod, IdxModDate) -> {SlotD, _} = generate_binary_slot( - lookup, {forward, LastPuff}, BlockMethod, IdxModDate, no_timing), - lists:reverse([SlotD|SlotLists]); + lookup, {forward, LastPuff}, BlockMethod, IdxModDate, no_timing + ), + lists:reverse([SlotD | SlotLists]); split_lists(KVList1, SlotLists, N, BlockMethod, IdxModDate) -> {Slot, KVListRem} = lists:split(?LOOK_SLOTSIZE, KVList1), {SlotD, _} = generate_binary_slot( - lookup, {forward, Slot}, BlockMethod, IdxModDate, no_timing), - split_lists(KVListRem, [SlotD|SlotLists], N - 1, BlockMethod, IdxModDate). + lookup, {forward, Slot}, BlockMethod, IdxModDate, no_timing + ), + split_lists(KVListRem, [SlotD | SlotLists], N - 1, BlockMethod, IdxModDate). -spec merge_lists( list(maybe_expanded_pointer()), list(maybe_expanded_pointer()), {boolean(), non_neg_integer()}, - sst_options(), boolean()) -> - {list(maybe_expanded_pointer()), - list(maybe_expanded_pointer()), - list(binary_slot()), - leveled_codec:ledger_key()|null, - non_neg_integer()}. + sst_options(), + boolean() +) -> + { + list(maybe_expanded_pointer()), + list(maybe_expanded_pointer()), + list(binary_slot()), + leveled_codec:ledger_key() | null, + non_neg_integer() + }. %% @doc %% Merge lists when merging across more than one file. KVLists that are %% provided may include pointers to fetch more Keys/Values from the source %% file merge_lists( - KVList1, KVList2, {IsBase, L}, SSTOpts, IndexModDate) -> - BuildTimings = + KVList1, KVList2, {IsBase, L}, SSTOpts, IndexModDate +) -> + BuildTimings = case IsBase orelse lists:member(L, ?LOG_BUILDTIMINGS_LEVELS) of true -> #build_timings{}; @@ -3120,36 +3383,73 @@ merge_lists( BuildTimings ). - -spec merge_lists( list(maybe_expanded_pointer()), list(maybe_expanded_pointer()), {boolean(), non_neg_integer()}, list(binary_slot()), - leveled_codec:ledger_key()|null, + leveled_codec:ledger_key() | null, non_neg_integer(), - pos_integer()|infinity, + pos_integer() | infinity, block_method(), boolean(), non_neg_integer(), - build_timings()) -> - {list(maybe_expanded_pointer()), list(maybe_expanded_pointer()), - list(binary_slot()), leveled_codec:ledger_key()|null, - non_neg_integer()}. + build_timings() +) -> + { + list(maybe_expanded_pointer()), + list(maybe_expanded_pointer()), + list(binary_slot()), + leveled_codec:ledger_key() | null, + non_neg_integer() + }. 
-merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, MaxSlots, MaxSlots, - _BlockMethod, _IdxModDate, CountOfTombs, T0) -> +merge_lists( + KVL1, + KVL2, + LI, + SlotList, + FirstKey, + MaxSlots, + MaxSlots, + _BlockMethod, + _IdxModDate, + CountOfTombs, + T0 +) -> % This SST file is full, move to complete file, and return the % remainder log_buildtimings(T0, LI), {KVL1, KVL2, lists:reverse(SlotList), FirstKey, CountOfTombs}; -merge_lists([], [], LI, SlotList, FirstKey, _SlotCount, _MaxSlots, - _BlockMethod, _IdxModDate, CountOfTombs, T0) -> +merge_lists( + [], + [], + LI, + SlotList, + FirstKey, + _SlotCount, + _MaxSlots, + _BlockMethod, + _IdxModDate, + CountOfTombs, + T0 +) -> % the source files are empty, complete the file log_buildtimings(T0, LI), {[], [], lists:reverse(SlotList), FirstKey, CountOfTombs}; -merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, MaxSlots, - BlockMethod, IdxModDate, CountOfTombs, T0) -> +merge_lists( + KVL1, + KVL2, + LI, + SlotList, + FirstKey, + SlotCount, + MaxSlots, + BlockMethod, + IdxModDate, + CountOfTombs, + T0 +) -> % Form a slot by merging the two lists until the next 128 K/V pairs have % been determined {KVRem1, KVRem2, Slot, FK0} = @@ -3176,33 +3476,38 @@ merge_lists(KVL1, KVL2, LI, SlotList, FirstKey, SlotCount, MaxSlots, % metadata {SlotD, T2} = generate_binary_slot( - Lookup, {reverse, KVL}, BlockMethod, IdxModDate, T1), + Lookup, {reverse, KVL}, BlockMethod, IdxModDate, T1 + ), merge_lists( KVRem1, - KVRem2, - LI, - [SlotD|SlotList], - FK0, - SlotCount + 1, - MaxSlots, - BlockMethod, - IdxModDate, - leveled_codec:count_tombs(KVL, CountOfTombs), - T2 - ) + KVRem2, + LI, + [SlotD | SlotList], + FK0, + SlotCount + 1, + MaxSlots, + BlockMethod, + IdxModDate, + leveled_codec:count_tombs(KVL, CountOfTombs), + T2 + ) end. -spec form_slot( list(maybe_expanded_pointer()), list(maybe_expanded_pointer()), {boolean(), non_neg_integer()}, - lookup|no_lookup, + lookup | no_lookup, non_neg_integer(), list(leveled_codec:ledger_kv()), - leveled_codec:ledger_key()|null) -> - {list(maybe_expanded_pointer()), list(maybe_expanded_pointer()), - {lookup|no_lookup, list(leveled_codec:ledger_kv())}, - leveled_codec:ledger_key()|null}. + leveled_codec:ledger_key() | null +) -> + { + list(maybe_expanded_pointer()), + list(maybe_expanded_pointer()), + {lookup | no_lookup, list(leveled_codec:ledger_kv())}, + leveled_codec:ledger_key() | null + }. 
%% @doc %% Merge together Key Value lists to provide a reverse-ordered slot of KVs form_slot([], [], _LI, Type, _Size, Slot, FK) -> @@ -3215,14 +3520,19 @@ form_slot(KVList1, KVList2, LevelInfo, lookup, Size, Slot, FK) -> case key_dominates(KVList1, KVList2, LevelInfo) of {{next_key, TopKV}, Rem1, Rem2} -> form_slot( - Rem1, Rem2, LevelInfo, lookup, Size + 1, [TopKV|Slot], FK); + Rem1, Rem2, LevelInfo, lookup, Size + 1, [TopKV | Slot], FK + ); {skipped_key, Rem1, Rem2} -> form_slot(Rem1, Rem2, LevelInfo, lookup, Size, Slot, FK) end; form_slot(KVList1, KVList2, LevelInfo, no_lookup, Size, Slot, FK) -> case key_dominates(KVList1, KVList2, LevelInfo) of {{next_key, {TopK, TopV}}, Rem1, Rem2} -> - FK0 = case FK of null -> TopK; _ -> FK end, + FK0 = + case FK of + null -> TopK; + _ -> FK + end, case leveled_codec:to_lookup(TopK) of no_lookup -> form_slot( @@ -3231,8 +3541,9 @@ form_slot(KVList1, KVList2, LevelInfo, no_lookup, Size, Slot, FK) -> LevelInfo, no_lookup, Size + 1, - [{TopK, TopV}|Slot], - FK0); + [{TopK, TopV} | Slot], + FK0 + ); lookup -> case Size >= ?LOOK_SLOTSIZE of true when FK =/= null -> @@ -3244,8 +3555,9 @@ form_slot(KVList1, KVList2, LevelInfo, no_lookup, Size, Slot, FK) -> LevelInfo, lookup, Size + 1, - [{TopK, TopV}|Slot], - FK0) + [{TopK, TopV} | Slot], + FK0 + ) end end; {skipped_key, Rem1, Rem2} -> @@ -3253,52 +3565,72 @@ form_slot(KVList1, KVList2, LevelInfo, no_lookup, Size, Slot, FK) -> end. -spec key_dominates( + list(maybe_expanded_pointer()), + list(maybe_expanded_pointer()), + {boolean(), leveled_pmanifest:lsm_level()} +) -> + { + {next_key, leveled_codec:ledger_kv()} | skipped_key, list(maybe_expanded_pointer()), - list(maybe_expanded_pointer()), - {boolean(), leveled_pmanifest:lsm_level()}) - -> - {{next_key, leveled_codec:ledger_kv()}|skipped_key, - list(maybe_expanded_pointer()), - list(maybe_expanded_pointer())}. -key_dominates([{pointer, SSTPid, Slot, StartKey, all}|T1], KL2, Level) -> + list(maybe_expanded_pointer()) + }. 
+key_dominates([{pointer, SSTPid, Slot, StartKey, all} | T1], KL2, Level) -> key_dominates( expand_list_by_pointer( + {pointer, SSTPid, Slot, StartKey, all}, % As the head is a pointer, the tail must be pointers too % So eqwalizer is wrong that this may be % [leveled_codec:ledger_kv()] % eqwalizer:ignore - {pointer, SSTPid, Slot, StartKey, all}, T1, ?MERGE_SCANWIDTH), + T1, + ?MERGE_SCANWIDTH + ), KL2, - Level); -key_dominates([{next, ManEntry, StartKey}|T1], KL2, Level) -> + Level + ); +key_dominates([{next, ManEntry, StartKey} | T1], KL2, Level) -> key_dominates( expand_list_by_pointer( + {next, ManEntry, StartKey, all}, % See above % eqwalizer:ignore - {next, ManEntry, StartKey, all}, T1, ?MERGE_SCANWIDTH), + T1, + ?MERGE_SCANWIDTH + ), KL2, - Level); -key_dominates(KL1, [{pointer, SSTPid, Slot, StartKey, all}|T2], Level) -> + Level + ); +key_dominates(KL1, [{pointer, SSTPid, Slot, StartKey, all} | T2], Level) -> key_dominates( KL1, expand_list_by_pointer( + {pointer, SSTPid, Slot, StartKey, all}, % See above % eqwalizer:ignore - {pointer, SSTPid, Slot, StartKey, all}, T2, ?MERGE_SCANWIDTH), - Level); -key_dominates(KL1, [{next, ManEntry, StartKey}|T2], Level) -> + T2, + ?MERGE_SCANWIDTH + ), + Level + ); +key_dominates(KL1, [{next, ManEntry, StartKey} | T2], Level) -> key_dominates( KL1, expand_list_by_pointer( + {next, ManEntry, StartKey, all}, % See above % eqwalizer:ignore - {next, ManEntry, StartKey, all}, T2, ?MERGE_SCANWIDTH), - Level); + T2, + ?MERGE_SCANWIDTH + ), + Level + ); key_dominates( - [{K1, _V1}|_T1]=Rest1, [{K2, V2}|Rest2], {false, _TS}) when K2 < K1 -> + [{K1, _V1} | _T1] = Rest1, [{K2, V2} | Rest2], {false, _TS} +) when K2 < K1 -> {{next_key, {K2, V2}}, Rest1, Rest2}; key_dominates( - [{K1, V1}|Rest1], [{K2, _V2}|_T2]=Rest2, {false, _TS}) when K1 < K2 -> + [{K1, V1} | Rest1], [{K2, _V2} | _T2] = Rest2, {false, _TS} +) when K1 < K2 -> {{next_key, {K1, V1}}, Rest1, Rest2}; key_dominates(KL1, KL2, Level) -> case key_dominates_comparison(KL1, KL2) of @@ -3314,30 +3646,35 @@ key_dominates(KL1, KL2, Level) -> end. -spec key_dominates_comparison( + list(maybe_expanded_pointer()), + list(maybe_expanded_pointer()) +) -> + % first item in each list must be leveled_codec:ledger_kv() + { + {next_key, leveled_codec:ledger_kv()} | skipped_key, list(maybe_expanded_pointer()), - list(maybe_expanded_pointer())) - % first item in each list must be leveled_codec:ledger_kv() - -> {{next_key, leveled_codec:ledger_kv()}|skipped_key, - list(maybe_expanded_pointer()), - list(maybe_expanded_pointer())}. -key_dominates_comparison([{K1, V1}|T1], []) -> + list(maybe_expanded_pointer()) + }. 
+key_dominates_comparison([{K1, V1} | T1], []) -> {{next_key, {K1, V1}}, T1, []}; -key_dominates_comparison([], [{K2, V2}|T2]) -> +key_dominates_comparison([], [{K2, V2} | T2]) -> {{next_key, {K2, V2}}, [], T2}; -key_dominates_comparison([{K1, _V1}|_T1]=LHL, [{K2, V2}|T2]) when K2 < K1 -> +key_dominates_comparison([{K1, _V1} | _T1] = LHL, [{K2, V2} | T2]) when + K2 < K1 +-> {{next_key, {K2, V2}}, LHL, T2}; -key_dominates_comparison([{K1, V1}|T1], [{K2, _V2}|_T2]=RHL) when K1 < K2 -> +key_dominates_comparison([{K1, V1} | T1], [{K2, _V2} | _T2] = RHL) when + K1 < K2 +-> {{next_key, {K1, V1}}, T1, RHL}; -key_dominates_comparison([{K1, V1}|T1], [{K2, V2}|T2]) -> +key_dominates_comparison([{K1, V1} | T1], [{K2, V2} | T2]) -> case leveled_codec:key_dominates({K1, V1}, {K2, V2}) of true -> - {skipped_key, [{K1, V1}|T1], T2}; + {skipped_key, [{K1, V1} | T1], T2}; false -> - {skipped_key, T1, [{K2, V2}|T2]} + {skipped_key, T1, [{K2, V2} | T2]} end. - - %%%============================================================================ %%% Timing Functions %%%============================================================================ @@ -3381,20 +3718,27 @@ log_buildtimings(no_timing, _LI) -> log_buildtimings(Timings, LI) -> leveled_log:log( sst13, - [Timings#build_timings.fold_toslot, + [ + Timings#build_timings.fold_toslot, Timings#build_timings.slot_hashlist, Timings#build_timings.slot_serialise, Timings#build_timings.slot_finish, - element(1, LI), element(2, LI)]). + element(1, LI), + element(2, LI) + ] + ). -spec maybelog_fetch_timing( - leveled_monitor:monitor(), - leveled_pmanifest:lsm_level(), - leveled_monitor:sst_fetch_type(), - erlang:timestamp()|no_timing) -> ok. + leveled_monitor:monitor(), + leveled_pmanifest:lsm_level(), + leveled_monitor:sst_fetch_type(), + erlang:timestamp() | no_timing +) -> ok. maybelog_fetch_timing(_Monitor, _Level, _Type, no_timing) -> ok; -maybelog_fetch_timing({Pid, _SlotFreq}, Level, Type, SW) when is_pid(Pid), SW =/= no_timing -> +maybelog_fetch_timing({Pid, _SlotFreq}, Level, Type, SW) when + is_pid(Pid), SW =/= no_timing +-> TS1 = case leveled_monitor:step_time(SW) of {TS, _NextSW} when is_integer(TS) -> TS @@ -3412,7 +3756,8 @@ maybelog_fetch_timing({Pid, _SlotFreq}, Level, Type, SW) when is_pid(Pid), SW =/ -define(TEST_AREA, "test/test_area/"). binaryslot_trimmed( - FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker) -> + FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker +) -> binaryslot_trimmed( FullBin, StartKey, EndKey, BlockMethod, IdxModDate, SegmentChecker, [] ). @@ -3420,61 +3765,82 @@ binaryslot_trimmed( binaryslot_tolist(FullBin, BlockMethod, IdxModDate) -> binaryslot_tolist(FullBin, BlockMethod, IdxModDate, []). - sst_getkvrange(Pid, StartKey, EndKey, ScanWidth) -> sst_getkvrange(Pid, StartKey, EndKey, ScanWidth, false, 0). -spec sst_getkvrange( - pid(), - range_endpoint(), - range_endpoint(), - integer(), - segment_check_fun(), - non_neg_integer()) -> list(maybe_expanded_pointer()). + pid(), + range_endpoint(), + range_endpoint(), + integer(), + segment_check_fun(), + non_neg_integer() +) -> list(maybe_expanded_pointer()). %% @doc %% Get a range of {Key, Value} pairs as a list between StartKey and EndKey %% (inclusive). 
The ScanWidth is the maximum size of the range, a pointer %% will be placed on the tail of the resulting list if results expand beyond %% the Scan Width sst_getkvrange(Pid, StartKey, EndKey, ScanWidth, SegChecker, LowLastMod) -> - [Pointer|MorePointers] = + [Pointer | MorePointers] = sst_getfilteredrange(Pid, StartKey, EndKey, LowLastMod), sst_expandpointer( - Pointer, MorePointers, ScanWidth, SegChecker, LowLastMod). + Pointer, MorePointers, ScanWidth, SegChecker, LowLastMod + ). -spec sst_getslots( - pid(), list(slot_pointer())) -> list(leveled_codec:ledger_kv()). + pid(), list(slot_pointer()) +) -> list(leveled_codec:ledger_kv()). %% @doc %% Get a list of slots by their ID. The slot will be converted from the binary -%% to term form outside of the FSM loop, this is to stop the copying of the +%% to term form outside of the FSM loop, this is to stop the copying of the %% converted term to the calling process. sst_getslots(Pid, SlotList) -> sst_getfilteredslots(Pid, SlotList, false, 0, []). testsst_new( - RootPath, Filename, Level, KVList, MaxSQN, {BV, PM}) -> + RootPath, Filename, Level, KVList, MaxSQN, {BV, PM} +) -> OptsSST = #sst_options{ - press_method=PM, - block_version=BV, - log_options=leveled_log:get_opts() + press_method = PM, + block_version = BV, + log_options = leveled_log:get_opts() }, sst_new(RootPath, Filename, Level, KVList, MaxSQN, OptsSST, false). -testsst_new(RootPath, Filename, - KVL1, KVL2, IsBasement, Level, MaxSQN, {BV, PM}) -> +testsst_new( + RootPath, + Filename, + KVL1, + KVL2, + IsBasement, + Level, + MaxSQN, + {BV, PM} +) -> OptsSST = #sst_options{ - press_method=PM, - block_version=BV, - log_options=leveled_log:get_opts() + press_method = PM, + block_version = BV, + log_options = leveled_log:get_opts() }, - sst_newmerge(RootPath, Filename, KVL1, KVL2, IsBasement, Level, MaxSQN, - OptsSST, false). + sst_newmerge( + RootPath, + Filename, + KVL1, + KVL2, + IsBasement, + Level, + MaxSQN, + OptsSST, + false + ). generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> generate_randomkeys( - Seqn, Count, [], BucketRangeLow, BucketRangeHigh). + Seqn, Count, [], BucketRangeLow, BucketRangeHigh + ). generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> Acc; @@ -3497,11 +3863,13 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> ?assertMatch(undefined, element(3, MD)), MD0 = [{magic_md, [<<0:32/integer>>, base64:encode(Chunk)]}], MV0 = setelement(4, MV, setelement(3, MD, MD0)), - generate_randomkeys(Seqn + 1, - Count - 1, - [{LK, MV0}|Acc], - BucketLow, - BRange). + generate_randomkeys( + Seqn + 1, + Count - 1, + [{LK, MV0} | Acc], + BucketLow, + BRange + ). generate_indexkeys(Count) -> generate_indexkeys(Count, []). @@ -3604,16 +3972,17 @@ tombcount_tester(Level) -> {RP, Filename} = {?TEST_AREA, "tombcount_test"}, OptsSST = #sst_options{ - press_method=native, log_options=leveled_log:get_opts()}, + press_method = native, log_options = leveled_log:get_opts() + }, {ok, SST2, _KD1, _BB1} = sst_newmerge( - RP, Filename, KVL1, KVL2, false, Level, N, OptsSST, true), + RP, Filename, KVL1, KVL2, false, Level, N, OptsSST, true + ), ?assertMatch(ExpectedCount, sst_gettombcount(SST2)), ok = sst_close(SST2), ok = file:delete(filename:join(RP, Filename ++ ".sst")). 
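The test helpers above pass the block method as a {BlockVersion, PressMethod} pair and unpack it into #sst_options{}. A hedged sketch of constructing the two option records those helpers build (field values are illustrative; log_options is taken as in the tests):

%% Hedged sketch - mirrors how testsst_new/6 and testsst_new/8 unpack {BV, PM}
V0Opts =
    #sst_options{
        press_method = native,
        block_version = 0,
        log_options = leveled_log:get_opts()
    },
V1Opts = V0Opts#sst_options{block_version = 1}.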
- form_slot_test() -> % If a skip key happens, mustn't switch to loookup by accident as could be % over the expected size @@ -3623,14 +3992,21 @@ form_slot_test() -> {9999, tomb, {1234568, 1234567}, {}} }, Slot = - [{{o, <<"B1">>, <<"K5">>, null}, - {5, {active, infinity}, {99234568, 99234567}, {}}}], - R1 = form_slot([SkippingKV], [], - {true, 99999999}, - no_lookup, - ?LOOK_SLOTSIZE + 1, - Slot, - {o, <<"B1">>, <<"K5">>, null}), + [ + { + {o, <<"B1">>, <<"K5">>, null}, + {5, {active, infinity}, {99234568, 99234567}, {}} + } + ], + R1 = form_slot( + [SkippingKV], + [], + {true, 99999999}, + no_lookup, + ?LOOK_SLOTSIZE + 1, + Slot, + {o, <<"B1">>, <<"K5">>, null} + ), ?assertMatch( {[], [], {no_lookup, Slot}, {o, <<"B1">>, <<"K5">>, null}}, R1 @@ -3639,21 +4015,40 @@ form_slot_test() -> merge_tombstonelist_test() -> % Merge lists with nothing but tombstones, and file at basement level SkippingKV1 = - {{o, <<"B1">>, <<"K9995">>, null}, {9995, tomb, {1234568, 1234567}, {}}}, + { + {o, <<"B1">>, <<"K9995">>, null}, + {9995, tomb, {1234568, 1234567}, {}} + }, SkippingKV2 = - {{o, <<"B1">>, <<"K9996">>, null}, {9996, tomb, {1234568, 1234567}, {}}}, + { + {o, <<"B1">>, <<"K9996">>, null}, + {9996, tomb, {1234568, 1234567}, {}} + }, SkippingKV3 = - {{o, <<"B1">>, <<"K9997">>, null}, {9997, tomb, {1234568, 1234567}, {}}}, + { + {o, <<"B1">>, <<"K9997">>, null}, + {9997, tomb, {1234568, 1234567}, {}} + }, SkippingKV4 = - {{o, <<"B1">>, <<"K9998">>, null}, {9998, tomb, {1234568, 1234567}, {}}}, + { + {o, <<"B1">>, <<"K9998">>, null}, + {9998, tomb, {1234568, 1234567}, {}} + }, SkippingKV5 = - {{o, <<"B1">>, <<"K9999">>, null}, {9999, tomb, {1234568, 1234567}, {}}}, - R = merge_lists([SkippingKV1, SkippingKV3, SkippingKV5], - [SkippingKV2, SkippingKV4], - {true, 9999999}, - #sst_options{press_method = native, - max_sstslots = 256}, - ?INDEX_MODDATE), + { + {o, <<"B1">>, <<"K9999">>, null}, + {9999, tomb, {1234568, 1234567}, {}} + }, + R = merge_lists( + [SkippingKV1, SkippingKV3, SkippingKV5], + [SkippingKV2, SkippingKV4], + {true, 9999999}, + #sst_options{ + press_method = native, + max_sstslots = 256 + }, + ?INDEX_MODDATE + ), ?assertMatch({[], [], [], null, 0}, R). @@ -3667,10 +4062,13 @@ indexed_list_test() -> {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, KVL1}, {0, native}, ?INDEX_MODDATE, no_timing), - io:format(user, - "Indexed list created slot in ~w microseconds of size ~w~n", - [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)]), + lookup, {forward, KVL1}, {0, native}, ?INDEX_MODDATE, no_timing + ), + io:format( + user, + "Indexed list created slot in ~w microseconds of size ~w~n", + [timer:now_diff(os:timestamp(), SW0), byte_size(FullBin)] + ), {TestK1, TestV1} = lists:nth(20, KVL1), MH1 = leveled_codec:segment_hash(TestK1), @@ -3689,7 +4087,6 @@ indexed_list_test() -> test_binary_slot(FullBin, TestK4, MH4, {TestK4, TestV4}), test_binary_slot(FullBin, TestK5, MH5, {TestK5, TestV5}). 
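indexed_list_test/0 above builds a slot and then probes it key by key. A condensed, hedged sketch of that round trip; KVL is assumed to be a sorted list of at least 20 ledger key/values, as the tests produce via lists:ukeysort/2 over generate_randomkeys/4:

%% Hedged sketch of serialise-then-probe, as exercised by these tests
{{_Header, SlotBin, _HashL, _LastKey}, no_timing} =
    generate_binary_slot(
        lookup, {forward, KVL}, {0, native}, ?INDEX_MODDATE, no_timing
    ),
{Key, Value} = lists:nth(20, KVL),
{{Key, Value}, _Hdr} =
    binaryslot_get(
        SlotBin, Key, leveled_codec:segment_hash(Key), {0, native}, ?INDEX_MODDATE
    ).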
- indexed_list_mixedkeys_test() -> KVL0 = lists:ukeysort(1, generate_randomkeys(1, 50, 1, 4)), KVL1 = lists:sublist(KVL0, 33), @@ -3697,7 +4094,8 @@ indexed_list_mixedkeys_test() -> {{_PosBinIndex1, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing + ), {TestK1, TestV1} = lists:nth(4, KVL1), MH1 = leveled_codec:segment_hash(TestK1), @@ -3725,28 +4123,39 @@ indexed_list_mixedkeys2_test() -> Keys = IdxKeys1 ++ KVL1 ++ IdxKeys2, {{_Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), - lists:foreach(fun({K, V}) -> - MH = leveled_codec:segment_hash(K), - test_binary_slot(FullBin, K, MH, {K, V}) - end, - KVL1). + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing + ), + lists:foreach( + fun({K, V}) -> + MH = leveled_codec:segment_hash(K), + test_binary_slot(FullBin, K, MH, {K, V}) + end, + KVL1 + ). indexed_list_allindexkeys_test() -> - Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), - ?LOOK_SLOTSIZE), + Keys = lists:sublist( + lists:ukeysort(1, generate_indexkeys(150)), + ?LOOK_SLOTSIZE + ), {{HeaderT, FullBinT, HL, LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, {0, native}, true, no_timing), + lookup, {forward, Keys}, {0, native}, true, no_timing + ), {{HeaderF, FullBinF, HL, LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, {0, native}, false, no_timing), + lookup, {forward, Keys}, {0, native}, false, no_timing + ), EmptySlotSize = ?LOOK_SLOTSIZE - 1, LMD = ?FLIPPER32, - ?assertMatch(<<_BL:20/binary, LMD:32/integer, EmptySlotSize:8/integer>>, - HeaderT), - ?assertMatch(<<_BL:20/binary, EmptySlotSize:8/integer>>, - HeaderF), + ?assertMatch( + <<_BL:20/binary, LMD:32/integer, EmptySlotSize:8/integer>>, + HeaderT + ), + ?assertMatch( + <<_BL:20/binary, EmptySlotSize:8/integer>>, + HeaderF + ), % SW = os:timestamp(), BinToListT = binaryslot_tolist(FullBinT, {0, native}, true), BinToListF = binaryslot_tolist(FullBinF, {0, native}, false), @@ -3758,19 +4167,26 @@ indexed_list_allindexkeys_test() -> ?assertMatch( {Keys, none}, binaryslot_trimmed( - FullBinT, all, all, {0, native}, true, false)), + FullBinT, all, all, {0, native}, true, false + ) + ), ?assertMatch(Keys, BinToListF), ?assertMatch( {Keys, none}, binaryslot_trimmed( - FullBinF, all, all, {0, native}, false, false)). + FullBinF, all, all, {0, native}, false, false + ) + ). indexed_list_allindexkeys_nolookup_test() -> - Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(1000)), - ?NOLOOK_SLOTSIZE), + Keys = lists:sublist( + lists:ukeysort(1, generate_indexkeys(1000)), + ?NOLOOK_SLOTSIZE + ), {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - no_lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE,no_timing), + no_lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing + ), ?assertMatch(<<_BL:20/binary, _LMD:32/integer, 127:8/integer>>, Header), % SW = os:timestamp(), BinToList = @@ -3781,34 +4197,44 @@ indexed_list_allindexkeys_nolookup_test() -> ?assertMatch(Keys, BinToList), ?assertMatch( {Keys, none}, - binaryslot_trimmed(FullBin, all, all, {0, native}, ?INDEX_MODDATE, false)). + binaryslot_trimmed( + FullBin, all, all, {0, native}, ?INDEX_MODDATE, false + ) + ). 
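The header patterns asserted above imply two header shapes: a 20-byte block-lengths section, an optional 32-bit last-modified date (present only when modification dates are indexed), and a closing 8-bit count. A small size check of those two shapes, with illustrative contents:

%% Hedged sketch - widths follow the ?assertMatch patterns in these tests
HeaderWithLMD = <<0:(20 * 8), ?FLIPPER32:32/integer, 127:8/integer>>,
HeaderNoLMD = <<0:(20 * 8), 127:8/integer>>,
25 = byte_size(HeaderWithLMD),
21 = byte_size(HeaderNoLMD).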
indexed_list_allindexkeys_trimmed_test() -> - Keys = lists:sublist(lists:ukeysort(1, generate_indexkeys(150)), - ?LOOK_SLOTSIZE), + Keys = lists:sublist( + lists:ukeysort(1, generate_indexkeys(150)), + ?LOOK_SLOTSIZE + ), {{Header, FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing + ), EmptySlotSize = ?LOOK_SLOTSIZE - 1, ?assertMatch( <<_BL:20/binary, _LMD:32/integer, EmptySlotSize:8/integer>>, - Header), + Header + ), ?assertMatch( - {Keys, none}, + {Keys, none}, binaryslot_trimmed( FullBin, {i, "Bucket", {"t1_int", 0}, null}, {i, "Bucket", {"t1_int", 99999}, null}, {0, native}, ?INDEX_MODDATE, - false)), + false + ) + ), {SK1, _} = lists:nth(10, Keys), {EK1, _} = lists:nth(100, Keys), R1 = lists:sublist(Keys, 10, 91), {O1, none} = binaryslot_trimmed( - FullBin, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), + FullBin, SK1, EK1, {0, native}, ?INDEX_MODDATE, false + ), ?assertMatch(91, length(O1)), ?assertMatch(R1, O1), @@ -3816,7 +4242,9 @@ indexed_list_allindexkeys_trimmed_test() -> {EK2, _} = lists:nth(20, Keys), R2 = lists:sublist(Keys, 10, 11), {O2, none} = - binaryslot_trimmed(FullBin, SK2, EK2, {0, native}, ?INDEX_MODDATE, false), + binaryslot_trimmed( + FullBin, SK2, EK2, {0, native}, ?INDEX_MODDATE, false + ), ?assertMatch(11, length(O2)), ?assertMatch(R2, O2), @@ -3824,11 +4252,12 @@ indexed_list_allindexkeys_trimmed_test() -> {EK3, _} = lists:nth(?LOOK_SLOTSIZE, Keys), R3 = lists:sublist(Keys, ?LOOK_SLOTSIZE - 1, 2), {O3, none} = - binaryslot_trimmed(FullBin, SK3, EK3, {0, native}, ?INDEX_MODDATE, false), + binaryslot_trimmed( + FullBin, SK3, EK3, {0, native}, ?INDEX_MODDATE, false + ), ?assertMatch(2, length(O3)), ?assertMatch(R3, O3). - findposfrag_test() -> ?assertMatch([], find_pos(<<128:8/integer>>, segment_checker(1))). 
@@ -3838,31 +4267,33 @@ indexed_list_mixedkeys_bitflip_test() -> Keys = lists:ukeysort(1, generate_indexkeys(60) ++ KVL1), {{Header, SlotBin, _HL, LK}, no_timing} = generate_binary_slot( - lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing), + lookup, {forward, Keys}, {0, native}, ?INDEX_MODDATE, no_timing + ), ?assertMatch(LK, element(1, lists:last(Keys))), - <> = Header, + <> = Header, TestKey1 = element(1, lists:nth(1, KVL1)), TestKey2 = element(1, lists:nth(33, KVL1)), MH1 = leveled_codec:segment_hash(TestKey1), MH2 = leveled_codec:segment_hash(TestKey2), - + test_binary_slot(SlotBin, TestKey1, MH1, lists:nth(1, KVL1)), test_binary_slot(SlotBin, TestKey2, MH2, lists:nth(33, KVL1)), ToList = binaryslot_tolist(SlotBin, {0, native}, ?INDEX_MODDATE), ?assertMatch(Keys, ToList), - EH1 = case extract_hash(MH1) of Int1 when is_integer(Int1) -> Int1 end, - EH2 = case extract_hash(MH2) of Int2 when is_integer(Int2) -> Int2 end, + EH1 = + case extract_hash(MH1) of + Int1 when is_integer(Int1) -> Int1 + end, + EH2 = + case extract_hash(MH2) of + Int2 when is_integer(Int2) -> Int2 + end, [Pos1] = find_pos(PosBin, segment_checker(EH1)), [Pos2] = find_pos(PosBin, segment_checker(EH2)), {BN1, _BP1} = revert_position(Pos1), @@ -3896,7 +4327,9 @@ indexed_list_mixedkeys_bitflip_test() -> {SK1, _} = lists:nth(10, Keys), {EK1, _} = lists:nth(20, Keys), {O1, none} = - binaryslot_trimmed(SlotBin3, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), + binaryslot_trimmed( + SlotBin3, SK1, EK1, {0, native}, ?INDEX_MODDATE, false + ), ?assertMatch([], O1), SlotBin4 = flip_byte(SlotBin, 0, 20), @@ -3911,13 +4344,16 @@ indexed_list_mixedkeys_bitflip_test() -> ?assertMatch([], ToList4), ?assertMatch([], ToList5), {O4, none} = - binaryslot_trimmed(SlotBin4, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), + binaryslot_trimmed( + SlotBin4, SK1, EK1, {0, native}, ?INDEX_MODDATE, false + ), {O5, none} = - binaryslot_trimmed(SlotBin4, SK1, EK1, {0, native}, ?INDEX_MODDATE, false), + binaryslot_trimmed( + SlotBin4, SK1, EK1, {0, native}, ?INDEX_MODDATE, false + ), ?assertMatch([], O4), ?assertMatch([], O5). - flip_byte(Binary, Offset, Length) -> Byte1 = rand:uniform(Length) + Offset - 1, <> = Binary, @@ -3928,14 +4364,13 @@ flip_byte(Binary, Offset, Length) -> <> end. - test_binary_slot(FullBin, Key, Hash, ExpectedValue) -> % SW = os:timestamp(), {ReturnedValue, _Header} = binaryslot_get(FullBin, Key, Hash, {0, native}, ?INDEX_MODDATE), ?assertMatch(ExpectedValue, ReturnedValue). - % io:format(user, "Fetch success in ~w microseconds ~n", - % [timer:now_diff(os:timestamp(), SW)]). +% io:format(user, "Fetch success in ~w microseconds ~n", +% [timer:now_diff(os:timestamp(), SW)]). doublesize_test_() -> {timeout, 300, fun doublesize_tester/0}. 
@@ -3947,9 +4382,9 @@ doublesize_tester() -> fun({K, V}, {L1, L2}) -> case length(L1) > length(L2) of true -> - {L1, [{K, V}|L2]}; + {L1, [{K, V} | L2]}; _ -> - {[{K, V}|L1], L2} + {[{K, V} | L1], L2} end end, {KVL1, KVL2} = lists:foldr(SplitFun, {[], []}, Contents), @@ -3966,7 +4401,9 @@ size_tester(KVL1, KVL2, N) -> {RP, Filename} = {?TEST_AREA, "doublesize_test"}, Opts = - #sst_options{press_method=native, log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, log_options = leveled_log:get_opts() + }, {ok, SST1, _KD, _BB} = sst_newmerge( RP, Filename, KVL1, KVL2, false, ?DOUBLESIZE_LEVEL, N, Opts, false @@ -3988,8 +4425,12 @@ size_tester(KVL1, KVL2, N) -> end, KBIn = length(lists:filter(CheckBloomFun, KVL1 ++ KVL2)), KBOut = - length(lists:filter(CheckBloomFun, - generate_randomkeys(1, 1000, 7, 9))), + length( + lists:filter( + CheckBloomFun, + generate_randomkeys(1, 1000, 7, 9) + ) + ), ?assertMatch(N, KBIn), @@ -3998,12 +4439,10 @@ size_tester(KVL1, KVL2, N) -> ok = sst_close(SST2), ok = file:delete(filename:join(RP, Filename ++ ".sst")). - merge_test() -> filelib:ensure_dir(?TEST_AREA), merge_tester(fun testsst_new/6, fun testsst_new/8). - merge_tester(NewFunS, NewFunM) -> N = 3000, KVL1 = lists:ukeysort(1, generate_randomkeys(N + 1, N, 1, 20)), @@ -4025,37 +4464,49 @@ merge_tester(NewFunS, NewFunM) -> DSK = {o, <<"B">>, <<"SK">>, null}, DEK = {o, <<"E">>, <<"EK">>, null}, ML1 = - [{ - next, - leveled_pmanifest:new_entry(DSK, DEK, P1, "P1", none), - FK1 - }], + [ + { + next, + leveled_pmanifest:new_entry(DSK, DEK, P1, "P1", none), + FK1 + } + ], ML2 = - [{ - next, - leveled_pmanifest:new_entry(DSK, DEK, P2, "P2", none), - FK2 - }], + [ + { + next, + leveled_pmanifest:new_entry(DSK, DEK, P2, "P2", none), + FK2 + } + ], NewR = - NewFunM(?TEST_AREA, "level2_merge", ML1, ML2, false, 2, N * 2, {0, native}), + NewFunM( + ?TEST_AREA, "level2_merge", ML1, ML2, false, 2, N * 2, {0, native} + ), {ok, P3, {{Rem1, Rem2}, FK3, LK3}, _Bloom3} = NewR, ?assertMatch([], Rem1), ?assertMatch([], Rem2), ?assertMatch(true, FK3 == min(FK1, FK2)), io:format("LK1 ~w LK2 ~w LK3 ~w~n", [LK1, LK2, LK3]), ?assertMatch(true, LK3 == max(LK1, LK2)), - io:format(user, - "Created and merged two files of size ~w in ~w microseconds~n", - [N, timer:now_diff(os:timestamp(), SW0)]), + io:format( + user, + "Created and merged two files of size ~w in ~w microseconds~n", + [N, timer:now_diff(os:timestamp(), SW0)] + ), SW1 = os:timestamp(), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(P3, K)) - end, - KVL3), - io:format(user, - "Checked presence of all ~w objects in ~w microseconds~n", - [length(KVL3), timer:now_diff(os:timestamp(), SW1)]), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(P3, K)) + end, + KVL3 + ), + io:format( + user, + "Checked presence of all ~w objects in ~w microseconds~n", + [length(KVL3), timer:now_diff(os:timestamp(), SW1)] + ), ok = sst_close(P1), ok = sst_close(P2), @@ -4064,7 +4515,6 @@ merge_tester(NewFunS, NewFunM) -> ok = file:delete(?TEST_AREA ++ "/level2_src.sst"), ok = file:delete(?TEST_AREA ++ "/level2_merge.sst"). - simple_persisted_range_test() -> simple_persisted_range_tester(fun testsst_new/6). 
@@ -4072,7 +4522,7 @@ simple_persisted_range_tester(SSTNewFun) -> {RP, Filename} = {?TEST_AREA, "simple_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 16, 1, 20), KVList1 = lists:ukeysort(1, KVList0), - [{FirstKey, _FV}|_Rest] = KVList1, + [{FirstKey, _FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), {0, native}), @@ -4106,7 +4556,6 @@ simple_persisted_range_tester(SSTNewFun) -> TL5 = lists:map(fun(EK) -> {SK5, EK} end, [EK2, EK3, EK4, EK5]), lists:foreach(TestFun, TL2 ++ TL3 ++ TL4 ++ TL5). - simple_persisted_rangesegfilter_test() -> simple_persisted_rangesegfilter_tester(fun testsst_new/6). @@ -4114,7 +4563,7 @@ simple_persisted_rangesegfilter_tester(SSTNewFun) -> {RP, Filename} = {?TEST_AREA, "range_segfilter_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 16, 1, 20), KVList1 = lists:ukeysort(1, KVList0), - [{FirstKey, _FV}|_Rest] = KVList1, + [{FirstKey, _FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), {0, native}), @@ -4143,8 +4592,10 @@ simple_persisted_rangesegfilter_tester(SSTNewFun) -> end end, SegList = - lists:map(GetSegFun, - [SK1, SK2, SK3, SK4, SK5, EK1, EK2, EK3, EK4, EK5]), + lists:map( + GetSegFun, + [SK1, SK2, SK3, SK4, SK5, EK1, EK2, EK3, EK4, EK5] + ), SegChecker = segment_checker(tune_seglist(SegList)), TestFun = @@ -4161,56 +4612,69 @@ simple_persisted_rangesegfilter_tester(SSTNewFun) -> lists:foreach(CheckOutFun, OutList) end, - lists:foldl(fun(SK0, Acc) -> - TestFun(SK0, EK1, [EK2, EK3, EK4, EK5] ++ Acc), - [SK0|Acc] - end, - [], - [SK1, SK2, SK3, SK4, SK5]), - lists:foldl(fun(SK0, Acc) -> - TestFun(SK0, EK2, [EK3, EK4, EK5] ++ Acc), - [SK0|Acc] - end, - [], - [SK1, SK2, SK3, SK4, SK5]), - lists:foldl(fun(SK0, Acc) -> - TestFun(SK0, EK3, [EK4, EK5] ++ Acc), - [SK0|Acc] - end, - [], - [SK1, SK2, SK3, SK4, SK5]), - lists:foldl(fun(SK0, Acc) -> - TestFun(SK0, EK4, [EK5] ++ Acc), - [SK0|Acc] - end, - [], - [SK1, SK2, SK3, SK4, SK5]), + lists:foldl( + fun(SK0, Acc) -> + TestFun(SK0, EK1, [EK2, EK3, EK4, EK5] ++ Acc), + [SK0 | Acc] + end, + [], + [SK1, SK2, SK3, SK4, SK5] + ), + lists:foldl( + fun(SK0, Acc) -> + TestFun(SK0, EK2, [EK3, EK4, EK5] ++ Acc), + [SK0 | Acc] + end, + [], + [SK1, SK2, SK3, SK4, SK5] + ), + lists:foldl( + fun(SK0, Acc) -> + TestFun(SK0, EK3, [EK4, EK5] ++ Acc), + [SK0 | Acc] + end, + [], + [SK1, SK2, SK3, SK4, SK5] + ), + lists:foldl( + fun(SK0, Acc) -> + TestFun(SK0, EK4, [EK5] ++ Acc), + [SK0 | Acc] + end, + [], + [SK1, SK2, SK3, SK4, SK5] + ), ok = sst_clear(Pid). 
- - additional_range_test() -> % Test fetching ranges that fall into odd situations with regards to the % summary index % - ranges which fall between entries in summary % - ranges which go beyond the end of the range of the sst % - ranges which match to an end key in the summary index - IK1 = lists:foldl(fun(X, Acc) -> - Acc ++ generate_indexkey(X, X) - end, - [], - lists:seq(1, ?NOLOOK_SLOTSIZE)), + IK1 = lists:foldl( + fun(X, Acc) -> + Acc ++ generate_indexkey(X, X) + end, + [], + lists:seq(1, ?NOLOOK_SLOTSIZE) + ), Gap = 2, - IK2 = lists:foldl(fun(X, Acc) -> - Acc ++ generate_indexkey(X, X) - end, - [], - lists:seq(?NOLOOK_SLOTSIZE + Gap + 1, - 2 * ?NOLOOK_SLOTSIZE + Gap)), + IK2 = lists:foldl( + fun(X, Acc) -> + Acc ++ generate_indexkey(X, X) + end, + [], + lists:seq( + ?NOLOOK_SLOTSIZE + Gap + 1, + 2 * ?NOLOOK_SLOTSIZE + Gap + ) + ), {ok, P1, {{Rem1, Rem2}, SK, EK}, _Bloom1} = testsst_new( - ?TEST_AREA, "range1_src", IK1, IK2, false, 1, 9999, {0, native}), + ?TEST_AREA, "range1_src", IK1, IK2, false, 1, 9999, {0, native} + ), ?assertMatch([], Rem1), ?assertMatch([], Rem2), ?assertMatch(SK, element(1, lists:nth(1, IK1))), @@ -4236,15 +4700,19 @@ additional_range_test() -> ?assertMatch([], R3), % Testing beyond the range - [PastEKV] = generate_indexkey(2 * ?NOLOOK_SLOTSIZE + Gap + 1, - 2 * ?NOLOOK_SLOTSIZE + Gap + 1), + [PastEKV] = generate_indexkey( + 2 * ?NOLOOK_SLOTSIZE + Gap + 1, + 2 * ?NOLOOK_SLOTSIZE + Gap + 1 + ), R4 = sst_getkvrange(P1, element(1, GapSKV), element(1, PastEKV), 2), ?assertMatch(IK2, R4), R5 = sst_getkvrange(P1, SK, element(1, PastEKV), 2), IKAll = IK1 ++ IK2, ?assertMatch(IKAll, R5), - [MidREKV] = generate_indexkey(?NOLOOK_SLOTSIZE + Gap + 2, - ?NOLOOK_SLOTSIZE + Gap + 2), + [MidREKV] = generate_indexkey( + ?NOLOOK_SLOTSIZE + Gap + 2, + ?NOLOOK_SLOTSIZE + Gap + 2 + ), io:format(user, "Mid second range to past range test~n", []), R6 = sst_getkvrange(P1, element(1, MidREKV), element(1, PastEKV), 2), Exp6 = lists:sublist(IK2, 2, length(IK2)), @@ -4255,10 +4723,10 @@ additional_range_test() -> R7 = sst_getkvrange(P1, SK, Slot1EK, 2), ?assertMatch(IK1, R7). - % Testing beyond end (should never happen if manifest behaves) - % Test blows up anyway - % R8 = sst_getkvrange(P1, element(1, PastEKV), element(1, PastEKV), 2), - % ?assertMatch([], R8). +% Testing beyond end (should never happen if manifest behaves) +% Test blows up anyway +% R8 = sst_getkvrange(P1, element(1, PastEKV), element(1, PastEKV), 2), +% ?assertMatch([], R8). simple_switchcache_test_() -> {timeout, 60, fun simple_switchcache_tester/0}. 
@@ -4267,76 +4735,99 @@ simple_switchcache_tester() -> {RP, Filename} = {?TEST_AREA, "simple_switchcache_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 2, 1, 20), KVList1 = lists:sublist(lists:ukeysort(1, KVList0), ?LOOK_SLOTSIZE), - [{FirstKey, _FV}|_Rest] = KVList1, + [{FirstKey, _FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, OpenP4, {FirstKey, LastKey}, _Bloom1} = testsst_new(RP, Filename, 4, KVList1, length(KVList1), {0, native}), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP4, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP4, K)) + end, + KVList1 + ), ok = sst_switchlevels(OpenP4, 5), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP4, K)) - end, - KVList1), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP4, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP4, K)) + end, + KVList1 + ), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP4, K)) + end, + KVList1 + ), timer:sleep(?HIBERNATE_TIMEOUT + 10), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP4, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP4, K)) + end, + KVList1 + ), ok = sst_close(OpenP4), OptsSST = - #sst_options{press_method=native, log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, log_options = leveled_log:get_opts() + }, {ok, OpenP5, {FirstKey, LastKey}, _Bloom2} = sst_open(RP, Filename ++ ".sst", OptsSST, 5), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP5, K)) - end, - KVList1), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP5, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP5, K)) + end, + KVList1 + ), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP5, K)) + end, + KVList1 + ), ok = sst_switchlevels(OpenP5, 6), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP5, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP5, K)) + end, + KVList1 + ), ok = sst_switchlevels(OpenP5, 7), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP5, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP5, K)) + end, + KVList1 + ), timer:sleep(?HIBERNATE_TIMEOUT + 10), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP5, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP5, K)) + end, + KVList1 + ), ok = sst_close(OpenP5), ok = file:delete(filename:join(RP, Filename ++ ".sst")). - simple_persisted_slotsize_test() -> simple_persisted_slotsize_tester(fun testsst_new/6). 
simple_persisted_slotsize_tester(SSTNewFun) -> {RP, Filename} = {?TEST_AREA, "simple_slotsize_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 2, 1, 20), - KVList1 = lists:sublist(lists:ukeysort(1, KVList0), - ?LOOK_SLOTSIZE), - [{FirstKey, _FV}|_Rest] = KVList1, + KVList1 = lists:sublist( + lists:ukeysort(1, KVList0), + ?LOOK_SLOTSIZE + ), + [{FirstKey, _FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = SSTNewFun(RP, Filename, 1, KVList1, length(KVList1), {0, native}), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(Pid, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(Pid, K)) + end, + KVList1 + ), ok = sst_close(Pid), ok = file:delete(filename:join(RP, Filename ++ ".sst")). @@ -4347,7 +4838,7 @@ reader_hibernate_tester() -> {RP, Filename} = {?TEST_AREA, "readerhibernate_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 32, 1, 20), KVList1 = lists:ukeysort(1, KVList0), - [{FirstKey, FV}|_Rest] = KVList1, + [{FirstKey, FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = testsst_new(RP, Filename, 1, KVList1, length(KVList1), {0, native}), @@ -4355,7 +4846,8 @@ reader_hibernate_tester() -> SQN = leveled_codec:strip_to_seqonly({FirstKey, FV}), ?assertMatch( SQN, - sst_getsqn(Pid, FirstKey, leveled_codec:segment_hash(FirstKey))), + sst_getsqn(Pid, FirstKey, leveled_codec:segment_hash(FirstKey)) + ), timer:sleep(?HIBERNATE_TIMEOUT + 1000), ?assertMatch({FirstKey, FV}, sst_get(Pid, FirstKey)). @@ -4367,7 +4859,7 @@ delete_pending_tester() -> {RP, Filename} = {?TEST_AREA, "deletepending_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 32, 1, 20), KVList1 = lists:ukeysort(1, KVList0), - [{FirstKey, _FV}|_Rest] = KVList1, + [{FirstKey, _FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = testsst_new(RP, Filename, 1, KVList1, length(KVList1), {0, native}), @@ -4380,12 +4872,12 @@ fetch_status_test() -> {RP, Filename} = {?TEST_AREA, "fetchstatus_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 4, 1, 20), KVList1 = lists:ukeysort(1, KVList0), - [{FirstKey, _FV}|_Rest] = KVList1, + [{FirstKey, _FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, _Bloom} = testsst_new(RP, Filename, 1, KVList1, length(KVList1), {0, native}), {status, Pid, {module, gen_statem}, SItemL} = sys:get_status(Pid), - {data,[{"State", {reader, S}}]} = lists:nth(3, lists:nth(5, SItemL)), + {data, [{"State", {reader, S}}]} = lists:nth(3, lists:nth(5, SItemL)), RS = S#state.read_state, true = is_integer(array:size(RS#read_state.fetch_cache)), true = is_integer(array:size(element(2, RS#read_state.blockindex_cache))), @@ -4412,7 +4904,7 @@ simple_persisted_tester(SSTNewFun) -> {RP, Filename} = {?TEST_AREA, "simple_test"}, KVList0 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 32, 1, 20), KVList1 = lists:ukeysort(1, KVList0), - [{FirstKey, _FV}|_Rest] = KVList1, + [{FirstKey, _FV} | _Rest] = KVList1, {LastKey, _LV} = lists:last(KVList1), {ok, Pid, {FirstKey, LastKey}, Bloom} = SSTNewFun(RP, Filename, Level, KVList1, length(KVList1), {0, native}), @@ -4420,53 +4912,67 @@ simple_persisted_tester(SSTNewFun) -> B0 = check_binary_references(Pid), SW0 = os:timestamp(), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(Pid, K)) - end, - KVList1), - io:format(user, - "Checking for ~w keys (once) in file with cache hit took ~w " - ++ "microseconds~n", - 
[length(KVList1), timer:now_diff(os:timestamp(), SW0)]), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(Pid, K)) + end, + KVList1 + ), + io:format( + user, + "Checking for ~w keys (once) in file with cache hit took ~w " ++ + "microseconds~n", + [length(KVList1), timer:now_diff(os:timestamp(), SW0)] + ), SW1 = os:timestamp(), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(Pid, K)), - ?assertMatch({K, V}, sst_get(Pid, K)) - end, - KVList1), - io:format(user, - "Checking for ~w keys (twice) in file with cache hit took ~w " - ++ "microseconds~n", - [length(KVList1), timer:now_diff(os:timestamp(), SW1)]), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(Pid, K)), + ?assertMatch({K, V}, sst_get(Pid, K)) + end, + KVList1 + ), + io:format( + user, + "Checking for ~w keys (twice) in file with cache hit took ~w " ++ + "microseconds~n", + [length(KVList1), timer:now_diff(os:timestamp(), SW1)] + ), KVList2 = generate_randomkeys(1, ?LOOK_SLOTSIZE * 32, 1, 20), MapFun = fun({K, V}, Acc) -> In = lists:keymember(K, 1, KVList1), case {K > FirstKey, LastKey > K, In} of {true, true, false} -> - [{K, leveled_codec:segment_hash(K), V}|Acc]; + [{K, leveled_codec:segment_hash(K), V} | Acc]; _ -> Acc end end, - true = [] == MapFun({FirstKey, "V"}, []), % coverage cheat within MapFun + % coverage cheat within MapFun + true = [] == MapFun({FirstKey, "V"}, []), KVList3 = lists:foldl(MapFun, [], KVList2), SW2 = os:timestamp(), - lists:foreach(fun({K, H, _V}) -> - ?assertMatch(not_present, sst_get(Pid, K, H)) - end, - KVList3), - io:format(user, - "Checking for ~w missing keys took ~w microseconds~n", - [length(KVList3), timer:now_diff(os:timestamp(), SW2)]), + lists:foreach( + fun({K, H, _V}) -> + ?assertMatch(not_present, sst_get(Pid, K, H)) + end, + KVList3 + ), + io:format( + user, + "Checking for ~w missing keys took ~w microseconds~n", + [length(KVList3), timer:now_diff(os:timestamp(), SW2)] + ), FetchList1 = sst_getkvrange(Pid, all, all, 2), FoldFun = fun(X, Acc) -> - case X of - {pointer, P, S, SK, EK} -> - Acc ++ sst_getslots(P, [{pointer, P, S, SK, EK}]); - _ -> - Acc ++ [X] - end end, + case X of + {pointer, P, S, SK, EK} -> + Acc ++ sst_getslots(P, [{pointer, P, S, SK, EK}]); + _ -> + Acc ++ [X] + end + end, FetchedList1 = lists:foldl(FoldFun, [], FetchList1), ?assertMatch(KVList1, FetchedList1), @@ -4495,10 +5001,12 @@ simple_persisted_tester(SSTNewFun) -> ?assertMatch(SubKVListA1L, length(FetchedListB2)), ?assertMatch(SubKVListA1, FetchedListB2), - FetchListB3 = sst_getkvrange(Pid, - Eight000Key, - {o, null, null, null}, - 4), + FetchListB3 = sst_getkvrange( + Pid, + Eight000Key, + {o, null, null, null}, + 4 + ), FetchedListB3 = lists:foldl(FoldFun, [], FetchListB3), SubKVListA3 = lists:nthtail(800 - 1, KVList1), SubKVListA3L = length(SubKVListA3), @@ -4507,10 +5015,12 @@ simple_persisted_tester(SSTNewFun) -> ?assertMatch(SubKVListA3, FetchedListB3), io:format("Eight hundredth key ~w~n", [Eight000Key]), - FetchListB4 = sst_getkvrange(Pid, - Eight000Key, - Eight000Key, - 4), + FetchListB4 = sst_getkvrange( + Pid, + Eight000Key, + Eight000Key, + 4 + ), FetchedListB4 = lists:foldl(FoldFun, [], FetchListB4), ?assertMatch([{Eight000Key, V800}], FetchedListB4), @@ -4519,27 +5029,33 @@ simple_persisted_tester(SSTNewFun) -> ok = sst_close(Pid), io:format(user, "Reopen SST file~n", []), - OptsSST = - #sst_options{press_method=native, log_options=leveled_log:get_opts()}, + OptsSST = + #sst_options{ + press_method = native, log_options = leveled_log:get_opts() + }, 
{ok, OpenP, {FirstKey, LastKey}, Bloom} = sst_open(RP, Filename ++ ".sst", OptsSST, Level), B2 = check_binary_references(OpenP), - lists:foreach(fun({K, V}) -> - ?assertMatch({K, V}, sst_get(OpenP, K)), - ?assertMatch({K, V}, sst_get(OpenP, K)) - end, - KVList1), + lists:foreach( + fun({K, V}) -> + ?assertMatch({K, V}, sst_get(OpenP, K)), + ?assertMatch({K, V}, sst_get(OpenP, K)) + end, + KVList1 + ), garbage_collect(OpenP), B3 = check_binary_references(OpenP), - ?assertMatch(0, B2), % Opens with an empty cache - ?assertMatch(true, B3 > B2), % Now has headers in cache + % Opens with an empty cache + ?assertMatch(0, B2), + % Now has headers in cache + ?assertMatch(true, B3 > B2), ?assertMatch(false, B3 > B0 * 2), - % Not significantly bigger than when created new + % Not significantly bigger than when created new ?assertMatch(false, B3 > B1 * 2), - % Not significantly bigger than when created new + % Not significantly bigger than when created new ok = sst_close(OpenP), ok = file:delete(filename:join(RP, Filename ++ ".sst")). @@ -4566,51 +5082,92 @@ key_dominates_test() -> KV7 = MakeKVFun(<<"Key1">>, 99, tomb), KL1 = [KV1, KV2], KL2 = [KV3, KV4], - ?assertMatch({{next_key, KV1}, [KV2], KL2}, - key_dominates(KL1, KL2, {false, 1})), - ?assertMatch({{next_key, KV1}, KL2, [KV2]}, - key_dominates(KL2, KL1, {false, 1})), - ?assertMatch({skipped_key, KL2, KL1}, - key_dominates([KV5|KL2], KL1, {false, 1})), - ?assertMatch({{next_key, KV1}, [KV2], []}, - key_dominates(KL1, [], {false, 1})), - ?assertMatch({skipped_key, [KV6|KL2], [KV2]}, - key_dominates([KV6|KL2], KL1, {false, 1})), - ?assertMatch({{next_key, KV6}, KL2, [KV2]}, - key_dominates([KV6|KL2], [KV2], {false, 1})), - ?assertMatch({skipped_key, [KV6|KL2], [KV2]}, - key_dominates([KV6|KL2], KL1, {true, 1})), - ?assertMatch({skipped_key, [KV6|KL2], [KV2]}, - key_dominates([KV6|KL2], KL1, {true, 1000})), - ?assertMatch({{next_key, KV6}, KL2, [KV2]}, - key_dominates([KV6|KL2], [KV2], {true, 1})), - ?assertMatch({skipped_key, KL2, [KV2]}, - key_dominates([KV6|KL2], [KV2], {true, 1000})), - ?assertMatch({skipped_key, [], []}, - key_dominates([KV6], [], {true, 1000})), - ?assertMatch({skipped_key, [], []}, - key_dominates([], [KV6], {true, 1000})), - ?assertMatch({{next_key, KV6}, [], []}, - key_dominates([KV6], [], {true, 1})), - ?assertMatch({{next_key, KV6}, [], []}, - key_dominates([], [KV6], {true, 1})), - ?assertMatch({skipped_key, [], []}, - key_dominates([KV7], [], {true, 1})), - ?assertMatch({skipped_key, [], []}, - key_dominates([], [KV7], {true, 1})), - ?assertMatch({skipped_key, [KV7|KL2], [KV2]}, - key_dominates([KV7|KL2], KL1, {false, 1})), - ?assertMatch({{next_key, KV7}, KL2, [KV2]}, - key_dominates([KV7|KL2], [KV2], {false, 1})), - ?assertMatch({skipped_key, [KV7|KL2], [KV2]}, - key_dominates([KV7|KL2], KL1, {true, 1})), - ?assertMatch({skipped_key, KL2, [KV2]}, - key_dominates([KV7|KL2], [KV2], {true, 1})). 
+ ?assertMatch( + {{next_key, KV1}, [KV2], KL2}, + key_dominates(KL1, KL2, {false, 1}) + ), + ?assertMatch( + {{next_key, KV1}, KL2, [KV2]}, + key_dominates(KL2, KL1, {false, 1}) + ), + ?assertMatch( + {skipped_key, KL2, KL1}, + key_dominates([KV5 | KL2], KL1, {false, 1}) + ), + ?assertMatch( + {{next_key, KV1}, [KV2], []}, + key_dominates(KL1, [], {false, 1}) + ), + ?assertMatch( + {skipped_key, [KV6 | KL2], [KV2]}, + key_dominates([KV6 | KL2], KL1, {false, 1}) + ), + ?assertMatch( + {{next_key, KV6}, KL2, [KV2]}, + key_dominates([KV6 | KL2], [KV2], {false, 1}) + ), + ?assertMatch( + {skipped_key, [KV6 | KL2], [KV2]}, + key_dominates([KV6 | KL2], KL1, {true, 1}) + ), + ?assertMatch( + {skipped_key, [KV6 | KL2], [KV2]}, + key_dominates([KV6 | KL2], KL1, {true, 1000}) + ), + ?assertMatch( + {{next_key, KV6}, KL2, [KV2]}, + key_dominates([KV6 | KL2], [KV2], {true, 1}) + ), + ?assertMatch( + {skipped_key, KL2, [KV2]}, + key_dominates([KV6 | KL2], [KV2], {true, 1000}) + ), + ?assertMatch( + {skipped_key, [], []}, + key_dominates([KV6], [], {true, 1000}) + ), + ?assertMatch( + {skipped_key, [], []}, + key_dominates([], [KV6], {true, 1000}) + ), + ?assertMatch( + {{next_key, KV6}, [], []}, + key_dominates([KV6], [], {true, 1}) + ), + ?assertMatch( + {{next_key, KV6}, [], []}, + key_dominates([], [KV6], {true, 1}) + ), + ?assertMatch( + {skipped_key, [], []}, + key_dominates([KV7], [], {true, 1}) + ), + ?assertMatch( + {skipped_key, [], []}, + key_dominates([], [KV7], {true, 1}) + ), + ?assertMatch( + {skipped_key, [KV7 | KL2], [KV2]}, + key_dominates([KV7 | KL2], KL1, {false, 1}) + ), + ?assertMatch( + {{next_key, KV7}, KL2, [KV2]}, + key_dominates([KV7 | KL2], [KV2], {false, 1}) + ), + ?assertMatch( + {skipped_key, [KV7 | KL2], [KV2]}, + key_dominates([KV7 | KL2], KL1, {true, 1}) + ), + ?assertMatch( + {skipped_key, KL2, [KV2]}, + key_dominates([KV7 | KL2], [KV2], {true, 1}) + ). nonsense_coverage_test() -> ?assertMatch( {ok, reader, #state{}}, - code_change(nonsense, reader, #state{}, nonsense)), + code_change(nonsense, reader, #state{}, nonsense) + ), SampleBin = <<0:128/integer>>, FlippedBin = flip_byte(SampleBin, 0, 16), ?assertMatch(false, FlippedBin == SampleBin). @@ -4620,38 +5177,36 @@ hashmatching_bytreesize_test() -> V = leveled_head:riak_metadata_to_binary( term_to_binary([{"actor1", 1}]), - <<1:32/integer, - 0:32/integer, - 0:32/integer>> + <<1:32/integer, 0:32/integer, 0:32/integer>> ), GenKeyFun = fun(X) -> LK = - {?RIAK_TAG, - B, - list_to_binary("Key" ++ integer_to_list(X)), + {?RIAK_TAG, B, list_to_binary("Key" ++ integer_to_list(X)), null}, LKV = leveled_codec:generate_ledgerkv( - LK, X, V, byte_size(V), infinity), + LK, X, V, byte_size(V), infinity + ), {_Bucket, _Key, MetaValue, _Hashes, _LastMods} = LKV, {LK, MetaValue} end, KVL = lists:map(GenKeyFun, lists:seq(1, 128)), {{PosBinIndex1, _FullBin, _HL, _LK}, no_timing} = generate_binary_slot( - lookup, {forward, KVL}, {0, native}, ?INDEX_MODDATE, no_timing), + lookup, {forward, KVL}, {0, native}, ?INDEX_MODDATE, no_timing + ), check_segment_match(PosBinIndex1, KVL, small), check_segment_match(PosBinIndex1, KVL, medium). 
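The segment-matching checks above hinge on mapping a ledger key to a tictac segment. A small sketch of that mapping, assuming (as the elided binary in check_segment_match suggests) that the segment is taken over the bucket and key binaries concatenated:

%% Derive the tree segment for a simple {Tag, Bucket, Key, null} ledger
%% key; TreeSize is a leveled_tictac size such as small or medium.
segment_for_key({_Tag, B, K, null}, TreeSize) when is_binary(B), is_binary(K) ->
    leveled_tictac:get_segment(
        leveled_tictac:keyto_segment32(<<B/binary, K/binary>>),
        TreeSize
    ).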
- check_segment_match(PosBinIndex1, KVL, TreeSize) -> CheckFun = fun({{_T, B, K, null}, _V}) -> Seg = leveled_tictac:get_segment( - leveled_tictac:keyto_segment32(<>), - TreeSize), + leveled_tictac:keyto_segment32(<>), + TreeSize + ), SegChecker = segment_checker(tune_seglist([Seg])), PosList = find_pos(PosBinIndex1, SegChecker), ?assertMatch(true, length(PosList) >= 1) @@ -4673,7 +5228,8 @@ stop_whenstarter_stopped_testto() -> TestFun = fun(X, Acc) -> case Acc of - false -> false; + false -> + false; true -> timer:sleep(X), is_process_alive(RP) @@ -4707,24 +5263,29 @@ corrupted_block_rangetester(BlockMethod, TestCount) -> RandomRanges = lists:map(RandomRangesFun, lists:seq(1, TestCount)), B1 = leveled_sstblock:serialise_block( - no_lookup, BlockMethod, lists:sublist(KVL1, 1, 20)), + no_lookup, BlockMethod, lists:sublist(KVL1, 1, 20) + ), B2 = leveled_sstblock:serialise_block( - no_lookup, BlockMethod, lists:sublist(KVL1, 21, 20)), + no_lookup, BlockMethod, lists:sublist(KVL1, 21, 20) + ), MidBlock = leveled_sstblock:serialise_block( - no_lookup, BlockMethod, lists:sublist(KVL1, 41, 20)), + no_lookup, BlockMethod, lists:sublist(KVL1, 41, 20) + ), B4 = leveled_sstblock:serialise_block( - no_lookup, BlockMethod, lists:sublist(KVL1, 61, 20)), + no_lookup, BlockMethod, lists:sublist(KVL1, 61, 20) + ), B5 = leveled_sstblock:serialise_block( - no_lookup, BlockMethod, lists:sublist(KVL1, 81, 20)), + no_lookup, BlockMethod, lists:sublist(KVL1, 81, 20) + ), CorruptBlockFun = fun(Block) -> case rand:uniform(10) < 2 of true -> - flip_byte(Block, 0 , byte_size(Block)); + flip_byte(Block, 0, byte_size(Block)); false -> Block end @@ -4736,10 +5297,11 @@ corrupted_block_rangetester(BlockMethod, TestCount) -> lists:map(CorruptBlockFun, [B1, B2, MidBlock, B4, B5]), BR = blocks_required( - {SK, EK}, CB1, CB2, CBMid, CB4, CB5, BlockMethod), + {SK, EK}, CB1, CB2, CBMid, CB4, CB5, BlockMethod + ), ?assertMatch(true, length(BR) =< 100), lists:foreach(fun({_K, _V}) -> ok end, BR) - end, + end, lists:foreach(CheckFun, RandomRanges). 
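The corrupted-block tests rely on every serialised block carrying a CRC suffix, so a damaged block degrades to a safe default rather than crashing the reader. A hedged sketch of that behaviour, reusing the byte-flipping helper sketched earlier (flip_one_byte is illustrative, not part of the module):

%% Serialise a KV list, corrupt one byte anywhere in the result, and
%% read it back; the CRC mismatch should make get_all fall back to [].
corrupt_then_get_all(KVL) ->
    BlockMethod = {1, native},
    Block = leveled_sstblock:serialise_block(no_lookup, BlockMethod, KVL),
    Corrupted = flip_one_byte(Block, 0, byte_size(Block)),
    leveled_sstblock:get_all(Corrupted, BlockMethod).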
corrupted_block_fetch_v0_test() -> @@ -4760,24 +5322,19 @@ corrupted_block_fetch_tester(BlockMethod) -> {{Header, SlotBin, _HashL, _LastKey}, _BT} = generate_binary_slot( - lookup, {forward, KVL1}, BlockMethod, false, no_timing), - <> = Header, + lookup, {forward, KVL1}, BlockMethod, false, no_timing + ), + <> = Header, HS = byte_size(Header), - <> = SlotBin, + <> = SlotBin, - CorruptB3 = flip_byte(B3, 0 , B3L), + CorruptB3 = flip_byte(B3, 0, B3L), CorruptSlotBin = - <>, CheckFun = @@ -4812,16 +5369,20 @@ block_index_cache_test() -> {Mega, Sec, _} = os:timestamp(), Now = Mega * 1000000 + Sec, EntriesTS = - lists:map(fun(I) -> - TS = Now - I + 1, - {I, <<0:160/integer, TS:32/integer, 0:32/integer>>} - end, - lists:seq(1, 8)), + lists:map( + fun(I) -> + TS = Now - I + 1, + {I, <<0:160/integer, TS:32/integer, 0:32/integer>>} + end, + lists:seq(1, 8) + ), EntriesNoTS = - lists:map(fun(I) -> - {I, <<0:160/integer, 0:32/integer>>} - end, - lists:seq(1, 8)), + lists:map( + fun(I) -> + {I, <<0:160/integer, 0:32/integer>>} + end, + lists:seq(1, 8) + ), HeaderTS = <<0:160/integer, Now:32/integer, 0:32/integer>>, HeaderNoTS = <<0:192>>, BIC = new_blockindex_cache(8), @@ -4845,61 +5406,70 @@ key_matchesprefix_test() -> FileName = "keymatchesprefix_test", IndexKeyFun = fun(I) -> - {{?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, - list_to_binary("19601301|" - ++ io_lib:format("~6..0w", [I]))}, - list_to_binary(io_lib:format("~6..0w", [I]))}, - {1, {active, infinity}, no_lookup, null}} + { + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, + list_to_binary( + "19601301|" ++ + io_lib:format("~6..0w", [I]) + )}, + list_to_binary(io_lib:format("~6..0w", [I]))}, + {1, {active, infinity}, no_lookup, null} + } end, IndexEntries = lists:map(IndexKeyFun, lists:seq(1, 500)), OddIdxKey = - {{?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"19601301">>}, - list_to_binary(io_lib:format("~6..0w", [0]))}, - {1, {active, infinity}, no_lookup, null}}, + { + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, + list_to_binary(io_lib:format("~6..0w", [0]))}, + {1, {active, infinity}, no_lookup, null} + }, OptsSST = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, + log_options = leveled_log:get_opts() + }, {ok, P1, {_FK1, _LK1}, _Bloom1} = sst_new( - ?TEST_AREA, FileName, 1, [OddIdxKey|IndexEntries], 6000, OptsSST), + ?TEST_AREA, FileName, 1, [OddIdxKey | IndexEntries], 6000, OptsSST + ), IdxRange2 = sst_getkvrange( P1, - {?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1960">>}, null}, - {?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1960">>}, + null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRange4 = sst_getkvrange( P1, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|000251">>}, null}, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRangeX = sst_getkvrange( P1, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301">>}, null}, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRangeY = sst_getkvrange( P1, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, 
{<<"dob_bin">>, <<"19601301|">>}, null}, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRangeZ = sst_getkvrange( P1, @@ -4907,7 +5477,8 @@ key_matchesprefix_test() -> {<<"dob_bin">>, <<"19601301|">>}, null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|000500">>}, null}, - 16), + 16 + ), ?assertMatch(501, length(IdxRange2)), ?assertMatch(250, length(IdxRange4)), ?assertMatch(501, length(IdxRangeX)), @@ -4918,59 +5489,59 @@ key_matchesprefix_test() -> ObjectKeyFun = fun(I) -> - {{?RIAK_TAG, - {<<"btype">>, <<"bucket">>}, - list_to_binary("19601301|" - ++ io_lib:format("~6..0w", [I])), - null}, - {1, {active, infinity}, {0, 0}, null}} + { + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + list_to_binary( + "19601301|" ++ + io_lib:format("~6..0w", [I]) + ), + null}, + {1, {active, infinity}, {0, 0}, null} + } end, ObjectEntries = lists:map(ObjectKeyFun, lists:seq(1, 500)), OddObjKey = - {{?RIAK_TAG, - {<<"btype">>, <<"bucket">>}, - <<"19601301">>, - null}, - {1, {active, infinity}, {100, 100}, null}}, + { + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301">>, null}, + {1, {active, infinity}, {100, 100}, null} + }, OptsSST = - #sst_options{press_method=native, log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, log_options = leveled_log:get_opts() + }, {ok, P2, {_FK2, _LK2}, _Bloom2} = sst_new( - ?TEST_AREA, FileName, 1, [OddObjKey|ObjectEntries], 6000, OptsSST), + ?TEST_AREA, FileName, 1, [OddObjKey | ObjectEntries], 6000, OptsSST + ), ObjRange2 = sst_getkvrange( P2, - {?RIAK_TAG, - {<<"btype">>, <<"bucket">>}, - <<"1960">>, null}, - {?RIAK_TAG, - {<<"btype">>, <<"bucket">>}, - <<"1961">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1960">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1961">>, null}, + 16 + ), ObjRange4 = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000251">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"1961">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000251">>, + null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1961">>, null}, + 16 + ), ObjRangeX = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"1961">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1961">>, null}, + 16 + ), ObjRangeY = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"1961">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1961">>, null}, + 16 + ), ?assertMatch(501, length(ObjRange2)), ?assertMatch(250, length(ObjRange4)), ?assertMatch(501, length(ObjRangeX)), @@ -4978,41 +5549,43 @@ key_matchesprefix_test() -> ok = sst_close(P2), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). 
- range_key_indextermmatch_test() -> FileName = "indextermmatch_test", IndexKeyFun = fun(I) -> - {{?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, - <<"19601301">>}, - list_to_binary(io_lib:format("~6..0w", [I]))}, - {1, {active, infinity}, no_lookup, null}} + { + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, <<"19601301">>}, + list_to_binary(io_lib:format("~6..0w", [I]))}, + {1, {active, infinity}, no_lookup, null} + } end, IndexEntries = lists:map(IndexKeyFun, lists:seq(1, 500)), OptsSST = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, + log_options = leveled_log:get_opts() + }, {ok, P1, {_FK1, _LK1}, _Bloom1} = sst_new(?TEST_AREA, FileName, 1, IndexEntries, 6000, OptsSST), IdxRange1 = sst_getkvrange( P1, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1959">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1959">>}, + null}, all, - 16), + 16 + ), IdxRange2 = sst_getkvrange( P1, - {?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1960">>}, null}, - {?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1960">>}, + null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRange3 = sst_getkvrange( P1, @@ -5020,7 +5593,8 @@ range_key_indextermmatch_test() -> {<<"dob_bin">>, <<"19601301">>}, <<"000000">>}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301">>}, null}, - 16), + 16 + ), IdxRange4 = sst_getkvrange( P1, @@ -5028,7 +5602,8 @@ range_key_indextermmatch_test() -> {<<"dob_bin">>, <<"19601301">>}, <<"000100">>}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301">>}, null}, - 16), + 16 + ), IdxRange5 = sst_getkvrange( P1, @@ -5036,7 +5611,8 @@ range_key_indextermmatch_test() -> {<<"dob_bin">>, <<"19601301">>}, null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301">>}, <<"000100">>}, - 16), + 16 + ), IdxRange6 = sst_getkvrange( P1, @@ -5044,7 +5620,8 @@ range_key_indextermmatch_test() -> {<<"dob_bin">>, <<"19601301">>}, <<"000300">>}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301">>}, null}, - 16), + 16 + ), IdxRange7 = sst_getkvrange( P1, @@ -5052,7 +5629,8 @@ range_key_indextermmatch_test() -> {<<"dob_bin">>, <<"19601301">>}, null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301">>}, <<"000300">>}, - 16), + 16 + ), IdxRange8 = sst_getkvrange( P1, @@ -5060,7 +5638,8 @@ range_key_indextermmatch_test() -> {<<"dob_bin">>, <<"19601301">>}, null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601302">>}, <<"000300">>}, - 16), + 16 + ), IdxRange9 = sst_getkvrange( P1, @@ -5068,7 +5647,8 @@ range_key_indextermmatch_test() -> {<<"dob_bin">>, <<"19601300">>}, <<"000100">>}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301">>}, null}, - 16), + 16 + ), ?assertMatch(500, length(IdxRange1)), ?assertMatch(500, length(IdxRange2)), ?assertMatch(500, length(IdxRange3)), @@ -5081,23 +5661,27 @@ range_key_indextermmatch_test() -> ok = sst_close(P1), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). 
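The term-match queries above differ from the prefix queries in that both range keys name the same index term, with the object key used to bound the scan within that term; a null object key in the end key is treated as running to the end of the term (IdxRange3 returning all 500 entries illustrates this). A sketch of that shape (the helper name is illustrative):

%% Range query within a single index term, starting at StartObjKey and
%% running to the end of the term (null in the end key position).
index_term_range(Pid, Bucket, Field, Term, StartObjKey) ->
    sst_getkvrange(
        Pid,
        {?IDX_TAG, Bucket, {Field, Term}, StartObjKey},
        {?IDX_TAG, Bucket, {Field, Term}, null},
        16
    ).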
- range_key_lestthanprefix_test() -> FileName = "lessthanprefix_test", IndexKeyFun = fun(I) -> - {{?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, - list_to_binary("19601301|" - ++ io_lib:format("~6..0w", [I]))}, - list_to_binary(io_lib:format("~6..0w", [I]))}, - {1, {active, infinity}, no_lookup, null}} + { + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, + {<<"dob_bin">>, + list_to_binary( + "19601301|" ++ + io_lib:format("~6..0w", [I]) + )}, + list_to_binary(io_lib:format("~6..0w", [I]))}, + {1, {active, infinity}, no_lookup, null} + } end, IndexEntries = lists:map(IndexKeyFun, lists:seq(1, 500)), OptsSST = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, + log_options = leveled_log:get_opts() + }, {ok, P1, {_FK1, _LK1}, _Bloom1} = sst_new(?TEST_AREA, FileName, 1, IndexEntries, 6000, OptsSST), @@ -5106,43 +5690,47 @@ range_key_lestthanprefix_test() -> IdxRange1 = sst_getkvrange( P1, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1959">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1959">>}, + null}, all, - 16), + 16 + ), IdxRange2 = sst_getkvrange( P1, - {?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1960">>}, null}, - {?IDX_TAG, - {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1960">>}, + null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRange3 = sst_getkvrange( P1, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1960">>}, null}, + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1960">>}, + null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|000250">>}, null}, - 16), + 16 + ), IdxRange4 = sst_getkvrange( P1, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|000251">>}, null}, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRange5 = sst_getkvrange( P1, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|000250">>}, <<"000251">>}, - {?IDX_TAG, {<<"btype">>, <<"bucket">>}, - {<<"dob_bin">>, <<"1961">>}, null}, - 16), + {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"1961">>}, + null}, + 16 + ), IdxRange6 = sst_getkvrange( P1, @@ -5150,7 +5738,8 @@ range_key_lestthanprefix_test() -> {<<"dob_bin">>, <<"19601301|000">>}, null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|0002">>}, null}, - 16), + 16 + ), IdxRange7 = sst_getkvrange( P1, @@ -5158,7 +5747,8 @@ range_key_lestthanprefix_test() -> {<<"dob_bin">>, <<"19601301|000">>}, null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|0001">>}, null}, - 16), + 16 + ), IdxRange8 = sst_getkvrange( P1, @@ -5166,7 +5756,8 @@ range_key_lestthanprefix_test() -> {<<"dob_bin">>, <<"19601301|000000">>}, null}, {?IDX_TAG, {<<"btype">>, <<"bucket">>}, {<<"dob_bin">>, <<"19601301|000100">>}, null}, - 16), + 16 + ), ?assertMatch(500, length(IdxRange1)), ?assertMatch(500, length(IdxRange2)), ?assertMatch(250, length(IdxRange3)), @@ -5180,17 +5771,22 @@ range_key_lestthanprefix_test() -> ObjectKeyFun = fun(I) -> - {{?RIAK_TAG, - {<<"btype">>, <<"bucket">>}, - list_to_binary("19601301|" - ++ io_lib:format("~6..0w", [I])), - null}, - {1, {active, infinity}, {0, 0}, null}} + { + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, + list_to_binary( + 
"19601301|" ++ + io_lib:format("~6..0w", [I]) + ), + null}, + {1, {active, infinity}, {0, 0}, null} + } end, ObjectEntries = lists:map(ObjectKeyFun, lists:seq(1, 500)), OptsSST = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, + log_options = leveled_log:get_opts() + }, {ok, P2, {_FK2, _LK2}, _Bloom2} = sst_new(?TEST_AREA, FileName, 1, ObjectEntries, 6000, OptsSST), @@ -5201,57 +5797,54 @@ range_key_lestthanprefix_test() -> P2, {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1959">>, null}, all, - 16), + 16 + ), ObjRange2 = sst_getkvrange( P2, - {?RIAK_TAG, - {<<"btype">>, <<"bucket">>}, - <<"1960">>, null}, - {?RIAK_TAG, - {<<"btype">>, <<"bucket">>}, - <<"1961">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1960">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1961">>, null}, + 16 + ), ObjRange3 = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"1960">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000250">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1960">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000250">>, + null}, + 16 + ), ObjRange4 = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000251">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"1961">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000251">>, + null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"1961">>, null}, + 16 + ), ObjRange6 = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|0002">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|0002">>, null}, + 16 + ), ObjRange7 = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|0001">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000">>, null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|0001">>, null}, + 16 + ), ObjRange8 = sst_getkvrange( P2, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000000">>, null}, - {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000100">>, null}, - 16), + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000000">>, + null}, + {?RIAK_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000100">>, + null}, + 16 + ), ?assertMatch(500, length(ObjRange1)), ?assertMatch(500, length(ObjRange2)), @@ -5265,75 +5858,75 @@ range_key_lestthanprefix_test() -> HeadKeyFun = fun(I) -> - {{?HEAD_TAG, - {<<"btype">>, <<"bucket">>}, - list_to_binary("19601301|" - ++ io_lib:format("~6..0w", [I])), - null}, - {1, {active, infinity}, {0, 0}, null, undefined}} + { + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, + list_to_binary( + "19601301|" ++ + io_lib:format("~6..0w", [I]) + ), + null}, + {1, {active, infinity}, {0, 0}, null, undefined} + } end, HeadEntries = lists:map(HeadKeyFun, lists:seq(1, 500)), {ok, P3, {_FK3, _LK3}, _Bloom3} = sst_new(?TEST_AREA, FileName, 1, HeadEntries, 6000, OptsSST), - HeadFileStateSize = size_summary(P3), + HeadFileStateSize = size_summary(P3), HeadRange1 = sst_getkvrange( P3, {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"1959">>, null}, all, - 16), + 16 + ), HeadRange2 = sst_getkvrange( P3, - {?HEAD_TAG, - {<<"btype">>, <<"abucket">>}, - <<"1962">>, null}, - {?HEAD_TAG, - {<<"btype">>, 
<<"zbucket">>}, - <<"1960">>, null}, - 16), + {?HEAD_TAG, {<<"btype">>, <<"abucket">>}, <<"1962">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"zbucket">>}, <<"1960">>, null}, + 16 + ), HeadRange3 = sst_getkvrange( P3, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"1960">>, null}, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000250">>, null}, - 16), + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"1960">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000250">>, + null}, + 16 + ), HeadRange4 = sst_getkvrange( P3, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000251">>, null}, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"1961">>, null}, - 16), + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000251">>, + null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"1961">>, null}, + 16 + ), HeadRange6 = sst_getkvrange( P3, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000">>, null}, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|0002">>, null}, - 16), + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|0002">>, null}, + 16 + ), HeadRange7 = sst_getkvrange( P3, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000">>, null}, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|0001">>, null}, - 16), + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000">>, null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|0001">>, null}, + 16 + ), HeadRange8 = sst_getkvrange( P3, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000000">>, null}, - {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, - <<"19601301|000100">>, null}, - 16), + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000000">>, + null}, + {?HEAD_TAG, {<<"btype">>, <<"bucket">>}, <<"19601301|000100">>, + null}, + 16 + ), ?assertMatch(500, length(HeadRange1)), ?assertMatch(500, length(HeadRange2)), @@ -5345,42 +5938,51 @@ range_key_lestthanprefix_test() -> ok = sst_close(P3), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), - [_HdO|RestObjectEntries] = ObjectEntries, - [_HdI|RestIndexEntries] = IndexEntries, - [_Hdh|RestHeadEntries] = HeadEntries, + [_HdO | RestObjectEntries] = ObjectEntries, + [_HdI | RestIndexEntries] = IndexEntries, + [_Hdh | RestHeadEntries] = HeadEntries, {ok, P4, {_FK4, _LK4}, _Bloom4} = sst_new( ?TEST_AREA, - FileName, 1, - [HeadKeyFun(9999)|RestIndexEntries], - 6000, OptsSST), + FileName, + 1, + [HeadKeyFun(9999) | RestIndexEntries], + 6000, + OptsSST + ), print_compare_size("Index", IndexFileStateSize, size_summary(P4)), ok = sst_close(P4), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), {ok, P5, {_FK5, _LK5}, _Bloom5} = - sst_new( - ?TEST_AREA, - FileName, 1, - [HeadKeyFun(9999)|RestObjectEntries], - 6000, OptsSST), + sst_new( + ?TEST_AREA, + FileName, + 1, + [HeadKeyFun(9999) | RestObjectEntries], + 6000, + OptsSST + ), print_compare_size("Object", ObjectFileStateSize, size_summary(P5)), ok = sst_close(P5), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")), {ok, P6, {_FK6, _LK6}, _Bloom6} = - sst_new( - ?TEST_AREA, - FileName, 1, - RestHeadEntries ++ [IndexKeyFun(1)], - 6000, OptsSST), + sst_new( + ?TEST_AREA, + FileName, + 1, + RestHeadEntries ++ [IndexKeyFun(1)], + 6000, + OptsSST + ), print_compare_size("Head", HeadFileStateSize, size_summary(P6)), ok = sst_close(P6), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). 
size_summary(P) -> - Summary = + Summary = case sys:get_state(P) of State when is_tuple(State) -> element(2, element(2, State)) @@ -5392,7 +5994,8 @@ print_compare_size(Type, OptimisedSize, UnoptimisedSize) -> io:format( user, "~n~s State optimised to ~w bytes unoptimised ~w bytes~n", - [Type, OptimisedSize * 8, UnoptimisedSize * 8]), + [Type, OptimisedSize * 8, UnoptimisedSize * 8] + ), % Reduced by at least a quarter ?assert(OptimisedSize < (UnoptimisedSize - (UnoptimisedSize div 4))). @@ -5403,8 +6006,10 @@ single_key_test() -> Chunk = crypto:strong_rand_bytes(16), MV = leveled_codec:convert_to_ledgerv(LK, 1, Chunk, 16, infinity), OptsSST = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, + log_options = leveled_log:get_opts() + }, {ok, P1, {LK, LK}, _Bloom1} = sst_new(?TEST_AREA, FileName, 1, [{LK, MV}], 6000, OptsSST), ?assertMatch({LK, MV}, sst_get(P1, LK)), @@ -5413,11 +6018,13 @@ single_key_test() -> IndexSpecs = [{add, Field, <<"20220101">>}], [{IdxK, IdxV}] = - leveled_codec:idx_indexspecs(IndexSpecs, - <<"Bucket">>, - <<"Key">>, - 1, - infinity), + leveled_codec:idx_indexspecs( + IndexSpecs, + <<"Bucket">>, + <<"Key">>, + 1, + infinity + ), {ok, P2, {IdxK, IdxK}, _Bloom2} = sst_new(?TEST_AREA, FileName, 1, [{IdxK, IdxV}], 6000, OptsSST), ?assertMatch( @@ -5426,21 +6033,27 @@ single_key_test() -> P2, {?IDX_TAG, <<"Bucket">>, {Field, <<"20220100">>}, null}, all, - 16)), + 16 + ) + ), ?assertMatch( [{IdxK, IdxV}], sst_getkvrange( P2, {?IDX_TAG, <<"Bucket">>, {Field, <<"20220100">>}, null}, {?IDX_TAG, <<"Bucket">>, {Field, <<"20220101">>}, null}, - 16)), + 16 + ) + ), ?assertMatch( [{IdxK, IdxV}], sst_getkvrange( P2, {?IDX_TAG, <<"Bucket">>, {Field, <<"20220101">>}, null}, {?IDX_TAG, <<"Bucket">>, {Field, <<"20220101">>}, null}, - 16)), + 16 + ) + ), ok = sst_close(P2), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). 
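single_key_test above leans on leveled_codec:idx_indexspecs/5 to turn an index spec into a ledger entry. A minimal illustration of that mapping, using a hypothetical field and term; the expected key shape matches the assertion made in strange_range_test below:

%% One {add, Field, Term} spec for Bucket/Key becomes a single ledger
%% entry keyed {?IDX_TAG, Bucket, {Field, Term}, Key}.
index_key_shape_example() ->
    [{IdxK, _IdxV}] =
        leveled_codec:idx_indexspecs(
            [{add, <<"t1_bin">>, <<"19740301">>}],
            <<"Bucket">>,
            <<"Key">>,
            1,
            infinity
        ),
    {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"19740301">>}, <<"Key">>} = IdxK,
    IdxK.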
@@ -5448,14 +6061,19 @@ strange_range_test() -> FileName = "strange_range_test", V = leveled_head:riak_metadata_to_binary( term_to_binary([{"actor1", 1}]), - <<1:32/integer, 0:32/integer, 0:32/integer>>), + <<1:32/integer, 0:32/integer, 0:32/integer>> + ), OptsSST = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, + log_options = leveled_log:get_opts() + }, FK = leveled_codec:to_objectkey({<<"T0">>, <<"B0">>}, <<"K0">>, ?RIAK_TAG), LK = leveled_codec:to_objectkey({<<"T0">>, <<"B0">>}, <<"K02">>, ?RIAK_TAG), - EK = leveled_codec:to_objectkey({<<"T0">>, <<"B0">>}, <<"K0299">>, ?RIAK_TAG), + EK = leveled_codec:to_objectkey( + {<<"T0">>, <<"B0">>}, <<"K0299">>, ?RIAK_TAG + ), KL1 = lists:map( @@ -5463,23 +6081,28 @@ strange_range_test() -> leveled_codec:to_objectkey( {<<"T0">>, <<"B0">>}, list_to_binary("K00" ++ integer_to_list(I)), - ?RIAK_TAG) + ?RIAK_TAG + ) end, - lists:seq(1, 300)), + lists:seq(1, 300) + ), KL2 = lists:map( fun(I) -> leveled_codec:to_objectkey( {<<"T0">>, <<"B0">>}, list_to_binary("K02" ++ integer_to_list(I)), - ?RIAK_TAG) + ?RIAK_TAG + ) end, - lists:seq(1, 300)), + lists:seq(1, 300) + ), GenerateValue = fun(K) -> element( - 3, leveled_codec:generate_ledgerkv(K, 1, V, 16, infinity)) + 3, leveled_codec:generate_ledgerkv(K, 1, V, 16, infinity) + ) end, KVL = @@ -5487,10 +6110,12 @@ strange_range_test() -> 1, lists:map( fun(K) -> {K, GenerateValue(K)} end, - [FK] ++ KL1 ++ [LK] ++ KL2)), + [FK] ++ KL1 ++ [LK] ++ KL2 + ) + ), {ok, P1, {FK, EK}, _Bloom1} = - sst_new(?TEST_AREA, FileName, 1, KVL, 6000, OptsSST), + sst_new(?TEST_AREA, FileName, 1, KVL, 6000, OptsSST), ?assertMatch({LK, _}, sst_get(P1, LK)), ?assertMatch({FK, _}, sst_get(P1, FK)), @@ -5500,22 +6125,27 @@ strange_range_test() -> IndexSpecs = lists:map( fun(I) -> {add, <<"t1_bin">>, integer_to_binary(I)} end, - lists:seq(1, 500)), + lists:seq(1, 500) + ), IdxKVL = - leveled_codec:idx_indexspecs(IndexSpecs, - <<"Bucket">>, - <<"Key">>, - 1, - infinity), + leveled_codec:idx_indexspecs( + IndexSpecs, + <<"Bucket">>, + <<"Key">>, + 1, + infinity + ), {ok, P2, {_FIdxK, _EIdxK}, _Bloom2} = sst_new( - ?TEST_AREA, FileName, 1, lists:ukeysort(1, IdxKVL), 6000, OptsSST), + ?TEST_AREA, FileName, 1, lists:ukeysort(1, IdxKVL), 6000, OptsSST + ), [{IdxK1, _IdxV1}, {IdxK2, _IdxV2}] = sst_getkvrange( P2, {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"1">>}, null}, {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"10">>}, null}, - 16), + 16 + ), ?assertMatch( {?IDX_TAG, <<"Bucket">>, {<<"t1_bin">>, <<"1">>}, <<"Key">>}, IdxK1 @@ -5527,7 +6157,6 @@ strange_range_test() -> ok = sst_close(P2), ok = file:delete(filename:join(?TEST_AREA, FileName ++ ".sst")). - receive_fun() -> receive {sst_pid, SST_P} -> @@ -5539,13 +6168,14 @@ start_sst_fun(ProcessToInform) -> N = 3000, KVL1 = lists:ukeysort(1, generate_randomkeys(N + 1, N, 1, 20)), OptsSST = - #sst_options{press_method=native, - log_options=leveled_log:get_opts()}, + #sst_options{ + press_method = native, + log_options = leveled_log:get_opts() + }, {ok, P1, {_FK1, _LK1}, _Bloom1} = sst_new(?TEST_AREA, "level1_src", 1, KVL1, 6000, OptsSST), ProcessToInform ! {sst_pid, P1}. 
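start_sst_fun/1 and receive_fun/0 support the stop_whenstarter_stopped test: an SST file should shut itself down once the process that started it has gone. A hedged sketch of that harness pattern (the helper names here are illustrative, and the polling interval is arbitrary):

%% Spawn a starter which creates an SST, reports its pid and exits;
%% then poll until the SST process has also stopped.
starter_shutdown_sketch() ->
    Self = self(),
    _Starter = spawn(fun() -> start_sst_fun(Self) end),
    SSTPid = receive {sst_pid, P} -> P end,
    wait_until_stopped(SSTPid, 20).

wait_until_stopped(_Pid, 0) ->
    false;
wait_until_stopped(Pid, N) ->
    case is_process_alive(Pid) of
        false ->
            true;
        true ->
            timer:sleep(100),
            wait_until_stopped(Pid, N - 1)
    end.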
- blocks_required_test() -> B = <<"Bucket">>, Idx = <<"idx_bin">>, @@ -5554,25 +6184,29 @@ blocks_required_test() -> fun(I) -> list_to_binary(io_lib:format("B~6..0B", [I])) end, - IdxKey = + IdxKey = fun(I) -> {?IDX_TAG, B, {Idx, KeyFun(I)}, KeyFun(I)} end, StdKey = fun(I) -> {?STD_TAG, B, KeyFun(I), null} end, MetaValue = - fun(I) -> + fun(I) -> element( 3, leveled_codec:generate_ledgerkv( - StdKey(I), I, Chunk, 32, infinity)) + StdKey(I), I, Chunk, 32, infinity + ) + ) end, IdxValue = fun(I) -> element( 3, leveled_codec:generate_ledgerkv( - IdxKey(I), I, <<>>, 0, infinity)) + IdxKey(I), I, <<>>, 0, infinity + ) + ) end, Block1L = lists:map(fun(I) -> {IdxKey(I), IdxValue(I)} end, lists:seq(1, 16)), @@ -5583,9 +6217,8 @@ blocks_required_test() -> Block4L = lists:map(fun(I) -> {IdxKey(I), IdxValue(I)} end, lists:seq(49, 64)), Block5L = - lists:map(fun(I) -> {IdxKey(I), IdxValue(I)} end, lists:seq(65, 70)) - ++ - lists:map(fun(I) -> {StdKey(I), MetaValue(I)} end, lists:seq(1, 8)), + lists:map(fun(I) -> {IdxKey(I), IdxValue(I)} end, lists:seq(65, 70)) ++ + lists:map(fun(I) -> {StdKey(I), MetaValue(I)} end, lists:seq(1, 8)), B1 = leveled_sstblock:serialise_block(no_lookup, {0, native}, Block1L), B2 = leveled_sstblock:serialise_block(no_lookup, {0, native}, Block2L), B3 = leveled_sstblock:serialise_block(no_lookup, {0, native}, MidBlockL), @@ -5598,10 +6231,11 @@ blocks_required_test() -> KVL = blocks_required({SK, EK}, B1, B2, B3, B4, B5, {0, native}), io:format( "Length KVL ~w First ~p Last ~p~n", - [length(KVL), hd(KVL), lists:last(KVL)]), + [length(KVL), hd(KVL), lists:last(KVL)] + ), ?assert(length(KVL) == Exp) end, - + TestFun( {?IDX_TAG, B, {Idx, KeyFun(3)}, null}, {?IDX_TAG, B, {Idx, KeyFun(99)}, null}, @@ -5619,35 +6253,73 @@ blocks_required_test() -> ), KVL1 = blocks_required( - {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, - {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, B2, Empty, B4, B5, {0, native}), + { + {?IDX_TAG, B, {Idx, KeyFun(3)}, null}, + {?IDX_TAG, B, {Idx, KeyFun(99)}, null} + }, + B1, + B2, + Empty, + B4, + B5, + {0, native} + ), ?assertMatch(52, length(KVL1)), KVL2 = blocks_required( - {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, - {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, B2, Empty, Empty, Empty, {0, native}), + { + {?IDX_TAG, B, {Idx, KeyFun(3)}, null}, + {?IDX_TAG, B, {Idx, KeyFun(99)}, null} + }, + B1, + B2, + Empty, + Empty, + Empty, + {0, native} + ), ?assertMatch(30, length(KVL2)), KVL3 = blocks_required( - {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, - {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, Empty, Empty, Empty, Empty, {0, native}), + { + {?IDX_TAG, B, {Idx, KeyFun(3)}, null}, + {?IDX_TAG, B, {Idx, KeyFun(99)}, null} + }, + B1, + Empty, + Empty, + Empty, + Empty, + {0, native} + ), ?assertMatch(14, length(KVL3)), KVL4 = blocks_required( - {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, - {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, Empty, B3, B4, B5, {0, native}), + { + {?IDX_TAG, B, {Idx, KeyFun(3)}, null}, + {?IDX_TAG, B, {Idx, KeyFun(99)}, null} + }, + B1, + Empty, + B3, + B4, + B5, + {0, native} + ), ?assertMatch(52, length(KVL4)), KVL5 = blocks_required( - {{?IDX_TAG, B, {Idx, KeyFun(3)}, null}, - {?IDX_TAG, B, {Idx, KeyFun(99)}, null}}, - B1, B2, B3, Empty, B5, {0, native}), - ?assertMatch(52, length(KVL5)) - . - - --endif. \ No newline at end of file + { + {?IDX_TAG, B, {Idx, KeyFun(3)}, null}, + {?IDX_TAG, B, {Idx, KeyFun(99)}, null} + }, + B1, + B2, + B3, + Empty, + B5, + {0, native} + ), + ?assertMatch(52, length(KVL5)). + +-endif. 
diff --git a/src/leveled_sstblock.erl b/src/leveled_sstblock.erl index c5bac1e6..ee72a54e 100644 --- a/src/leveled_sstblock.erl +++ b/src/leveled_sstblock.erl @@ -3,18 +3,18 @@ %% Functions to serialise and then fetch from those serialised blocks, i.e. %% - serialise_block/3 %% - get_all/2 deserialise returning all -%% - get_topandtail/2 return only the first and last elements, as well as a +%% - get_topandtail/2 return only the first and last elements, as well as a %% function to return the remainder, so that deserialisation of the remainder %% may be avoided on inspection of top and tail %% - get_nth/3 deserialise enough of the block to return just the nth item %% %% The fetch functions may be optimised for the block type to minimise the %% work required to fetch the required amount of deserialised data. -%% +%% %% Standard block sizes are %% -define(LOOK_BLOCKSIZE, {24, 32}). %% -define(NOLOOK_BLOCKSIZE, {56, 32}). -%% +%% %% Requirement to serialise LOOK_BLOCKS to optimise for picking the nth value %% Requirement to serialise NOLOOK_BLOCKS to optimise for picking the first and %% last values @@ -23,33 +23,33 @@ -define(MAX_SUBBLOCK_SIZE, 1 bsl 16). -define(BLOCK_TYPE0, 0). - % Block is just a list of terms +% Block is just a list of terms -define(BLOCK_TYPE1, 1). - % Lookup block divided into 4 blocks of 6 - % 24 KV blocks only +% Lookup block divided into 4 blocks of 6 +% 24 KV blocks only -define(BLOCK_TYPE2, 2). - % Lookup block divided into 4 blocks of 8 - % 32 KV blocks only +% Lookup block divided into 4 blocks of 8 +% 32 KV blocks only -define(BLOCK_TYPE3, 3). - % Nolookup block with first/last terms at head +% Nolookup block with first/last terms at head -define(BLOCK_TYPE4, 4). - % Nolookup block with first/last terms at head, and block split into L/M/R - % 56 KV blocks only +% Nolookup block with first/last terms at head, and block split into L/M/R +% 56 KV blocks only -define(COMPRESSION_FACTOR, 1). - % When using native compression - how hard should the compression code - % try to reduce the size of the compressed output. 1 Is to imply minimal - % effort, 6 is default in OTP: - % https://www.erlang.org/doc/man/erlang.html#term_to_binary-2 +% When using native compression - how hard should the compression code +% try to reduce the size of the compressed output. 1 Is to imply minimal +% effort, 6 is default in OTP: +% https://www.erlang.org/doc/man/erlang.html#term_to_binary-2 -define(BINARY_SETTINGS, [{compressed, ?COMPRESSION_FACTOR}]). -type block_type() :: - ?BLOCK_TYPE0|?BLOCK_TYPE1|?BLOCK_TYPE2|?BLOCK_TYPE3|?BLOCK_TYPE4. + ?BLOCK_TYPE0 | ?BLOCK_TYPE1 | ?BLOCK_TYPE2 | ?BLOCK_TYPE3 | ?BLOCK_TYPE4. -type range_filter() :: - all|{leveled_codec:ledger_key(), leveled_codec:ledger_key()}. + all | {leveled_codec:ledger_key(), leveled_codec:ledger_key()}. -type top_and_tail() :: { - leveled_codec:ledger_key()|not_present, - leveled_codec:ledger_key()|not_present, + leveled_codec:ledger_key() | not_present, + leveled_codec:ledger_key() | not_present, fun((range_filter()) -> list(leveled_codec:ledger_kv())) }. @@ -66,7 +66,7 @@ %%% API %%%============================================================================ - +%% erlfmt:ignore-begin -spec serialise_block( lookup|no_lookup, {leveled_sst:block_version(), leveled_sst:press_method()}, @@ -243,10 +243,12 @@ serialise_block(_, {1, PressMethod}, TermList) -> serialise_block_aslist(PressMethod, TermList); serialise_block(_, {0, PressMethod}, TermList) -> serialise_block(TermList, PressMethod). 
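The sub-block types defined above exist so that a point fetch need not deserialise a whole block. As illustrative arithmetic only (not the module's internal layout code): for a BLOCK_TYPE2 block, 32 keys split into 4 sub-blocks of 8, the nth key can be located as follows.

%% Which sub-block, and which position within it, holds the Nth key of
%% a 32-key type-2 block; only that sub-block then needs opening.
type2_subblock_for_nth(N) when N >= 1, N =< 32 ->
    {((N - 1) div 8) + 1, ((N - 1) rem 8) + 1}.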
+%% erlfmt:ignore-end -spec get_all( - binary(), leveled_sst:block_method()) -> - list(leveled_codec:ledger_kv()). + binary(), leveled_sst:block_method() +) -> + list(leveled_codec:ledger_kv()). get_all(Block, {1, PressMethod}) -> ExtractFun = fun(CheckedBlock) -> @@ -261,7 +263,8 @@ get_all(Block, {0, PressMethod}) -> check_block(Block, [], ExtractFun). -spec get_topandtail( - binary(), leveled_sst:block_method()) -> top_and_tail(). + binary(), leveled_sst:block_method() +) -> top_and_tail(). get_topandtail(Block, {0, PressMethod}) -> ExtractFun = fun(CheckedBlock) -> @@ -289,8 +292,9 @@ get_topandtail(Block, {1, PressMethod}) -> ). -spec get_nth( - pos_integer(), binary(), leveled_sst:block_method()) -> - leveled_codec:ledger_kv()|not_present. + pos_integer(), binary(), leveled_sst:block_method() +) -> + leveled_codec:ledger_kv() | not_present. get_nth(N, Block, {1, PressMethod}) -> ExtractFun = fun(CheckedBlock) -> @@ -317,8 +321,9 @@ crc_validate_bin(Bin) -> <>. -spec serialise_block_aslist( - leveled_sst:press_method(), list(leveled_codec:ledger_kv())) - -> binary(). + leveled_sst:press_method(), list(leveled_codec:ledger_kv()) +) -> + binary(). serialise_block_aslist(PM, TermList) when PM == lz4; PM == zstd -> CompressedBin = << @@ -335,17 +340,17 @@ serialise_block_aslist(native, TermList) -> crc_validate_bin(CompressedBin); serialise_block_aslist(none, TermList) -> UncompressedBin = - <<(term_to_binary(TermList))/binary, ?BLOCK_TYPE0:8/integer >>, + <<(term_to_binary(TermList))/binary, ?BLOCK_TYPE0:8/integer>>, crc_validate_bin(UncompressedBin). --spec compress_block(binary(), lz4|zstd) -> binary(). +-spec compress_block(binary(), lz4 | zstd) -> binary(). compress_block(BlockBin, lz4) -> {ok, Bin} = lz4:pack(BlockBin), Bin; compress_block(BlockBin, zstd) -> zstd:compress(BlockBin). - --spec decompress_block(binary(), lz4|zstd) -> binary(). + +-spec decompress_block(binary(), lz4 | zstd) -> binary(). decompress_block(BlockBin, lz4) -> {ok, Bin} = lz4:unpack(BlockBin), Bin; @@ -355,14 +360,13 @@ decompress_block(BlockBin, zstd) -> DeflateBin end. --spec - check_block - (binary(), list(), fun((binary()) -> list(leveled_codec:ledger_kv()))) - -> list(leveled_codec:ledger_kv()); - (binary(), not_present, fun((binary()) -> leveled_codec:ledger_kv())) - -> leveled_codec:ledger_kv()|not_present; - (binary(), top_and_tail(), fun((binary()) -> top_and_tail())) - -> top_and_tail(). +-spec check_block + (binary(), list(), fun((binary()) -> list(leveled_codec:ledger_kv()))) -> + list(leveled_codec:ledger_kv()); + (binary(), not_present, fun((binary()) -> leveled_codec:ledger_kv())) -> + leveled_codec:ledger_kv() | not_present; + (binary(), top_and_tail(), fun((binary()) -> top_and_tail())) -> + top_and_tail(). check_block(Block, Default, ExtractFun) when byte_size(Block) > 4 -> BinS = byte_size(Block) - 4, <> = Block, @@ -381,6 +385,7 @@ check_block(_Block, Default, _ExtractFun) -> %%% Block-type specific cases - v1 %%%============================================================================ +%% erlfmt:ignore-begin -spec get_topandtail_block( binary(), leveled_sst:press_method()) -> top_and_tail(). get_topandtail_block(CheckedBlock, PressMethod) -> @@ -647,6 +652,7 @@ get_all_block(Type, TypedBlock, PM) C1, C2, C3, C4, C5, C6, C7, C8, D1, D2, D3, D4, D5, D6, D7, D8 ]. 
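Taken together, the accessors above give three ways into a serialised block. A hedged sketch of a round trip through the API (BlockMethod {1, native} selects block version 1 with native compression; the function name is illustrative, and the list length is arbitrary so the type-0 fallback serialisation applies):

sstblock_roundtrip_sketch(KVL) when is_list(KVL), KVL =/= [] ->
    BlockMethod = {1, native},
    Block = leveled_sstblock:serialise_block(no_lookup, BlockMethod, KVL),
    %% Full deserialisation returns the original ordered list
    KVL = leveled_sstblock:get_all(Block, BlockMethod),
    %% Fetch a single entry without deserialising the rest
    FirstKV = leveled_sstblock:get_nth(1, Block, BlockMethod),
    %% First and last keys, plus a fun taking a range filter (all or
    %% {StartKey, EndKey}) that deserialises further only when called
    {FirstKey, LastKey, FilterFun} =
        leveled_sstblock:get_topandtail(Block, BlockMethod),
    {FirstKV, FirstKey, LastKey, FilterFun(all)}.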
+%% erlfmt:ignore-end %%%============================================================================ %%% Internal functions - v0 @@ -772,13 +778,12 @@ v1_block_tester(Lookup, BlockMethod, BlockSize, SibMetaBin, B) -> GenKeyFun = fun(X) -> LK = - {?RIAK_TAG, - B, - list_to_binary("Key" ++ integer_to_list(X)), + {?RIAK_TAG, B, list_to_binary("Key" ++ integer_to_list(X)), null}, LKV = leveled_codec:generate_ledgerkv( - LK, X, V, byte_size(V), infinity), + LK, X, V, byte_size(V), infinity + ), {_Bucket, _Key, MetaValue, _Hashes, _LastMods} = LKV, {LK, MetaValue} end, @@ -803,6 +808,4 @@ v1_block_tester(Lookup, BlockMethod, BlockSize, SibMetaBin, B) -> ?assertMatch(KVL, AllFun(all)), ?assertMatch(KVL, get_all(Block, BlockMethod)). - - --endif. \ No newline at end of file +-endif. diff --git a/src/leveled_tictac.erl b/src/leveled_tictac.erl index 6bdfc33e..ac922071 100644 --- a/src/leveled_tictac.erl +++ b/src/leveled_tictac.erl @@ -52,39 +52,40 @@ -module(leveled_tictac). -export([ - new_tree/1, - new_tree/2, - new_tree/3, - add_kv/4, - add_kv/5, - alter_segment/3, - find_dirtyleaves/2, - find_dirtysegments/2, - fetch_root/1, - fetch_leaves/2, - merge_trees/2, - get_segment/2, - export_tree/1, - import_tree/1, - valid_size/1, - keyto_segment32/1, - keyto_doublesegment32/1, - keyto_segment48/1, - generate_segmentfilter_list/2, - adjust_segmentmatch_list/3, - merge_binaries/2, - join_segment/2, - match_segment/2, - tictac_hash/2 % called by kv_index_tictactree - ]). + new_tree/1, + new_tree/2, + new_tree/3, + add_kv/4, + add_kv/5, + alter_segment/3, + find_dirtyleaves/2, + find_dirtysegments/2, + fetch_root/1, + fetch_leaves/2, + merge_trees/2, + get_segment/2, + export_tree/1, + import_tree/1, + valid_size/1, + keyto_segment32/1, + keyto_doublesegment32/1, + keyto_segment48/1, + generate_segmentfilter_list/2, + adjust_segmentmatch_list/3, + merge_binaries/2, + join_segment/2, + match_segment/2, + % called by kv_index_tictactree + tictac_hash/2 +]). -define(HASH_SIZE, 4). -define(L2_CHUNKSIZE, 256). -define(L2_BITSIZE, 8). %% UNSUUPPORTED tree sizes for accelerated segment filtering --define(XXSMALL, 16). --define(XSMALL, 64). +-define(XXSMALL, 16). +-define(XSMALL, 64). %% SUPPORTED tree sizes for accelerated segment filtering -define(SMALL, 256). @@ -92,19 +93,19 @@ -define(LARGE, 4096). -define(XLARGE, 16384). - -define(EMPTY, <<0:8/integer>>). -define(VALID_SIZES, [xxsmall, xsmall, small, medium, large, xlarge]). --record(tictactree, {treeID :: any(), - size :: tree_size(), - width :: integer(), - segment_count :: integer(), - level1 :: level1_map(), - level2 :: array:array(binary()) - }). +-record(tictactree, { + treeID :: any(), + size :: tree_size(), + width :: integer(), + segment_count :: integer(), + level1 :: level1_map(), + level2 :: array:array(binary()) +}). --type level1_map() :: #{non_neg_integer() => binary()}|binary(). +-type level1_map() :: #{non_neg_integer() => binary()} | binary(). -type tictactree() :: #tictactree{}. @@ -112,17 +113,16 @@ {segment_hash, non_neg_integer(), non_neg_integer(), non_neg_integer()}. -type tree_extract() :: {binary(), integer(), integer(), integer(), binary()}. --type tree_size() :: - xxsmall|xsmall|small|medium|large|xlarge. --type bin_extract_fun() - :: - fun((term(), term()) -> - {binary(), binary()|{is_hash, non_neg_integer()}} +-type tree_size() :: + xxsmall | xsmall | small | medium | large | xlarge. 
+%% erlfmt:ignore - issues with editors when function definitions are split +-type bin_extract_fun() :: + fun((term(), term()) + -> {binary(), binary() | {is_hash, non_neg_integer()}} ). -export_type([tictactree/0, segment48/0, tree_size/0]). - %%%============================================================================ %%% External functions %%%============================================================================ @@ -138,7 +138,7 @@ valid_size(Size) -> %% Create a new tree, zeroed out. new_tree(TreeID) -> new_tree(TreeID, small). - + -spec new_tree(any(), tree_size()) -> tictactree(). new_tree(TreeID, Size) -> new_tree(TreeID, Size, true). @@ -161,42 +161,39 @@ new_tree(TreeID, Size, UseMap) -> width = Width, segment_count = Width * ?L2_CHUNKSIZE, level1 = Lv1Init, - % array values are indeed all binaries - % eqwalizer:ignore + % array values are indeed all binaries + % eqwalizer:ignore level2 = Lv2Init }. -spec export_tree(tictactree()) -> {struct, list()}. %% @doc -%% Export the tree into a tuple list, with the level1 binary, and then for +%% Export the tree into a tuple list, with the level1 binary, and then for %% level2 {branchID, binary()} export_tree(Tree) -> - EncodeL2Fun = + EncodeL2Fun = fun(X, L2Acc) -> L2Element = zlib:compress(array:get(X, Tree#tictactree.level2)), - [{integer_to_binary(X), base64:encode_to_string(L2Element)}|L2Acc] + [{integer_to_binary(X), base64:encode_to_string(L2Element)} | L2Acc] end, - L2 = + L2 = lists:foldl(EncodeL2Fun, [], lists:seq(0, Tree#tictactree.width - 1)), - {struct, - [{<<"level1">>, - base64:encode_to_string( - from_level1_map(Tree#tictactree.level1) - ) - }, - {<<"level2">>, - {struct, lists:reverse(L2)} - } - ] - }. + {struct, [ + {<<"level1">>, + base64:encode_to_string( + from_level1_map(Tree#tictactree.level1) + )}, + {<<"level2">>, {struct, lists:reverse(L2)}} + ]}. -spec import_tree({struct, list()}) -> tictactree(). %% @doc %% Reverse the export process import_tree(ExportedTree) -> - {struct, - [{<<"level1">>, L1Base64}, - {<<"level2">>, {struct, L2List}}]} = ExportedTree, + {struct, [ + {<<"level1">>, L1Base64}, + {<<"level2">>, {struct, L2List}} + ]} = ExportedTree, L1Bin = base64:decode(L1Base64), Sizes = lists:map( @@ -205,10 +202,10 @@ import_tree(ExportedTree) -> ), Width = byte_size(L1Bin) div ?HASH_SIZE, {Size, _Width} = lists:keyfind(Width, 2, Sizes), - %% assert that side is indeed the provided width + %% assert that side is indeed the provided width true = get_size(Size) == Width, Lv2Init = array:new([{size, Width}, {default, ?EMPTY}]), - FoldFun = + FoldFun = fun({X, EncodedL2SegBin}, L2Array) -> L2SegBin = zlib:uncompress(base64:decode(EncodedL2SegBin)), array:set(binary_to_integer(X), L2SegBin, L2Array) @@ -221,16 +218,15 @@ import_tree(ExportedTree) -> width = Width, segment_count = Width * ?L2_CHUNKSIZE, level1 = to_level1_map(L1Bin), - % array values are indeed all binaries - % eqwalizer:ignore + % array values are indeed all binaries + % eqwalizer:ignore level2 = Lv2 }. - -spec add_kv(tictactree(), term(), term(), bin_extract_fun()) -> tictactree(). %% @doc -%% Add a Key and value to a tictactree using the BinExtractFun to extract a -%% binary from the Key and value from which to generate the hash. The +%% Add a Key and value to a tictactree using the BinExtractFun to extract a +%% binary from the Key and value from which to generate the hash. 
The %% BinExtractFun will also need to do any canonicalisation necessary to make %% the hash consistent (such as whitespace removal, or sorting) add_kv(TicTacTree, Key, Value, BinExtractFun) -> @@ -247,9 +243,9 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, true) -> {BinK, BinV} = BinExtractFun(Key, Value), {SegHash, SegChangeHash} = tictac_hash(BinK, BinV), Segment = get_segment(SegHash, TicTacTree#tictactree.segment_count), - - {SegLeaf1, SegLeaf2, L1Extract, L2Extract} - = extract_segment(Segment, TicTacTree), + + {SegLeaf1, SegLeaf2, L1Extract, L2Extract} = + extract_segment(Segment, TicTacTree), SegLeaf2Upd = SegLeaf2 bxor SegChangeHash, SegLeaf1Upd = SegLeaf1 bxor SegChangeHash, @@ -267,9 +263,9 @@ add_kv(TicTacTree, Key, Value, BinExtractFun, false) -> %% Replace the value of a segment in the tree with a new value - for example %% to be used in partial rebuilds of trees alter_segment(Segment, Hash, Tree) -> - {SegLeaf1, SegLeaf2, L1Extract, L2Extract} - = extract_segment(Segment, Tree), - + {SegLeaf1, SegLeaf2, L1Extract, L2Extract} = + extract_segment(Segment, Tree), + SegLeaf1Upd = SegLeaf1 bxor SegLeaf2 bxor Hash, replace_segment(SegLeaf1Upd, Hash, L1Extract, L2Extract, Tree). @@ -280,12 +276,12 @@ alter_segment(Segment, Hash, Tree) -> find_dirtyleaves(SrcTree, SnkTree) -> SizeSrc = SrcTree#tictactree.size, SizeSnk = SnkTree#tictactree.size, - true = SizeSrc == SizeSnk, - + true = SizeSrc == SizeSnk, + IdxList = find_dirtysegments(fetch_root(SrcTree), fetch_root(SnkTree)), SrcLeaves = fetch_leaves(SrcTree, IdxList), SnkLeaves = fetch_leaves(SnkTree, IdxList), - + FoldFun = fun(Idx, Acc) -> {_, SrcLeaf} = lists:keyfind(Idx, 1, SrcLeaves), @@ -294,10 +290,11 @@ find_dirtyleaves(SrcTree, SnkTree) -> lists:foldl( fun(X, InnerAcc) -> SegID = X + Idx * ?L2_CHUNKSIZE, - [SegID|InnerAcc] + [SegID | InnerAcc] end, Acc, - L2IdxList) + L2IdxList + ) end, %% Output not sorted, as sorted by the design of the construction process lists:foldl(FoldFun, [], IdxList). @@ -329,18 +326,18 @@ fetch_leaves(TicTacTree, BranchList) -> -spec merge_trees(tictactree(), tictactree()) -> tictactree(). %% Merge two trees providing a result that represents the combined state, %% assuming that the two trees were correctly partitioned pre-merge. If a key -%% and value has been added to both trees, then the merge will not give the +%% and value has been added to both trees, then the merge will not give the %% expected outcome. merge_trees(TreeA, TreeB) -> Size = TreeA#tictactree.size, Size = TreeB#tictactree.size, - + MergedTree = new_tree(merge, Size), - + L1A = fetch_root(TreeA), L1B = fetch_root(TreeB), NewLevel1 = merge_binaries(L1A, L1B), - + MergeFun = fun(SQN, MergeL2) -> L2A = get_level2(TreeA, SQN), @@ -354,15 +351,16 @@ merge_trees(TreeA, TreeB) -> MergedTree#tictactree.level2, lists:seq(0, MergedTree#tictactree.width - 1) ), - + MergedTree#tictactree{ level1 = to_level1_map(NewLevel1), level2 = NewLevel2 }. -spec get_segment( - integer(), - integer()|xxsmall|xsmall|small|medium|large|xlarge) -> integer(). + integer(), + integer() | xxsmall | xsmall | small | medium | large | xlarge +) -> integer(). %% @doc %% Return the segment ID for a Key. Can pass the tree size or the actual %% segment count derived from the size @@ -371,24 +369,26 @@ get_segment(Hash, SegmentCount) when is_integer(SegmentCount) -> get_segment(Hash, TreeSize) -> get_segment(Hash, ?L2_CHUNKSIZE * get_size(TreeSize)). - -spec tictac_hash(binary(), any()) -> {integer(), integer()}. %% @doc %% Hash the key and term. 
-%% The term can be of the form {is_hash, 32-bit integer)} to indicate the hash -%% has already been taken. If the value is not a pre-extracted hash just use +%% The term can be of the form {is_hash, 32-bit integer)} to indicate the hash +%% has already been taken. If the value is not a pre-extracted hash just use %% erlang:phash2. If an exportable hash of the value is required this should %% be managed through the add_kv ExtractFun providing a pre-prepared Hash. tictac_hash( - BinKey, {is_hash, HashedVal}) - when is_binary(BinKey), is_integer(HashedVal) -> + BinKey, {is_hash, HashedVal} +) when + is_binary(BinKey), is_integer(HashedVal) +-> {HashKeyToSeg, AltHashKey} = keyto_doublesegment32(BinKey), {HashKeyToSeg, AltHashKey bxor HashedVal}; tictac_hash(BinKey, ValToHash) when is_binary(BinKey) -> tictac_hash(BinKey, {is_hash, erlang:phash2(ValToHash)}). -spec keyto_doublesegment32( - binary()) -> {non_neg_integer(), non_neg_integer()}. + binary() +) -> {non_neg_integer(), non_neg_integer()}. %% @doc %% Used in tictac_hash/2 to provide an alternative hash of the key to bxor with %% the value, as well as the segment hash to locate the leaf of the tree to be @@ -399,10 +399,11 @@ keyto_doublesegment32(BinKey) when is_binary(BinKey) -> -spec keyto_segment32(any()) -> integer(). %% @doc -%% The first 16 bits of the segment hash used in the tictac tree should be +%% The first 16 bits of the segment hash used in the tictac tree should be %% made up of the segment ID part (which is used to accelerate queries) -keyto_segment32({segment_hash, SegmentID, ExtraHash, _AltHash}) - when is_integer(SegmentID), is_integer(ExtraHash) -> +keyto_segment32({segment_hash, SegmentID, ExtraHash, _AltHash}) when + is_integer(SegmentID), is_integer(ExtraHash) +-> (ExtraHash band 65535) bsl 16 + SegmentID; keyto_segment32(BinKey) when is_binary(BinKey) -> keyto_segment32(keyto_segment48(BinKey)); @@ -411,29 +412,28 @@ keyto_segment32(Key) -> -spec keyto_segment48(binary()) -> segment48(). %% @doc -%% Produce a segment with an Extra Hash part - for tictac use most of the +%% Produce a segment with an Extra Hash part - for tictac use most of the %% ExtraHash will be discarded keyto_segment48(BinKey) -> - <> = crypto:hash(md5, BinKey), + <> = crypto:hash(md5, BinKey), {segment_hash, SegmentID, ExtraHash, AltHash}. -spec generate_segmentfilter_list( - list(integer()), tree_size()) -> false|list(integer()). + list(integer()), tree_size() +) -> false | list(integer()). %% @doc %% Cannot accelerate segment listing for trees below certain sizes, so check %% the creation of segment filter lists with this function -generate_segmentfilter_list(_SegmentList, xxsmall) -> +generate_segmentfilter_list(_SegmentList, xxsmall) -> false; -generate_segmentfilter_list(SegmentList, xsmall) -> +generate_segmentfilter_list(SegmentList, xsmall) -> case length(SegmentList) =< 4 of true -> A0 = 1 bsl 15, A1 = 1 bsl 14, - ExpandSegFun = - fun(X, Acc) -> + ExpandSegFun = + fun(X, Acc) -> [X, X + A0, X + A1, X + A0 + A1] ++ Acc end, lists:foldr(ExpandSegFun, [], SegmentList); @@ -441,13 +441,14 @@ generate_segmentfilter_list(SegmentList, xsmall) -> false end; generate_segmentfilter_list(SegmentList, Size) -> - case lists:member(Size, ?VALID_SIZES) of + case lists:member(Size, ?VALID_SIZES) of true -> SegmentList end. -spec adjust_segmentmatch_list( - list(integer()), tree_size(), tree_size()) -> list(integer()). + list(integer()), tree_size(), tree_size() +) -> list(integer()). 
%% @doc %% If we have dirty segments discovered by comparing trees of size CompareSize, %% and we want to see if it matches a segment for a key which was created for a @@ -473,32 +474,37 @@ generate_segmentfilter_list(SegmentList, Size) -> adjust_segmentmatch_list(SegmentList, CompareSize, StoreSize) -> CompareSizeI = get_size(CompareSize), StoreSizeI = get_size(StoreSize), - if CompareSizeI =< StoreSizeI -> - ExpItems = StoreSizeI div CompareSizeI - 1, - ShiftFactor = round(math:log2(CompareSizeI * ?L2_CHUNKSIZE)), - ExpList = - lists:map(fun(X) -> X bsl ShiftFactor end, lists:seq(1, ExpItems)), - UpdSegmentList = - lists:foldl(fun(S, Acc) -> - L = lists:map(fun(F) -> F + S end, ExpList), - L ++ Acc - end, - [], - SegmentList), - lists:usort(UpdSegmentList ++ SegmentList) + if + CompareSizeI =< StoreSizeI -> + ExpItems = StoreSizeI div CompareSizeI - 1, + ShiftFactor = round(math:log2(CompareSizeI * ?L2_CHUNKSIZE)), + ExpList = + lists:map( + fun(X) -> X bsl ShiftFactor end, lists:seq(1, ExpItems) + ), + UpdSegmentList = + lists:foldl( + fun(S, Acc) -> + L = lists:map(fun(F) -> F + S end, ExpList), + L ++ Acc + end, + [], + SegmentList + ), + lists:usort(UpdSegmentList ++ SegmentList) end. - -spec match_segment( - {integer(), tree_size()}, {integer(), tree_size()}) -> boolean(). + {integer(), tree_size()}, {integer(), tree_size()} +) -> boolean(). %% @doc %% Does segment A match segment B - given that segment A was generated using %% Tree size A and segment B was generated using Tree Size B match_segment({SegIDA, TreeSizeA}, {SegIDB, TreeSizeB}) -> - SmallestTreeSize = + SmallestTreeSize = min(get_size(TreeSizeA), get_size(TreeSizeB)) * ?L2_CHUNKSIZE, - get_segment(SegIDA, SmallestTreeSize) - == get_segment(SegIDB, SmallestTreeSize). + get_segment(SegIDA, SmallestTreeSize) == + get_segment(SegIDB, SmallestTreeSize). -spec join_segment(integer(), integer()) -> integer(). %% @doc @@ -532,53 +538,52 @@ from_level1_map(Level1B) when is_binary(Level1B) -> Level1B. -spec extract_segment( - integer(), tictactree()) -> - {integer(), integer(), tree_extract(), tree_extract()}. + integer(), tictactree() +) -> + {integer(), integer(), tree_extract(), tree_extract()}. %% @doc %% Extract the Level 1 and Level 2 slices from a tree to prepare an update extract_segment(Segment, TicTacTree) -> Level1Pos = - (Segment bsr ?L2_BITSIZE) - band (TicTacTree#tictactree.width - 1), - + (Segment bsr ?L2_BITSIZE) band + (TicTacTree#tictactree.width - 1), + Level2Pos = Segment band (?L2_CHUNKSIZE - 1), Level2BytePos = ?HASH_SIZE * Level2Pos, Level2 = get_level2(TicTacTree, Level1Pos), - + HashIntLength = ?HASH_SIZE * 8, - <> = Level2, {PreL1, SegLeaf1, PostL1, Level1BytePos} = extract_level1_slice(TicTacTree#tictactree.level1, Level1Pos), - - {SegLeaf1, - SegLeaf2, + + {SegLeaf1, SegLeaf2, {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1}, {PreL2, Level2BytePos, Level2Pos, HashIntLength, PostL2}}. -spec extract_level1_slice( - level1_map(), non_neg_integer()) -> - {binary(), non_neg_integer(), binary(), non_neg_integer()}. + level1_map(), non_neg_integer() +) -> + {binary(), non_neg_integer(), binary(), non_neg_integer()}. 
extract_level1_slice(Level1M, Level1Pos) when is_map(Level1M) -> Level1Slice = Level1Pos div 16, HashIntLength = ?HASH_SIZE * 8, Level1BytePos = ?HASH_SIZE * (Level1Pos rem 16), - <> = maps:get(Level1Slice, Level1M), {PreL1, SegLeaf1, PostL1, Level1BytePos}; extract_level1_slice(Level1B, Level1Pos) when is_binary(Level1B) -> HashIntLength = ?HASH_SIZE * 8, Level1BytePos = ?HASH_SIZE * Level1Pos, - <> = Level1B, {PreL1, SegLeaf1, PostL1, Level1BytePos}. -spec replace_level1_slice( - level1_map(), non_neg_integer(), binary()) -> level1_map(). + level1_map(), non_neg_integer(), binary() +) -> level1_map(). replace_level1_slice(Level1M, Level1Pos, Level1Upd) when is_map(Level1M) -> Level1Slice = Level1Pos div 16, maps:put(Level1Slice, Level1Upd, Level1M); @@ -586,39 +591,41 @@ replace_level1_slice(Level1B, _Level1Pos, Level1Upd) when is_binary(Level1B) -> Level1Upd. -spec replace_segment( - integer(), integer(), tree_extract(), tree_extract(), tictactree()) -> - tictactree(). + integer(), integer(), tree_extract(), tree_extract(), tictactree() +) -> + tictactree(). %% @doc %% Replace a slice of a tree replace_segment(L1Hash, L2Hash, L1Extract, L2Extract, TicTacTree) -> {PreL1, Level1BytePos, Level1Pos, HashIntLength, PostL1} = L1Extract, {PreL2, Level2BytePos, _Level2Pos, HashIntLength, PostL2} = L2Extract, - Level1Upd = <>, - Level2Upd = <>, + Level1Upd = + <>, + Level2Upd = + <>, TicTacTree#tictactree{ level1 = replace_level1_slice( TicTacTree#tictactree.level1, Level1Pos, Level1Upd ), - level2 = array:set(Level1Pos, Level2Upd, TicTacTree#tictactree.level2)}. + level2 = array:set(Level1Pos, Level2Upd, TicTacTree#tictactree.level2) + }. get_level2(TicTacTree, L1Pos) -> - case array:get(L1Pos, TicTacTree#tictactree.level2) of + case array:get(L1Pos, TicTacTree#tictactree.level2) of ?EMPTY -> Lv2SegBinSize = ?L2_CHUNKSIZE * ?HASH_SIZE * 8, <<0:Lv2SegBinSize/integer>>; SrcL2 -> - SrcL2 + SrcL2 end. get_size(Size) -> case Size of - xxsmall -> + xxsmall -> ?XXSMALL; xsmall -> ?XSMALL; @@ -632,7 +639,6 @@ get_size(Size) -> ?XLARGE end. - segmentcompare(SrcBin, SinkBin) when byte_size(SrcBin) == byte_size(SinkBin) -> segmentcompare(SrcBin, SinkBin, [], 0); segmentcompare(<<>>, SinkBin) -> @@ -648,10 +654,10 @@ segmentcompare(SrcBin, SnkBin, Acc, Counter) -> <> = SrcBin, <> = SnkBin, case SrcHash of - H when H == SnkHash -> + H when H == SnkHash -> segmentcompare(SrcTail, SnkTail, Acc, Counter + 1); _ -> - segmentcompare(SrcTail, SnkTail, [Counter|Acc], Counter + 1) + segmentcompare(SrcTail, SnkTail, [Counter | Acc], Counter + 1) end. merge_binaries(BinA, BinB) -> @@ -680,7 +686,7 @@ checktree(Level1Bin, TicTacTree, Counter) -> <> = Level1Bin, L2Bin = get_level2(TicTacTree, Counter), true = TopHash == segmentsummarise(L2Bin, 0), - checktree(Tail, TicTacTree, Counter + 1). + checktree(Tail, TicTacTree, Counter + 1). 
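Spelled out, the per-hash walk behind segmentcompare/4 and the XOR combination behind merge_binaries/2 look like the sketch below, assuming the hashes are laid out as contiguous 32-bit integers (?HASH_SIZE is 4 bytes). Names here are illustrative: two equal-length binaries are walked one hash at a time, collecting the indexes that differ (which is how dirty branches and leaves are found), and two partitioned trees are combined by XOR-ing their hashes together.

-module(tictac_compare_sketch).
-export([compare/2, merge/2]).

-define(HASH_BITS, 32).

%% Indexes (ascending) at which two equal-length hash binaries disagree.
compare(BinA, BinB) when byte_size(BinA) == byte_size(BinB) ->
    compare(BinA, BinB, 0, []).

compare(<<>>, <<>>, _Idx, Acc) ->
    lists:reverse(Acc);
compare(
    <<HashA:?HASH_BITS/integer, RestA/binary>>,
    <<HashB:?HASH_BITS/integer, RestB/binary>>,
    Idx,
    Acc
) ->
    case HashA of
        HashB -> compare(RestA, RestB, Idx + 1, Acc);
        _ -> compare(RestA, RestB, Idx + 1, [Idx | Acc])
    end.

%% Combine two partitioned trees' binaries by XOR-ing hash by hash.
merge(<<>>, <<>>) ->
    <<>>;
merge(
    <<HashA:?HASH_BITS/integer, RestA/binary>>,
    <<HashB:?HASH_BITS/integer, RestB/binary>>
) ->
    <<(HashA bxor HashB):?HASH_BITS/integer, (merge(RestA, RestB))/binary>>.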
segmentsummarise(<<>>, L1Acc) -> L1Acc; @@ -703,7 +709,7 @@ simple_bysize_test_allsizes() -> simple_test_withsize(Size) -> ?assertMatch(true, valid_size(Size)), BinFun = fun(K, V) -> {leveled_util:t2b(K), leveled_util:t2b(V)} end, - + K1 = {o, "B1", "K1", null}, K2 = {o, "B1", "K2", null}, K3 = {o, "B1", "K3", null}, @@ -730,7 +736,7 @@ simple_test_withsize(Size) -> ?assertMatch(false, Tree1#tictactree.level1 == Tree2#tictactree.level1), ?assertMatch(false, Tree2#tictactree.level1 == Tree3#tictactree.level1), ?assertMatch(false, Tree3#tictactree.level1 == Tree3A#tictactree.level1), - + Tree0X = new_tree(0, Size), Tree1X = add_kv(Tree0X, K3, {caine, 3}, BinFun), Tree2X = add_kv(Tree1X, K1, {caine, 1}, BinFun), @@ -740,14 +746,14 @@ simple_test_withsize(Size) -> ?assertMatch(false, Tree2#tictactree.level1 == Tree2X#tictactree.level1), ?assertMatch(true, Tree3#tictactree.level1 == Tree3X#tictactree.level1), ?assertMatch(true, Tree3XA#tictactree.level1 == Tree3XA#tictactree.level1), - + SC = Tree0#tictactree.segment_count, - GetSegFun = + GetSegFun = fun(TK) -> get_segment(keyto_segment32(leveled_util:t2b(TK)), SC) end, - + DL0 = find_dirtyleaves(Tree1, Tree0), ?assertMatch(true, lists:member(GetSegFun(K1), DL0)), DL1 = find_dirtyleaves(Tree3, Tree1), @@ -755,7 +761,7 @@ simple_test_withsize(Size) -> ?assertMatch(true, lists:member(GetSegFun(K2), DL1)), ?assertMatch(true, lists:member(GetSegFun(K3), DL1)), ?assertMatch(false, lists:member(GetSegFun(K1), DL1)), - + % Export and import tree to confirm no difference ExpTree3 = export_tree(Tree3), ImpTree3 = import_tree(ExpTree3), @@ -765,17 +771,21 @@ dirtyleaves_sorted_test() -> Tree0 = new_tree(test, large), KVL1 = lists:map( - fun(I) -> - {{o, to_bucket(I rem 8), to_key(I), null}, - {is_hash, erlang:phash2(integer_to_binary(I))}} + fun(I) -> + { + {o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))} + } end, lists:seq(1, 50000) ), KVL2 = lists:map( - fun(I) -> - {{o, to_bucket(I rem 8), to_key(I), null}, - {is_hash, erlang:phash2(integer_to_binary(I))}} + fun(I) -> + { + {o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))} + } end, lists:seq(100000, 150000) ), @@ -807,7 +817,6 @@ dirtyleaves_sorted_test() -> ?assertMatch(DL2, lists:sort(DL2)), ?assertMatch(DL1, DL2). - merge_bysize_small_test() -> merge_test_withsize(small, true, true), merge_test_withsize(small, true, false). 
@@ -829,35 +838,35 @@ merge_bysize_xlarge_test2() -> merge_test_withsize(Size, T1UseMap, T2UseMap) -> BinFun = fun(K, V) -> {leveled_util:t2b(K), leveled_util:t2b(V)} end, - + TreeX0 = new_tree(0, Size, T1UseMap), TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun), TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, BinFun), TreeX3 = add_kv(TreeX2, {o, "B1", "X3", null}, {caine, 3}, BinFun), TreeX4 = add_kv(TreeX3, {o, "B1", "X3", null}, {caine, 4}, BinFun), - + TreeY0 = new_tree(0, Size, T2UseMap), TreeY1 = add_kv(TreeY0, {o, "B1", "Y1", null}, {caine, 101}, BinFun), TreeY2 = add_kv(TreeY1, {o, "B1", "Y2", null}, {caine, 102}, BinFun), TreeY3 = add_kv(TreeY2, {o, "B1", "Y3", null}, {caine, 103}, BinFun), TreeY4 = add_kv(TreeY3, {o, "B1", "Y3", null}, {caine, 104}, BinFun), - + TreeZ1 = add_kv(TreeX4, {o, "B1", "Y1", null}, {caine, 101}, BinFun), TreeZ2 = add_kv(TreeZ1, {o, "B1", "Y2", null}, {caine, 102}, BinFun), TreeZ3 = add_kv(TreeZ2, {o, "B1", "Y3", null}, {caine, 103}, BinFun), TreeZ4 = add_kv(TreeZ3, {o, "B1", "Y3", null}, {caine, 104}, BinFun), - + TreeM0 = merge_trees(TreeX4, TreeY4), checktree(TreeM0), ?assertMatch(true, TreeM0#tictactree.level1 == TreeZ4#tictactree.level1), - + TreeM1 = merge_trees(TreeX3, TreeY4), checktree(TreeM1), ?assertMatch(false, TreeM1#tictactree.level1 == TreeZ4#tictactree.level1). exportable_test() -> {Int1, Int2} = tictac_hash(<<"key">>, <<"value">>), - ?assertMatch({true, true}, {Int1 >= 0, Int2 >=0}). + ?assertMatch({true, true}, {Int1 >= 0, Int2 >= 0}). merge_emptytree_test() -> TreeA = new_tree("A"), @@ -871,7 +880,7 @@ alter_segment_test() -> alter_segment_tester(UseMap) -> BinFun = fun(K, V) -> {leveled_util:t2b(K), leveled_util:t2b(V)} end, - + TreeX0 = new_tree(0, small, UseMap), TreeX1 = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun), TreeX2 = add_kv(TreeX1, {o, "B1", "X2", null}, {caine, 2}, BinFun), @@ -894,15 +903,14 @@ return_segment_test() -> return_segment_tester(UseMap) -> BinFun = fun(K, V) -> {leveled_util:t2b(K), leveled_util:t2b(V)} end, - + TreeX0 = new_tree(0, small, UseMap), - {TreeX1, SegID} - = add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun, true), + {TreeX1, SegID} = + add_kv(TreeX0, {o, "B1", "X1", null}, {caine, 1}, BinFun, true), TreeX2 = alter_segment(SegID, 0, TreeX1), ?assertMatch(1, length(compare_trees_maxonedelta(TreeX1, TreeX0))), ?assertMatch(1, length(compare_trees_maxonedelta(TreeX1, TreeX2))). - compare_trees_maxonedelta(Tree0, Tree1) -> Root1 = fetch_root(Tree1), Root0 = fetch_root(Tree0), @@ -936,7 +944,6 @@ expand_membershiplist_tester(SmallSize, LargeSize, Key) -> AdjList = adjust_segmentmatch_list([Segment1], SmallSize, LargeSize), ?assertMatch(true, lists:member(Segment2, AdjList)). - segment_expandsimple_test() -> AdjList = adjust_segmentmatch_list([1, 100], small, medium), io:format("List adjusted to ~w~n", [AdjList]), @@ -952,7 +959,6 @@ segment_expandsimple_test() -> OrigList = adjust_segmentmatch_list([1, 100], medium, medium), ?assertMatch([1, 100], OrigList). - timing_test() -> timing_tester(10000, 4, small, large), timing_tester(10000, 8, small, large), @@ -960,19 +966,20 @@ timing_test() -> timing_tester(10000, 4, small, medium), timing_tester(100000, 4, small, large). 
- timing_tester(KeyCount, SegCount, SmallSize, LargeSize) -> SegList = - lists:map(fun(_C) -> - rand:uniform(get_size(SmallSize) * ?L2_CHUNKSIZE - 1) - end, - lists:seq(1, SegCount)), + lists:map( + fun(_C) -> + rand:uniform(get_size(SmallSize) * ?L2_CHUNKSIZE - 1) + end, + lists:seq(1, SegCount) + ), KeyToSegFun = fun(I) -> HK = keyto_segment32(integer_to_binary(I)), {I, get_segment(HK, LargeSize)} end, - + MatchList = lists:map(KeyToSegFun, lists:seq(1, KeyCount)), {T0, Out0} = @@ -980,13 +987,17 @@ timing_tester(KeyCount, SegCount, SmallSize, LargeSize) -> {T1, Out1} = matchbysegment_check(SegList, MatchList, SmallSize, LargeSize), ?assertMatch(true, Out0 == Out1), - io:format(user, "~nCheck with KeyCount=~w SegCount=~w TreeSizes ~w ~w:~n", - [KeyCount, SegCount, SmallSize, LargeSize]), - io:format(user, - "adjust_segmentmatch_list check took ~w ms " ++ - "match_segment took ~w ms~n", - [T0, T1]). - + io:format( + user, + "~nCheck with KeyCount=~w SegCount=~w TreeSizes ~w ~w:~n", + [KeyCount, SegCount, SmallSize, LargeSize] + ), + io:format( + user, + "adjust_segmentmatch_list check took ~w ms " ++ + "match_segment took ~w ms~n", + [T0, T1] + ). adjustsegmentlist_check(SegList, MatchList, SmallSize, LargeSize) -> SW = os:timestamp(), @@ -996,23 +1007,24 @@ adjustsegmentlist_check(SegList, MatchList, SmallSize, LargeSize) -> lists:member(S, AdjList) end, OL = lists:filter(PredFun, MatchList), - {timer:now_diff(os:timestamp(), SW)/1000, OL}. + {timer:now_diff(os:timestamp(), SW) / 1000, OL}. matchbysegment_check(SegList, MatchList, SmallSize, LargeSize) -> SW = os:timestamp(), PredFun = fun({_I, S}) -> FoldFun = - fun(_SM, true) -> - true; - (SM, false) -> - match_segment({SM, SmallSize}, {S, LargeSize}) + fun + (_SM, true) -> + true; + (SM, false) -> + match_segment({SM, SmallSize}, {S, LargeSize}) end, lists:foldl(FoldFun, false, SegList) end, OL = lists:filter(PredFun, MatchList), - {timer:now_diff(os:timestamp(), SW)/1000, OL}. - + {timer:now_diff(os:timestamp(), SW) / 1000, OL}. + find_dirtysegments_withanemptytree_test() -> T1 = new_tree(t1), T2 = new_tree(t2), @@ -1024,7 +1036,6 @@ find_dirtysegments_withanemptytree_test() -> ?assertMatch(ExpectedAnswer, find_dirtysegments(<<>>, fetch_root(T3))), ?assertMatch(ExpectedAnswer, find_dirtysegments(fetch_root(T3), <<>>)). - tictac_perf_test_() -> {timeout, 120, fun tictac_perf_tester_multi/0}. @@ -1038,13 +1049,15 @@ tictac_perf_tester(KeyCount, TreeSize) -> SW0 = os:system_time(millisecond), KVL = lists:map( - fun(I) -> - {{o, to_bucket(I rem 8), to_key(I), null}, - {is_hash, erlang:phash2(integer_to_binary(I))}} + fun(I) -> + { + {o, to_bucket(I rem 8), to_key(I), null}, + {is_hash, erlang:phash2(integer_to_binary(I))} + } end, lists:seq(1, KeyCount) ), - + SW1 = os:system_time(millisecond), io:format(user, "Generating Keys took ~w milliseconds~n", [SW1 - SW0]), @@ -1062,7 +1075,7 @@ tictac_perf_tester(KeyCount, TreeSize) -> Tree, KVL ), - + SW3 = os:system_time(millisecond), io:format(user, "Loading tree took ~w milliseconds~n", [SW3 - SW2]), log_memory_footprint(), @@ -1090,9 +1103,4 @@ to_bucket(N) -> log_memory_footprint() -> io:format(user, "Memory footprint ~0p~n", [erlang:memory()]). - -endif. 
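As a concrete illustration of adjust_segmentmatch_list/3 and match_segment/2, which the timing tests above exercise: a small tree has 256 * 256 = 65536 segments and a large tree 4096 * 256 = 1048576, so one small-tree segment corresponds to the 16 large-tree segments that share its low 16 bits. The sketch below works that through, assuming (as in the unchanged get_segment/2 clause) that a hash is reduced to a segment by masking it to the segment count; the module name is hypothetical.

-module(tictac_segment_sketch).
-export([demo/0]).

demo() ->
    SmallCount = 256 * 256,   %% small: 256 branches x 256 leaves per branch
    LargeCount = 4096 * 256,  %% large: 4096 branches x 256 leaves per branch
    SmallSeg = 5,
    %% The large-tree segments that fold back onto small-tree segment 5 are
    %% 5, 5 + 65536, 5 + 2 * 65536, ... up to the large segment count.
    Expanded = [SmallSeg + (N bsl 16) || N <- lists:seq(0, 15)],
    true = lists:all(fun(S) -> S < LargeCount end, Expanded),
    %% Masking each one down to the small segment count recovers 5, which is
    %% the comparison match_segment/2 makes when the tree sizes differ.
    true = lists:all(
        fun(S) -> S band (SmallCount - 1) == SmallSeg end, Expanded
    ),
    Expanded.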
- - - - diff --git a/src/leveled_tree.erl b/src/leveled_tree.erl index 22aa626f..c6dd92c1 100644 --- a/src/leveled_tree.erl +++ b/src/leveled_tree.erl @@ -4,36 +4,35 @@ %% - the lack of iterator_from support in OTP16 gb_trees %% - the time to convert from/to list in gb_trees %% -%% Leveled had had a skiplist implementation previously, and this is a +%% Leveled had had a skiplist implementation previously, and this is a %% variation on that. The Treein this case is a bunch of sublists of length -%% SKIP_WIDTH with the start_keys in a gb_tree. +%% SKIP_WIDTH with the start_keys in a gb_tree. -module(leveled_tree). -include("leveled.hrl"). -export([ - from_orderedlist/2, - from_orderedset/2, - from_orderedlist/3, - from_orderedset/3, - to_list/1, - match_range/3, - search_range/4, - match/2, - search/3, - tsize/1, - empty/1 - ]). + from_orderedlist/2, + from_orderedset/2, + from_orderedlist/3, + from_orderedset/3, + to_list/1, + match_range/3, + search_range/4, + match/2, + search/3, + tsize/1, + empty/1 +]). -define(SKIP_WIDTH, 16). --type tree_type() :: tree|idxt|skpl. +-type tree_type() :: tree | idxt | skpl. -type leveled_tree() :: {tree_type(), integer(), any()}. -export_type([leveled_tree/0]). - %%%============================================================================ %%% API %%%============================================================================ @@ -46,7 +45,8 @@ from_orderedset(Table, Type) -> from_orderedlist(ets:tab2list(Table), Type, ?SKIP_WIDTH). -spec from_orderedset( - ets:tab(), tree_type(), integer()|auto) -> leveled_tree(). + ets:tab(), tree_type(), integer() | auto +) -> leveled_tree(). %% @doc %% Convert an ETS table of Keys and Values (of table type ordered_set) into a %% leveled_tree of the given type. The SkipWidth is an integer representing @@ -64,7 +64,8 @@ from_orderedlist(OrderedList, Type) -> from_orderedlist(OrderedList, Type, ?SKIP_WIDTH). -spec from_orderedlist( - list(tuple()), tree_type(), integer()|auto) -> leveled_tree(). + list(tuple()), tree_type(), integer() | auto +) -> leveled_tree(). %% @doc %% Convert a list of Keys and Values (of table type ordered_set) into a %% leveled_tree of the given type. The SkipWidth is an integer representing @@ -88,8 +89,8 @@ from_orderedlist(OrderedList, skpl, _SkipWidth) -> _ -> 4 end, {skpl, L, skpl_fromorderedlist(OrderedList, L, SkipWidth, 2)}. - --spec match(tuple()|integer(), leveled_tree()) -> none|{value, any()}. + +-spec match(tuple() | integer(), leveled_tree()) -> none | {value, any()}. %% @doc %% Return the value from a tree associated with an exact match for the given %% key. This assumes the tree contains the actual keys and values to be @@ -115,10 +116,11 @@ match(Key, {skpl, _L, SkipList}) -> lookup_match(Key, SL0). -spec search( - tuple()|integer(), + tuple() | integer(), leveled_tree(), - fun((leveled_pmanifest:manifest_entry()) -> leveled_codec:object_key())) - -> none|tuple(). + fun((leveled_pmanifest:manifest_entry()) -> leveled_codec:object_key()) +) -> + none | tuple(). %% @doc %% Search is used when the tree is a manifest of key ranges and it is necessary %% to find a rnage which may contain the key. The StartKeyFun is used if the @@ -167,9 +169,10 @@ search(Key, {skpl, _L, SkipList}, StartKeyFun) -> end. -spec match_range( - tuple()|integer()|all, - tuple()|integer()|all, - leveled_tree()) -> list(). + tuple() | integer() | all, + tuple() | integer() | all, + leveled_tree() +) -> list(). 
%% @doc %% Return a range of value between trees from a tree associated with an %% exact match for the given key. This assumes the tree contains the actual @@ -186,10 +189,11 @@ match_range(StartRange, EndRange, Tree) -> match_range(StartRange, EndRange, Tree, EndRangeFun). -spec match_range( - tuple()|integer()|all, - tuple()|integer()|all, + tuple() | integer() | all, + tuple() | integer() | all, leveled_tree(), - fun((term(), term(), term()) -> boolean())) -> list(). + fun((term(), term(), term()) -> boolean()) +) -> list(). %% @doc %% As match_range/3 but a function can be passed to be used when comparing the %5 EndKey with a key in the tree (such as leveled_codec:endkey_passed), where @@ -202,11 +206,12 @@ match_range(StartRange, EndRange, {skpl, _L, SkipList}, EndRangeFun) -> skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun). -spec search_range( - tuple()|integer()|all, - tuple()|integer()|all, + tuple() | integer() | all, + tuple() | integer() | all, leveled_tree(), - fun((leveled_pmanifest:manifest_entry()) -> leveled_codec:object_key())) - -> list(). + fun((leveled_pmanifest:manifest_entry()) -> leveled_codec:object_key()) +) -> + list(). %% @doc %% Extract a range from a tree, with search used when the tree is a manifest %% of key ranges and it is necessary to find a rnage which may encapsulate the @@ -218,7 +223,7 @@ search_range(StartRange, EndRange, Tree, StartKeyFun) -> EndRangeFun = fun(ER, _FirstRHSKey, FirstRHSValue) -> StartRHSKey = StartKeyFun(FirstRHSValue), - not leveled_codec:endkey_passed(ER, StartRHSKey) + not leveled_codec:endkey_passed(ER, StartRHSKey) end, case Tree of {tree, _L, T} -> @@ -241,15 +246,14 @@ to_list({tree, _L, Tree}) -> to_list({idxt, _L, {TLI, _IDX}}) when is_tuple(TLI) -> lists:append(tuple_to_list(TLI)); to_list({skpl, _L, SkipList}) when is_list(SkipList) -> - FoldFun = + FoldFun = fun({_M, SL}, Acc) -> - [SL|Acc] + [SL | Acc] end, Lv1List = lists:reverse(lists:foldl(FoldFun, [], SkipList)), Lv0List = lists:reverse(lists:foldl(FoldFun, [], lists:append(Lv1List))), lists:append(Lv0List). - -spec tsize(leveled_tree()) -> integer(). %% @doc %% Return the count of items in a tree @@ -270,28 +274,29 @@ empty(skpl) -> %%% Internal Functions %%%============================================================================ - tree_fromorderedlist([], TmpList, _L, _SkipWidth) -> gb_trees:from_orddict(lists:reverse(TmpList)); tree_fromorderedlist(OrdList, TmpList, L, SkipWidth) -> SubLL = min(SkipWidth, L), {Head, Tail} = lists:split(SubLL, OrdList), {LastK, _LastV} = lists:last(Head), - tree_fromorderedlist(Tail, [{LastK, Head}|TmpList], L - SubLL, SkipWidth). - + tree_fromorderedlist(Tail, [{LastK, Head} | TmpList], L - SubLL, SkipWidth). + idxt_fromorderedlist([], {TmpListElements, TmpListIdx, _C}, _L, _SkipWidth) -> - {list_to_tuple(lists:reverse(TmpListElements)), - gb_trees:from_orddict(lists:reverse(TmpListIdx))}; + { + list_to_tuple(lists:reverse(TmpListElements)), + gb_trees:from_orddict(lists:reverse(TmpListIdx)) + }; idxt_fromorderedlist(OrdList, {TmpListElements, TmpListIdx, C}, L, SkipWidth) -> SubLL = min(SkipWidth, L), {Head, Tail} = lists:split(SubLL, OrdList), {LastK, _LastV} = lists:last(Head), - idxt_fromorderedlist(Tail, - {[Head|TmpListElements], - [{LastK, C}|TmpListIdx], - C + 1}, - L - SubLL, - SkipWidth). + idxt_fromorderedlist( + Tail, + {[Head | TmpListElements], [{LastK, C} | TmpListIdx], C + 1}, + L - SubLL, + SkipWidth + ). 
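tree_fromorderedlist/4 and idxt_fromorderedlist/4 above (and roll_list/4 below) all follow the same pattern: the ordered input is cut into sublists of at most SkipWidth entries, each indexed by its last key, so that a lookup only ever scans one short sublist. A minimal sketch of that chunking, and of a lookup over it, with illustrative names:

-module(leveled_tree_sketch).
-export([chunk/2, lookup/2]).

%% Cut an ordered [{Key, Value}] list into {LastKey, Sublist} pairs of at
%% most Width entries - the shape used by the tree/idxt/skpl variants.
chunk(OrderedList, Width) when is_list(OrderedList), Width > 0 ->
    chunk(OrderedList, Width, []).

chunk([], _Width, Acc) ->
    lists:reverse(Acc);
chunk(OrderedList, Width, Acc) ->
    N = min(Width, length(OrderedList)),
    {Head, Tail} = lists:split(N, OrderedList),
    {LastK, _LastV} = lists:last(Head),
    chunk(Tail, Width, [{LastK, Head} | Acc]).

%% Find a key by first locating the sublist whose last key is >= Key,
%% then scanning only that sublist.
lookup(Key, Chunks) ->
    case lists:dropwhile(fun({LastK, _SL}) -> LastK < Key end, Chunks) of
        [] ->
            none;
        [{_LastK, Sublist} | _Rest] ->
            case lists:keyfind(Key, 1, Sublist) of
                false -> none;
                {Key, Value} -> {value, Value}
            end
    end.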
skpl_fromorderedlist(SkipList, _L, _SkipWidth, 0) -> SkipList; @@ -305,9 +310,7 @@ roll_list(KVList, L, SkipList, SkipWidth) -> SubLL = min(SkipWidth, L), {Head, Tail} = lists:split(SubLL, KVList), {LastK, _LastV} = lists:last(Head), - roll_list(Tail, L - SubLL, [{LastK, Head}|SkipList], SkipWidth). - - + roll_list(Tail, L - SubLL, [{LastK, Head} | SkipList], SkipWidth). % lookup_match(_Key, []) -> % none; @@ -319,7 +322,7 @@ roll_list(KVList, L, SkipList, SkipWidth) -> % lookup_match(Key, Tail). lookup_match(Key, KVList) -> - case lists:keyfind(Key, 1, KVList) of + case lists:keyfind(Key, 1, KVList) of false -> none; {Key, Value} -> @@ -328,9 +331,9 @@ lookup_match(Key, KVList) -> lookup_best(_Key, []) -> none; -lookup_best(Key, [{EK, EV}|_Tail]) when EK >= Key -> +lookup_best(Key, [{EK, EV} | _Tail]) when EK >= Key -> {EK, EV}; -lookup_best(Key, [_Top|Tail]) -> +lookup_best(Key, [_Top | Tail]) -> lookup_best(Key, Tail). treelookup_range_start(StartRange, EndRange, Tree, EndRangeFun) -> @@ -355,7 +358,7 @@ treelookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) -> case leveled_codec:endkey_passed(EndRange, NK0) of true -> {LHS, RHS} = lists:splitwith(PredFun, SL0), - [{FirstRHSKey, FirstRHSValue}|_Rest] = RHS, + [{FirstRHSKey, FirstRHSValue} | _Rest] = RHS, case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of true -> Output ++ LHS ++ [{FirstRHSKey, FirstRHSValue}]; @@ -368,16 +371,18 @@ treelookup_range_end(EndRange, {NK0, SL0}, Iter0, Output, EndRangeFun) -> none -> UpdOutput; {NK1, SL1, Iter1} -> - treelookup_range_end(EndRange, - {NK1, SL1}, - Iter1, - UpdOutput, - EndRangeFun) - end + treelookup_range_end( + EndRange, + {NK1, SL1}, + Iter1, + UpdOutput, + EndRangeFun + ) + end end. idxtlookup_range_start(StartRange, EndRange, {TLI, IDX}, EndRangeFun) -> - % TLI tuple of lists, IDS is a gb_tree of End Keys mapping to tuple + % TLI tuple of lists, IDS is a gb_tree of End Keys mapping to tuple % indexes Iter0 = tree_iterator_from(StartRange, IDX), case tree_next(Iter0) of @@ -389,27 +394,29 @@ idxtlookup_range_start(StartRange, EndRange, {TLI, IDX}, EndRangeFun) -> K < StartRange end, {_LHS, RHS} = lists:splitwith(PredFun, element(ListID, TLI)), - % The RHS is the list of {EK, SK} elements where the EK >= the + % The RHS is the list of {EK, SK} elements where the EK >= the % StartRange, otherwise the LHS falls before the range - idxtlookup_range_end(EndRange, {TLI, NK, RHS}, Iter1, [], EndRangeFun) + idxtlookup_range_end( + EndRange, {TLI, NK, RHS}, Iter1, [], EndRangeFun + ) end. idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) -> PredFun = fun({K, _V}) -> not leveled_codec:endkey_passed(EndRange, K) - % true if EndRange is after K + % true if EndRange is after K end, case leveled_codec:endkey_passed(EndRange, NK0) of true -> % The end key of this list is after the end of the range, so no - % longer interested in any of the rest of the tree - just this + % longer interested in any of the rest of the tree - just this % sublist {LHS, RHS} = lists:splitwith(PredFun, SL0), - % Split the {EK, SK} pairs based on the EndRange. Note that the + % Split the {EK, SK} pairs based on the EndRange. Note that the % last key is passed the end range - so the RHS cannot be empty, it % must at least include the last key (as NK0 is at the end of SL0). 
- [{FirstRHSKey, FirstRHSValue}|_Rest] = RHS, + [{FirstRHSKey, FirstRHSValue} | _Rest] = RHS, case EndRangeFun(EndRange, FirstRHSKey, FirstRHSValue) of true -> % The start key is not after the end of the range @@ -426,26 +433,29 @@ idxtlookup_range_end(EndRange, {TLI, NK0, SL0}, Iter0, Output, EndRangeFun) -> none -> UpdOutput; {NK1, ListID, Iter1} -> - idxtlookup_range_end(EndRange, - {TLI, NK1, element(ListID, TLI)}, - Iter1, - UpdOutput, - EndRangeFun) - end + idxtlookup_range_end( + EndRange, + {TLI, NK1, element(ListID, TLI)}, + Iter1, + UpdOutput, + EndRangeFun + ) + end end. skplfold_range([], _StartRange, _EndRange, Acc) -> Acc; -skplfold_range([{K, _SL}|Rest], StartRange, EndRange, Acc) when StartRange > K -> +skplfold_range([{K, _SL} | Rest], StartRange, EndRange, Acc) when + StartRange > K +-> skplfold_range(Rest, StartRange, EndRange, Acc); -skplfold_range([{K, SL}|Rest], StartRange, EndRange, Acc) -> +skplfold_range([{K, SL} | Rest], StartRange, EndRange, Acc) -> case leveled_codec:endkey_passed(EndRange, K) of true -> - [SL|Acc]; + [SL | Acc]; false -> - skplfold_range(Rest, StartRange, EndRange, [SL|Acc]) + skplfold_range(Rest, StartRange, EndRange, [SL | Acc]) end. - skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) -> Lv1List = @@ -455,7 +465,8 @@ skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) -> Lv0List = lists:reverse( skplfold_range( - lists:append(Lv1List), StartRange, EndRange, []) + lists:append(Lv1List), StartRange, EndRange, [] + ) ), BeforeFun = fun({K, _V}) -> @@ -470,7 +481,7 @@ skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) -> EndRangeFun(EndRange, K, V) end end, - + case Lv0List of [] -> []; @@ -481,14 +492,13 @@ skpllookup_to_range(StartRange, EndRange, SkipList, EndRangeFun) -> RHSofLHL = lists:dropwhile(BeforeFun, LHList), LHSofRHL = lists:takewhile(AfterFun, RHList), RHSofLHL ++ LHSofRHL; - [LHL|Rest] -> + [LHL | Rest] -> RHSofLHL = lists:dropwhile(BeforeFun, LHL), LHSofRHL = lists:takewhile(AfterFun, lists:last(Rest)), MidLists = lists:sublist(Rest, length(Rest) - 1), lists:append([RHSofLHL] ++ MidLists ++ [LHSofRHL]) end. - skpl_getsublist(Key, SkipList) -> FoldFun = fun({Mark, SL}, Acc) -> @@ -520,7 +530,6 @@ tree_next(I) -> % For OTP 16 compatibility with gb_trees next(I). - iterator_from(S, {_, T}) -> iterator_1_from(S, T). @@ -560,11 +569,13 @@ iterator(nil, As) -> -include_lib("eunit/include/eunit.hrl"). generate_randomkeys(Seqn, Count, BucketRangeLow, BucketRangeHigh) -> - generate_randomkeys(Seqn, - Count, - [], - BucketRangeLow, - BucketRangeHigh). + generate_randomkeys( + Seqn, + Count, + [], + BucketRangeLow, + BucketRangeHigh + ). generate_randomkeys(_Seqn, 0, Acc, _BucketLow, _BucketHigh) -> Acc; @@ -572,21 +583,25 @@ generate_randomkeys(Seqn, Count, Acc, BucketLow, BRange) -> BRand = rand:uniform(BRange), BNumber = lists:flatten( - io_lib:format("K~4..0B", [BucketLow + BRand])), + io_lib:format("K~4..0B", [BucketLow + BRand]) + ), KNumber = lists:flatten( - io_lib:format("K~8..0B", [rand:uniform(1000)])), + io_lib:format("K~8..0B", [rand:uniform(1000)]) + ), {K, V} = - {{o_kv, - {<<"btype">>, list_to_binary("Bucket" ++ BNumber)}, - list_to_binary("Key" ++ KNumber), - null}, - Seqn}, - generate_randomkeys(Seqn + 1, - Count - 1, - [{K, V}|Acc], - BucketLow, - BRange). + { + {o_kv, {<<"btype">>, list_to_binary("Bucket" ++ BNumber)}, + list_to_binary("Key" ++ KNumber), null}, + Seqn + }, + generate_randomkeys( + Seqn + 1, + Count - 1, + [{K, V} | Acc], + BucketLow, + BRange + ). 
generate_simplekeys(Seqn, Count) -> generate_simplekeys(Seqn, Count, []). @@ -597,9 +612,10 @@ generate_simplekeys(Seqn, Count, Acc) -> KNumber = list_to_binary( lists:flatten( - io_lib:format("K~8..0B", [rand:uniform(100000)]))), - generate_simplekeys(Seqn + 1, Count - 1, [{KNumber, Seqn}|Acc]). - + io_lib:format("K~8..0B", [rand:uniform(100000)]) + ) + ), + generate_simplekeys(Seqn + 1, Count - 1, [{KNumber, Seqn} | Acc]). tree_search_test() -> search_test_by_type(tree), @@ -620,7 +636,7 @@ search_test_by_type(Type) -> end, KL = lists:map(MapFun, lists:seq(1, 50)), T = from_orderedlist(KL, Type), - + StartKeyFun = fun(V) -> V end, statistics(runtime), ?assertMatch([], search_range(0, 1, T, StartKeyFun)), @@ -634,9 +650,11 @@ search_test_by_type(Type) -> ?assertMatch(49, length(search_range(4, 197, T, StartKeyFun))), ?assertMatch(48, length(search_range(5, 197, T, StartKeyFun))), {_, T1} = statistics(runtime), - io:format(user, "10 range tests with type ~w in ~w microseconds~n", - [Type, T1]). - + io:format( + user, + "10 range tests with type ~w in ~w microseconds~n", + [Type, T1] + ). tree_oor_test() -> outofrange_test_by_type(tree). @@ -661,9 +679,9 @@ outofrange_test_by_type(Type) -> ?assertMatch(none, match(97, T)), ?assertMatch(none, match(197, T)), ?assertMatch(none, match(201, T)), - + StartKeyFun = fun(V) -> V end, - + ?assertMatch(none, search(0, T, StartKeyFun)), ?assertMatch(none, search(5, T, StartKeyFun)), ?assertMatch(none, search(97, T, StartKeyFun)), @@ -688,7 +706,6 @@ tolist_test_by_type(Type) -> T = from_orderedlist(KL, Type), T_Reverse = to_list(T), ?assertMatch(KL, T_Reverse). - timing_tests_tree_test_() -> {timeout, 60, fun tree_timing/0}. @@ -724,15 +741,21 @@ log_tree_test_by_(Width, Type, N) -> KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), SW = os:timestamp(), tree_test_by_(Width, Type, KL), - io:format(user, "Test took ~w ms", - [timer:now_diff(os:timestamp(), SW) div 1000]). + io:format( + user, + "Test took ~w ms", + [timer:now_diff(os:timestamp(), SW) div 1000] + ). log_tree_test_by_simplekey_(Width, Type, N) -> KL = lists:ukeysort(1, generate_simplekeys(1, N)), SW = os:timestamp(), tree_test_by_(Width, Type, KL, false), - io:format(user, "Test with simple key took ~w ms", - [timer:now_diff(os:timestamp(), SW) div 1000]). + io:format( + user, + "Test with simple key took ~w ms", + [timer:now_diff(os:timestamp(), SW) div 1000] + ). tree_test_by_(Width, Type, KL) -> tree_test_by_(Width, Type, KL, true). 
@@ -741,72 +764,98 @@ tree_test_by_(Width, Type, KL, ComplexKey) -> io:format( user, "~n~nTree test with complexkey=~w for type and width: ~w ~w~n", - [ComplexKey, Type, Width]), + [ComplexKey, Type, Width] + ), OS = ets:new(test, [ordered_set, private]), ets:insert(OS, KL), SWaETS = os:timestamp(), Tree0 = from_orderedset(OS, Type, Width), - io:format(user, "Generating tree from ETS in ~w microseconds" ++ - " of size ~w~n", - [timer:now_diff(os:timestamp(), SWaETS), - tsize(Tree0)]), - io:format(user, + io:format( + user, + "Generating tree from ETS in ~w microseconds" ++ + " of size ~w~n", + [ + timer:now_diff(os:timestamp(), SWaETS), + tsize(Tree0) + ] + ), + io:format( + user, "Tree has footprint size ~w bytes flat_size ~w bytes~n", - [erts_debug:size(Tree0) * 8, erts_debug:flat_size(Tree0) * 8]), - + [erts_debug:size(Tree0) * 8, erts_debug:flat_size(Tree0) * 8] + ), + SWaGSL = os:timestamp(), Tree1 = from_orderedlist(KL, Type, Width), - io:format(user, "Generating tree from orddict in ~w microseconds" ++ - " of size ~w~n", - [timer:now_diff(os:timestamp(), SWaGSL), - tsize(Tree1)]), - io:format(user, + io:format( + user, + "Generating tree from orddict in ~w microseconds" ++ + " of size ~w~n", + [ + timer:now_diff(os:timestamp(), SWaGSL), + tsize(Tree1) + ] + ), + io:format( + user, "Tree has footprint size ~w bytes flat_size ~w bytes~n", - [erts_debug:size(Tree1) * 8, erts_debug:flat_size(Tree1) * 8]), + [erts_debug:size(Tree1) * 8, erts_debug:flat_size(Tree1) * 8] + ), SWaLUP = os:timestamp(), lists:foreach(match_fun(Tree0), KL), lists:foreach(match_fun(Tree1), KL), - io:format(user, "Looked up all keys twice in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWaLUP)]), - + io:format( + user, + "Looked up all keys twice in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWaLUP)] + ), + ?assertMatch(Tree0, Tree1), - + SWaSRCH1 = os:timestamp(), lists:foreach(search_exactmatch_fun(Tree0), KL), lists:foreach(search_exactmatch_fun(Tree1), KL), - io:format(user, "Search all keys twice for exact match in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWaSRCH1)]), - + io:format( + user, + "Search all keys twice for exact match in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWaSRCH1)] + ), + BitBiggerKeyFun = case ComplexKey of true -> fun(Idx) -> {K, _V} = lists:nth(Idx, KL), {o_kv, B, FullKey, null} = K, - {{o_kv, - B, - list_to_binary(binary_to_list(FullKey) ++ "0"), - null}, - lists:nth(Idx + 1, KL)} + { + {o_kv, B, + list_to_binary(binary_to_list(FullKey) ++ "0"), + null}, + lists:nth(Idx + 1, KL) + } end; false -> fun(Idx) -> {K, _V} = lists:nth(Idx, KL), - {list_to_binary(binary_to_list(K) ++ "0"), - lists:nth(Idx + 1, KL)} + { + list_to_binary(binary_to_list(K) ++ "0"), + lists:nth(Idx + 1, KL) + } end - end, - + end, + SrchKL = lists:map(BitBiggerKeyFun, lists:seq(1, length(KL) - 1)), - + SWaSRCH2 = os:timestamp(), lists:foreach(search_nearmatch_fun(Tree0), SrchKL), lists:foreach(search_nearmatch_fun(Tree1), SrchKL), - io:format(user, "Search all keys twice for near match in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SWaSRCH2)]). - + io:format( + user, + "Search all keys twice for near match in ~w microseconds~n", + [timer:now_diff(os:timestamp(), SWaSRCH2)] + ). tree_matchrange_test() -> matchrange_test_by_type(tree), @@ -820,29 +869,32 @@ skpl_matchrange_test() -> matchrange_test_by_type(skpl), extra_matchrange_test_by_type(skpl). 
- matchrange_test_by_type(Type) -> N = 4000, KL = lists:ukeysort(1, generate_randomkeys(1, N, 1, N div 5)), Tree0 = from_orderedlist(KL, Type), - + FirstKey = element(1, lists:nth(1, KL)), FinalKey = element(1, lists:last(KL)), PenultimateKey = element(1, lists:nth(length(KL) - 1, KL)), AfterFirstKey = - setelement(3, - FirstKey, - list_to_binary(binary_to_list(element(3, FirstKey)) ++ "0")), + setelement( + 3, + FirstKey, + list_to_binary(binary_to_list(element(3, FirstKey)) ++ "0") + ), AfterPenultimateKey = - setelement(3, - PenultimateKey, - list_to_binary(binary_to_list(element(3, PenultimateKey)) ++ "0")), - + setelement( + 3, + PenultimateKey, + list_to_binary(binary_to_list(element(3, PenultimateKey)) ++ "0") + ), + LengthR = fun(SK, EK, T) -> length(match_range(SK, EK, T)) end, - + KL_Length = length(KL), io:format("KL_Length ~w~n", [KL_Length]), ?assertMatch(KL_Length, LengthR(FirstKey, FinalKey, Tree0)), @@ -860,10 +912,12 @@ extra_matchrange_test_by_type(Type) -> Tree0 = from_orderedlist(KL, Type), SubL = lists:sublist(KL, 2000, 3100), - RangeLists = - lists:map(fun(P) -> lists:sublist(SubL, P, P + 50) end, - lists:seq(1, 50)), - TestRangeLFun = + RangeLists = + lists:map( + fun(P) -> lists:sublist(SubL, P, P + 50) end, + lists:seq(1, 50) + ), + TestRangeLFun = fun(RangeL) -> SKeyV = lists:nth(1, RangeL), EKeyV = lists:nth(50, RangeL), @@ -885,15 +939,15 @@ extra_searchrange_test_by_type(Type) -> Tree0 = from_orderedlist(SearchKL, Type), SubL = lists:sublist(KL, 2000, 3100), - + SKFun = fun(V) -> V end, - - TestRangeLFun = + + TestRangeLFun = fun(P) -> RangeL = lists:sublist(SubL, P, P + 50), - % If P is odd, the range keys will be between a start key and an - % end key. - % If P is even, the range keys will be between an end key and a + % If P is odd, the range keys will be between a start key and an + % end key. + % If P is even, the range keys will be between an end key and a % start key SKeyV = lists:nth(1, RangeL), EKeyV = lists:nth(50, RangeL), @@ -903,16 +957,18 @@ extra_searchrange_test_by_type(Type) -> {o_kv, SB, list_to_binary(binary_to_list(SK) ++ "0"), null}, BRangeK = {o_kv, EB, list_to_binary(binary_to_list(EK) ++ "0"), null}, - ?assertMatch(25, length(search_range(FRangeK, BRangeK, Tree0, SKFun))) + ?assertMatch( + 25, length(search_range(FRangeK, BRangeK, Tree0, SKFun)) + ) end, lists:foreach(TestRangeLFun, lists:seq(1, 50)). convertkeylist(KeyList, Acc) when length(KeyList) < 2 -> lists:reverse(Acc); -convertkeylist(KeyList, Acc) -> - [{SK, _SV}|OddTail] = KeyList, - [{EK, _EV}|EvenTail] = OddTail, - convertkeylist(EvenTail, [{EK, SK}|Acc]). +convertkeylist(KeyList, Acc) -> + [{SK, _SV} | OddTail] = KeyList, + [{EK, _EV} | EvenTail] = OddTail, + convertkeylist(EvenTail, [{EK, SK} | Acc]). match_fun(Tree) -> fun({K, V}) -> @@ -940,22 +996,27 @@ empty_test() -> ?assertMatch(0, tsize(T2)). 
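The idx search test that follows builds a single-entry manifest from real manifest-entry records. As a plainer, hedged illustration of the search_range/4 contract described earlier (the tree is keyed by each range's end key, and the StartKeyFun pulls the start key out of the stored value so the first right-hand entry can be checked for overlap), the sketch below uses simple integer keys and made-up file names, and should return the two ranges overlapping keys 25 to 45.

-module(manifest_search_sketch).
-export([demo/0]).

demo() ->
    %% Stand-in manifest: keyed by end key, value holds {StartKey, FileName}.
    Manifest =
        leveled_tree:from_orderedlist(
            [
                {20, {11, "file_11_20.sst"}},
                {40, {21, "file_21_40.sst"}},
                {60, {41, "file_41_60.sst"}}
            ],
            idxt
        ),
    StartKeyFun = fun({StartKey, _FileName}) -> StartKey end,
    %% Keys 25..45 fall within the 21-40 and 41-60 ranges.
    [{40, {21, _}}, {60, {41, _}}] =
        leveled_tree:search_range(25, 45, Manifest, StartKeyFun),
    ok.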
search_range_idx_test() -> - Tree = - {idxt,1, - {{[{{o_rkv,<<"Bucket1">>,<<"Key1">>,null}, - leveled_pmanifest:new_entry( - {o_rkv, <<"Bucket">>, <<"Key9083">>, null}, + Tree = + {idxt, 1, { + {[ + { {o_rkv, <<"Bucket1">>, <<"Key1">>, null}, - list_to_pid("<0.320.0>"), - "./16_1_6.sst", - none - )}]}, - {1, {{o_rkv, <<"Bucket1">>, <<"Key1">>, null}, 1, nil, nil}}}}, + leveled_pmanifest:new_entry( + {o_rkv, <<"Bucket">>, <<"Key9083">>, null}, + {o_rkv, <<"Bucket1">>, <<"Key1">>, null}, + list_to_pid("<0.320.0>"), + "./16_1_6.sst", + none + ) + } + ]}, + {1, {{o_rkv, <<"Bucket1">>, <<"Key1">>, null}, 1, nil, nil}} + }}, R = search_range( - {o_rkv, <<"Bucket">>, null, null}, - {o_rkv, <<"Bucket">>, null, null}, - Tree, + {o_rkv, <<"Bucket">>, null, null}, + {o_rkv, <<"Bucket">>, null, null}, + Tree, fun leveled_pmanifest:entry_startkey/1 ), ?assertMatch(1, length(R)). diff --git a/src/leveled_util.erl b/src/leveled_util.erl index 4b21166e..e15ddd67 100644 --- a/src/leveled_util.erl +++ b/src/leveled_util.erl @@ -6,16 +6,16 @@ -module(leveled_util). -export([ - generate_uuid/0, - integer_now/0, - integer_time/1, - magic_hash/1, - t2b/1, - safe_rename/4, - regex_run/3, - regex_compile/1, - regex_compile/2 - ]). + generate_uuid/0, + integer_now/0, + integer_time/1, + magic_hash/1, + t2b/1, + safe_rename/4, + regex_run/3, + regex_compile/1, + regex_compile/2 +]). -define(WRITE_OPS, [binary, raw, read, write]). @@ -26,8 +26,10 @@ %% https://github.com/afiskon/erlang-uuid-v4/blob/master/src/uuid.erl generate_uuid() -> <> = crypto:strong_rand_bytes(16), - L = io_lib:format("~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b", - [A, B, C band 16#0fff, D band 16#3fff bor 16#8000, E]), + L = io_lib:format( + "~8.16.0b-~4.16.0b-4~3.16.0b-~4.16.0b-~12.16.0b", + [A, B, C band 16#0fff, D band 16#3fff bor 16#8000, E] + ), binary_to_list(list_to_binary(L)). -spec integer_now() -> non_neg_integer(). @@ -36,19 +38,18 @@ generate_uuid() -> integer_now() -> integer_time(os:timestamp()). --spec integer_time (erlang:timestamp()) -> non_neg_integer(). +-spec integer_time(erlang:timestamp()) -> non_neg_integer(). %% @doc %% Return a given time in gergorian seconds integer_time(TS) -> DT = calendar:now_to_universal_time(TS), calendar:datetime_to_gregorian_seconds(DT). - -type match_option() :: - caseless | - {offset, non_neg_integer()} | - {capture, value_spec()} | - {capture, value_spec(), value_spec_type()}. + caseless + | {offset, non_neg_integer()} + | {capture, value_spec()} + | {capture, value_spec(), value_spec_type()}. -type value_spec() :: all | all_but_first | first | none | [value_id()]. -type value_spec_type() :: binary. @@ -56,12 +57,13 @@ integer_time(TS) -> -type match_index() :: {non_neg_integer(), non_neg_integer()}. -spec regex_run( - iodata(), leveled_codec:actual_regex(), list(match_option())) -> - match | - nomatch | - {match, list(match_index())} | - {match, list(binary())} | - {error, atom()}. + iodata(), leveled_codec:actual_regex(), list(match_option()) +) -> + match + | nomatch + | {match, list(match_index())} + | {match, list(binary())} + | {error, atom()}. regex_run(Subject, CompiledPCRE, Opts) -> re:run(Subject, CompiledPCRE, Opts). @@ -73,7 +75,7 @@ regex_compile(PlainRegex, pcre) -> re:compile(PlainRegex). -spec magic_hash(any()) -> 0..16#FFFFFFFF. -%% @doc +%% @doc %% Use DJ Bernstein magic hash function. Note, this is more expensive than %% phash2 but provides a much more balanced result. %% @@ -87,14 +89,13 @@ magic_hash(AnyKey) -> BK = t2b(AnyKey), magic_hash({binary, BK}). 
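hash1/2 below is the core of the DJ Bernstein scheme referred to in the magic_hash/1 documentation: multiply the accumulator by 33, mask to 32 bits, then XOR in the next byte. A standalone sketch of the same loop, assuming the conventional DJB starting value of 5381 (the actual seed is applied in a magic_hash clause outside this hunk):

-module(djb_hash_sketch).
-export([djb_hash/1]).

%% Mirrors the hash1/2 loop that follows; the seed value is an assumption.
djb_hash(Bin) when is_binary(Bin) ->
    djb_hash(5381, Bin).

djb_hash(H, <<>>) ->
    H;
djb_hash(H, <<B:8/integer, Rest/binary>>) ->
    djb_hash(((H * 33) band 16#FFFFFFFF) bxor B, Rest).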
-hash1(H, <<>>) -> +hash1(H, <<>>) -> H; hash1(H, <>) -> H1 = (H * 33) band 16#FFFFFFFF, H2 = H1 bxor B, hash1(H2, Rest). - -spec t2b(term()) -> binary(). %% @doc %% term_to_binary with options necessary to ensure backwards compatability @@ -105,7 +106,6 @@ hash1(H, <>) -> t2b(Term) -> term_to_binary(Term, [{minor_version, 1}]). - -spec safe_rename(string(), string(), binary(), boolean()) -> ok. %% @doc %% Write a file, sync it and rename it (and for super-safe mode read it back) @@ -150,7 +150,6 @@ magichashperf_test() -> {TimeMH2, HL1} = timer:tc(lists, map, [fun(K) -> magic_hash(K) end, KL]), io:format(user, "1000 keys magic hashed in ~w microseconds~n", [TimeMH2]). - safe_rename_test() -> ok = filelib:ensure_dir(?TEST_AREA), TempFN = filename:join(?TEST_AREA, "test_manifest0.pnd"), @@ -162,5 +161,4 @@ safe_rename_test() -> ok = safe_rename(TempFN1, RealFN1, <<2:128/integer>>, true), ?assertMatch({ok, <<2:128/integer>>}, file:read_file(RealFN1)). - -endif. diff --git a/test/end_to_end/appdefined_SUITE.erl b/test/end_to_end/appdefined_SUITE.erl index 906249ac..3855594b 100644 --- a/test/end_to_end/appdefined_SUITE.erl +++ b/test/end_to_end/appdefined_SUITE.erl @@ -2,17 +2,18 @@ -include("leveled.hrl"). -export([all/0, init_per_suite/1, end_per_suite/1]). -export([ - application_defined_tag/1, - bespoketag_recalc/1 - ]). + application_defined_tag/1, + bespoketag_recalc/1 +]). -all() -> [ - application_defined_tag, - bespoketag_recalc - ]. +all() -> + [ + application_defined_tag, + bespoketag_recalc + ]. init_per_suite(Config) -> - testutil:init_per_suite([{suite, "appdefined"}|Config]), + testutil:init_per_suite([{suite, "appdefined"} | Config]), Config. end_per_suite(Config) -> @@ -21,18 +22,22 @@ end_per_suite(Config) -> application_defined_tag(_Config) -> T1 = os:timestamp(), application_defined_tag_tester(40000, ?STD_TAG, [], false), - io:format("Completed with std tag in ~w ms~n", - [timer:now_diff(os:timestamp(), T1)/1000]), - + io:format( + "Completed with std tag in ~w ms~n", + [timer:now_diff(os:timestamp(), T1) / 1000] + ), + T2 = os:timestamp(), application_defined_tag_tester(40000, bespoke_tag1, [], false), - io:format("Completed with app tag but not function in ~w ms~n", - [timer:now_diff(os:timestamp(), T2)/1000]), - - ExtractMDFun = + io:format( + "Completed with app tag but not function in ~w ms~n", + [timer:now_diff(os:timestamp(), T2) / 1000] + ), + + ExtractMDFun = fun(Tag, Size, Obj) -> - [{hash, Hash}, {shard, Shard}, {random, Random}, {value, _V}] - = Obj, + [{hash, Hash}, {shard, Shard}, {random, Random}, {value, _V}] = + Obj, case Tag of bespoke_tag1 -> {{Hash, Size, [{shard, Shard}, {random, Random}]}, []}; @@ -40,42 +45,56 @@ application_defined_tag(_Config) -> {{Hash, Size, [{shard, Shard}]}, [os:timestamp()]} end end, - + T3 = os:timestamp(), - application_defined_tag_tester(40000, ?STD_TAG, - [{extract_metadata, ExtractMDFun}], - false), - io:format("Completed with std tag and override function in ~w ms~n", - [timer:now_diff(os:timestamp(), T3)/1000]), - + application_defined_tag_tester( + 40000, + ?STD_TAG, + [{extract_metadata, ExtractMDFun}], + false + ), + io:format( + "Completed with std tag and override function in ~w ms~n", + [timer:now_diff(os:timestamp(), T3) / 1000] + ), + T4 = os:timestamp(), - application_defined_tag_tester(40000, bespoke_tag1, - [{extract_metadata, ExtractMDFun}], - true), - io:format("Completed with app tag and override function in ~w ms~n", - [timer:now_diff(os:timestamp(), T4)/1000]), - + application_defined_tag_tester( + 
40000, + bespoke_tag1, + [{extract_metadata, ExtractMDFun}], + true + ), + io:format( + "Completed with app tag and override function in ~w ms~n", + [timer:now_diff(os:timestamp(), T4) / 1000] + ), T5 = os:timestamp(), - application_defined_tag_tester(40000, bespoke_tag2, - [{extract_metadata, ExtractMDFun}], - true), - io:format("Completed with app tag and override function in ~w ms~n", - [timer:now_diff(os:timestamp(), T5)/1000]). - + application_defined_tag_tester( + 40000, + bespoke_tag2, + [{extract_metadata, ExtractMDFun}], + true + ), + io:format( + "Completed with app tag and override function in ~w ms~n", + [timer:now_diff(os:timestamp(), T5) / 1000] + ). application_defined_tag_tester(KeyCount, Tag, Functions, ExpectMD) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {sync_strategy, testutil:sync_strategy()}, - {log_level, warn}, - {reload_strategy, - [{bespoke_tag1, retain}, {bespoke_tag2, retain}]}, - {override_functions, Functions}], + StartOpts1 = [ + {root_path, RootPath}, + {sync_strategy, testutil:sync_strategy()}, + {log_level, warn}, + {reload_strategy, [{bespoke_tag1, retain}, {bespoke_tag2, retain}]}, + {override_functions, Functions} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), Value = crypto:strong_rand_bytes(512), - MapFun = - fun(C) -> + MapFun = + fun(C) -> {C, object_generator(C, Value)} end, CBKVL = lists:map(MapFun, lists:seq(1, KeyCount)), @@ -101,12 +120,12 @@ application_defined_tag_tester(KeyCount, Tag, Functions, ExpectMD) -> true = {shard, C rem 10} == lists:keyfind(shard, 1, MD); false -> - true = + true = undefined == MD end end end, - + lists:foreach(CheckFun(Bookie1), CBKVL), ok = leveled_bookie:book_close(Bookie1), @@ -116,18 +135,17 @@ application_defined_tag_tester(KeyCount, Tag, Functions, ExpectMD) -> ok = leveled_bookie:book_close(Bookie2). - object_generator(Count, V) -> Hash = erlang:phash2({count, V}), Random = rand:uniform(1000), Key = list_to_binary(leveled_util:generate_uuid()), Bucket = <<"B">>, - {Bucket, - Key, - [{hash, Hash}, {shard, Count rem 10}, - {random, Random}, {value, V}]}. - - + {Bucket, Key, [ + {hash, Hash}, + {shard, Count rem 10}, + {random, Random}, + {value, V} + ]}. 
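%% A small note on the generator above: shard is derived deterministically as
%% Count rem 10, which is why the CheckFun earlier in this suite can
%% recompute the expected value from the key's counter alone
%% ({shard, C rem 10}) rather than carrying the generated metadata around.
%% For example, the 37th object always carries:
%%
%%   {shard, 7} = {shard, 37 rem 10}.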
bespoketag_recalc(_Config) -> %% Get a sensible behaviour using the recalc compaction strategy with a @@ -137,13 +155,12 @@ bespoketag_recalc(_Config) -> B0 = <<"B0">>, KeyCount = 7000, - ExtractMDFun = + ExtractMDFun = fun(bespoke_tag, Size, Obj) -> [{index, IL}, {value, _V}] = Obj, - {{erlang:phash2(term_to_binary(Obj)), - Size, - {index, IL}}, - [os:timestamp()]} + {{erlang:phash2(term_to_binary(Obj)), Size, {index, IL}}, [ + os:timestamp() + ]} end, CalcIndexFun = fun(bespoke_tag, UpdMeta, PrvMeta) -> @@ -160,31 +177,41 @@ bespoketag_recalc(_Config) -> lists:map(fun(I) -> {add, <<"temp_int">>, I} end, IndexDeltas) end, - BookOpts = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalobjectcount, 6000}, - {max_pencillercachesize, 8000}, - {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{bespoke_tag, recalc}]}, - {override_functions, - [{extract_metadata, ExtractMDFun}, - {diff_indexspecs, CalcIndexFun}]}], - + BookOpts = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 6000}, + {max_pencillercachesize, 8000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{bespoke_tag, recalc}]}, + {override_functions, [ + {extract_metadata, ExtractMDFun}, + {diff_indexspecs, CalcIndexFun} + ]} + ], + {ok, Book1} = leveled_bookie:book_start(BookOpts), LoadFun = fun(Book, MustFind) -> fun(I) -> - testutil:stdload_object(Book, - B0, integer_to_binary(I rem KeyCount), - I, erlang:phash2({value, I}), - infinity, bespoke_tag, false, MustFind) + testutil:stdload_object( + Book, + B0, + integer_to_binary(I rem KeyCount), + I, + erlang:phash2({value, I}), + infinity, + bespoke_tag, + false, + MustFind + ) end end, lists:foreach(LoadFun(Book1, false), lists:seq(1, KeyCount)), lists:foreach(LoadFun(Book1, true), lists:seq(KeyCount + 1, KeyCount * 2)), FoldFun = - fun(_B0, {IV0, _K0}, Acc) -> + fun(_B0, {IV0, _K0}, Acc) -> case IV0 - 1 of Acc -> Acc + 1; @@ -196,23 +223,29 @@ bespoketag_recalc(_Config) -> CountFold = fun(Book, CurrentCount) -> - leveled_bookie:book_indexfold(Book, - B0, - {FoldFun, 0}, - {<<"temp_int">>, 0, CurrentCount}, - {true, undefined}) + leveled_bookie:book_indexfold( + Book, + B0, + {FoldFun, 0}, + {<<"temp_int">>, 0, CurrentCount}, + {true, undefined} + ) end, {async, FolderA} = CountFold(Book1, 2 * KeyCount), CountA = FolderA(), - io:format("Counted double index entries ~w - everything loaded OK~n", - [CountA]), + io:format( + "Counted double index entries ~w - everything loaded OK~n", + [CountA] + ), true = 2 * KeyCount == CountA, ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start(BookOpts), - lists:foreach(LoadFun(Book2, true), lists:seq(KeyCount * 2 + 1, KeyCount * 3)), + lists:foreach( + LoadFun(Book2, true), lists:seq(KeyCount * 2 + 1, KeyCount * 3) + ), {async, FolderB} = CountFold(Book2, 3 * KeyCount), CountB = FolderB(), @@ -223,16 +256,20 @@ bespoketag_recalc(_Config) -> io:format("Restart from blank ledger~n"), - leveled_penciller:clean_testdir(proplists:get_value(root_path, BookOpts) ++ - "/ledger"), + leveled_penciller:clean_testdir( + proplists:get_value(root_path, BookOpts) ++ + "/ledger" + ), {ok, Book3} = leveled_bookie:book_start(BookOpts), {async, FolderC} = CountFold(Book3, 3 * KeyCount), CountC = FolderC(), - io:format("All index entries ~w present - recalc ok~n", - [CountC]), + io:format( + "All index entries ~w present - recalc ok~n", + [CountC] + ), true = 3 * KeyCount == CountC, ok = leveled_bookie:book_close(Book3), - - testutil:reset_filestructure(). 
\ No newline at end of file + + testutil:reset_filestructure(). diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index 5a4d4a3f..1b903c20 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -1,42 +1,43 @@ -module(basic_SUITE). -include("leveled.hrl"). -export([all/0, init_per_suite/1, end_per_suite/1]). --export([simple_put_fetch_head_delete/1, - many_put_fetch_head/1, - journal_compaction/1, - fetchput_snapshot/1, - load_and_count/1, - load_and_count_withdelete/1, - space_clear_ondelete/1, - is_empty_test/1, - many_put_fetch_switchcompression/1, - bigjournal_littlejournal/1, - bigsst_littlesst/1, - safereaderror_startup/1, - remove_journal_test/1, - bigpcl_bucketlist/1 - ]). - -all() -> [ - simple_put_fetch_head_delete, - many_put_fetch_head, - journal_compaction, - fetchput_snapshot, - load_and_count, - load_and_count_withdelete, - space_clear_ondelete, - is_empty_test, - many_put_fetch_switchcompression, - bigjournal_littlejournal, - bigsst_littlesst, - safereaderror_startup, - remove_journal_test, - bigpcl_bucketlist - ]. - +-export([ + simple_put_fetch_head_delete/1, + many_put_fetch_head/1, + journal_compaction/1, + fetchput_snapshot/1, + load_and_count/1, + load_and_count_withdelete/1, + space_clear_ondelete/1, + is_empty_test/1, + many_put_fetch_switchcompression/1, + bigjournal_littlejournal/1, + bigsst_littlesst/1, + safereaderror_startup/1, + remove_journal_test/1, + bigpcl_bucketlist/1 +]). + +all() -> + [ + simple_put_fetch_head_delete, + many_put_fetch_head, + journal_compaction, + fetchput_snapshot, + load_and_count, + load_and_count_withdelete, + space_clear_ondelete, + is_empty_test, + many_put_fetch_switchcompression, + bigjournal_littlejournal, + bigsst_littlesst, + safereaderror_startup, + remove_journal_test, + bigpcl_bucketlist + ]. init_per_suite(Config) -> - testutil:init_per_suite([{suite, "basic"}|Config]), + testutil:init_per_suite([{suite, "basic"} | Config]), Config. end_per_suite(Config) -> @@ -50,8 +51,8 @@ simple_put_fetch_head_delete(_Config) -> io:format("simple test with error and stats logs~n"), simple_test_withlog( error, - [b0015, b0016, b0017, b0018, p0032, sst12, cdb19, sst13, i0019]). - + [b0015, b0016, b0017, b0018, p0032, sst12, cdb19, sst13, i0019] + ). 
simple_test_withlog(LogLevel, ForcedLogs) -> RootPath = testutil:reset_filestructure(), @@ -69,13 +70,15 @@ simple_test_withlog(LogLevel, ForcedLogs) -> testutil:check_forobject(Bookie1, TestObject), testutil:check_formissingobject(Bookie1, <<"Bucket1">>, <<"Key2">>), ok = leveled_bookie:book_close(Bookie1), - StartOpts2 = [{root_path, RootPath}, - {max_journalsize, 3000000}, - {sync_strategy, testutil:sync_strategy()}, - {log_level, LogLevel}, - {forced_logs, ForcedLogs}], + StartOpts2 = [ + {root_path, RootPath}, + {max_journalsize, 3000000}, + {sync_strategy, testutil:sync_strategy()}, + {log_level, LogLevel}, + {forced_logs, ForcedLogs} + ], {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), - + testutil:check_forobject(Bookie2, TestObject), ObjList1 = testutil:generate_objects(5000, 2), testutil:riakload(Bookie2, ObjList1), @@ -96,7 +99,7 @@ simple_test_withlog(LogLevel, ForcedLogs) -> {ok, {2220864, S, undefined}} = leveled_bookie:book_head(Bookie2, <<"Bucket1">>, <<"Key2">>), true = (S == 63) or (S == 65), - % After OTP 26 the object is 58 bytes not 60 + % After OTP 26 the object is 58 bytes not 60 testutil:check_formissingobject(Bookie2, <<"Bucket1">>, <<"Key2">>), ok = leveled_bookie:book_put( @@ -104,9 +107,11 @@ simple_test_withlog(LogLevel, ForcedLogs) -> <<"Bucket1">>, <<"Key2">>, <<"Value2">>, - [{remove, <<"Index1">>, <<"Term1">>}, - {add, <<"Index1">>, <<"Term2">>}] - ), + [ + {remove, <<"Index1">>, <<"Term1">>}, + {add, <<"Index1">>, <<"Term2">>} + ] + ), {ok, <<"Value2">>} = leveled_bookie:book_get(Bookie2, <<"Bucket1">>, <<"Key2">>), ok = leveled_bookie:book_close(Bookie2), @@ -130,7 +135,8 @@ simple_test_withlog(LogLevel, ForcedLogs) -> many_put_fetch_head(_Config) -> RootPath = testutil:reset_filestructure(), StartOpts1 = - [{root_path, RootPath}, + [ + {root_path, RootPath}, {max_pencillercachesize, 16000}, {sync_strategy, riak_sync}, {compression_point, on_compact} @@ -139,28 +145,49 @@ many_put_fetch_head(_Config) -> {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), testutil:check_forobject(Bookie1, TestObject), - {ok, 1} = leveled_bookie:book_sqn(Bookie1, - testutil:get_bucket(TestObject), - testutil:get_key(TestObject), - ?RIAK_TAG), + {ok, 1} = leveled_bookie:book_sqn( + Bookie1, + testutil:get_bucket(TestObject), + testutil:get_key(TestObject), + ?RIAK_TAG + ), ok = leveled_bookie:book_close(Bookie1), - StartOpts2 = [{root_path, RootPath}, - {max_journalsize, 50000000}, - {max_pencillercachesize, 32000}, - {sync_strategy, testutil:sync_strategy()}, - {compression_point, on_receipt}], + StartOpts2 = [ + {root_path, RootPath}, + {max_journalsize, 50000000}, + {max_pencillercachesize, 32000}, + {sync_strategy, testutil:sync_strategy()}, + {compression_point, on_receipt} + ], {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), ok = leveled_bookie:book_loglevel(Bookie2, error), ok = leveled_bookie:book_addlogs(Bookie2, ["B0015"]), testutil:check_forobject(Bookie2, TestObject), - {ok, 1} = leveled_bookie:book_sqn(Bookie2, - testutil:get_bucket(TestObject), - testutil:get_key(TestObject), - ?RIAK_TAG), - GenList = [2, 20002, 40002, 60002, 80002, - 100002, 120002, 140002, 160002, 180002], - CLs = testutil:load_objects(20000, GenList, Bookie2, TestObject, - fun testutil:generate_smallobjects/2), + {ok, 1} = leveled_bookie:book_sqn( + Bookie2, + testutil:get_bucket(TestObject), + testutil:get_key(TestObject), + ?RIAK_TAG + ), + GenList = [ + 2, + 20002, + 40002, + 60002, + 80002, + 100002, + 120002, 
+ 140002, + 160002, + 180002 + ], + CLs = testutil:load_objects( + 20000, + GenList, + Bookie2, + TestObject, + fun testutil:generate_smallobjects/2 + ), {error, ["B0015"]} = leveled_bookie:book_logsettings(Bookie2), ok = leveled_bookie:book_removelogs(Bookie2, ["B0015"]), CL1A = lists:nth(1, CLs), @@ -181,57 +208,78 @@ many_put_fetch_head(_Config) -> {ok, Bookie3} = leveled_bookie:book_start(StartOpts2), testutil:check_forlist(Bookie3, ChkList2A), testutil:check_forobject(Bookie3, TestObject), - {ok, 1} = leveled_bookie:book_sqn(Bookie3, - testutil:get_bucket(TestObject), - testutil:get_key(TestObject), - ?RIAK_TAG), - not_found = leveled_bookie:book_sqn(Bookie3, - testutil:get_bucket(TestObject), - testutil:get_key(TestObject), - ?STD_TAG), - not_found = leveled_bookie:book_sqn(Bookie3, - testutil:get_bucket(TestObject), - testutil:get_key(TestObject)), - testutil:check_formissingobject(Bookie3, <<"Bookie1">>, <<"MissingKey0123">>), + {ok, 1} = leveled_bookie:book_sqn( + Bookie3, + testutil:get_bucket(TestObject), + testutil:get_key(TestObject), + ?RIAK_TAG + ), + not_found = leveled_bookie:book_sqn( + Bookie3, + testutil:get_bucket(TestObject), + testutil:get_key(TestObject), + ?STD_TAG + ), + not_found = leveled_bookie:book_sqn( + Bookie3, + testutil:get_bucket(TestObject), + testutil:get_key(TestObject) + ), + testutil:check_formissingobject( + Bookie3, <<"Bookie1">>, <<"MissingKey0123">> + ), ok = leveled_bookie:book_destroy(Bookie3). bigjournal_littlejournal(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 50000000}, - {max_pencillercachesize, 32000}, - {sync_strategy, testutil:sync_strategy()}, - {compression_point, on_compact}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 50000000}, + {max_pencillercachesize, 32000}, + {sync_strategy, testutil:sync_strategy()}, + {compression_point, on_compact} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), - ObjL1 = - testutil:generate_objects(100, 1, [], - crypto:strong_rand_bytes(10000), - fun() -> [] end, <<"B">>), + ObjL1 = + testutil:generate_objects( + 100, + 1, + [], + crypto:strong_rand_bytes(10000), + fun() -> [] end, + <<"B">> + ), testutil:riakload(Bookie1, ObjL1), ok = leveled_bookie:book_close(Bookie1), - StartOpts2 = lists:ukeysort(1, [{max_journalsize, 5000}|StartOpts1]), + StartOpts2 = lists:ukeysort(1, [{max_journalsize, 5000} | StartOpts1]), {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), - ObjL2 = - testutil:generate_objects(10, 1000, [], - crypto:strong_rand_bytes(10000), - fun() -> [] end, <<"B">>), + ObjL2 = + testutil:generate_objects( + 10, + 1000, + [], + crypto:strong_rand_bytes(10000), + fun() -> [] end, + <<"B">> + ), testutil:riakload(Bookie2, ObjL2), testutil:check_forlist(Bookie2, ObjL1), testutil:check_forlist(Bookie2, ObjL2), ok = leveled_bookie:book_destroy(Bookie2). 
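%% The StartOpts2 override above works because lists:ukeysort/2 is stable:
%% among tuples sharing a key it keeps the one that appeared first in the
%% input, so the prepended {max_journalsize, 5000} shadows the value already
%% present in StartOpts1. A minimal illustration in the shell:
%%
%%   1> lists:ukeysort(1,
%%          [{max_journalsize, 5000},
%%           {max_journalsize, 50000000},
%%           {max_pencillercachesize, 32000}]).
%%   [{max_journalsize,5000},{max_pencillercachesize,32000}]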
- bigsst_littlesst(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 50000000}, - {cache_size, 500}, - {max_pencillercachesize, 16000}, - {max_sstslots, 256}, - {sync_strategy, testutil:sync_strategy()}, - {compression_point, on_compact}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 50000000}, + {cache_size, 500}, + {max_pencillercachesize, 16000}, + {max_sstslots, 256}, + {sync_strategy, testutil:sync_strategy()}, + {compression_point, on_compact} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), - ObjL1 = + ObjL1 = lists:keysort( 1, testutil:generate_objects( @@ -240,27 +288,30 @@ bigsst_littlesst(_Config) -> [], crypto:strong_rand_bytes(100), fun() -> [] end, - <<"B">>) + <<"B">> + ) ), testutil:riakload(Bookie1, ObjL1), testutil:check_forlist(Bookie1, ObjL1), - timer:sleep(10000), % Wait for delete timeout + % Wait for delete timeout + timer:sleep(10000), JFP = RootPath ++ "/ledger/ledger_files/", {ok, FNS1} = file:list_dir(JFP), ok = leveled_bookie:book_destroy(Bookie1), - StartOpts2 = lists:ukeysort(1, [{max_sstslots, 24}|StartOpts1]), + StartOpts2 = lists:ukeysort(1, [{max_sstslots, 24} | StartOpts1]), {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), testutil:riakload(Bookie2, ObjL1), testutil:check_forlist(Bookie2, ObjL1), - timer:sleep(10000), % Wait for delete timeout + % Wait for delete timeout + timer:sleep(10000), {ok, FNS2} = file:list_dir(JFP), ok = leveled_bookie:book_destroy(Bookie2), - io:format("Big SST ~w files Little SST ~w files~n", - [length(FNS1), length(FNS2)]), - true = length(FNS2) >= (2 * length(FNS1)). - - + io:format( + "Big SST ~w files Little SST ~w files~n", + [length(FNS1), length(FNS2)] + ), + true = length(FNS2) >= (2 * length(FNS1)). 
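%% The 2x lower bound above is deliberately loose. The same objects are
%% written under both configurations, so the total number of SST slots
%% required is roughly the same; capping a file at 24 slots instead of 256
%% should therefore multiply the ledger file count by something closer to
%% 256 / 24 (roughly ten). Merge behaviour across levels makes the exact
%% ratio variable, hence the conservative assertion that FNS2 is at least
%% twice FNS1.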
journal_compaction(_Config) -> journal_compaction_tester(false, 3600), @@ -269,11 +320,13 @@ journal_compaction(_Config) -> journal_compaction_tester(Restart, WRP) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 10000000}, - {max_run_length, 1}, - {sync_strategy, testutil:sync_strategy()}, - {waste_retention_period, WRP}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 10000000}, + {max_run_length, 1}, + {sync_strategy, testutil:sync_strategy()}, + {waste_retention_period, WRP} + ], {ok, Bookie0} = leveled_bookie:book_start(StartOpts1), ok = leveled_bookie:book_compactjournal(Bookie0, 30000), {TestObject, TestSpec} = testutil:generate_testobject(), @@ -306,40 +359,41 @@ journal_compaction_tester(Restart, WRP) -> ObjListD = testutil:generate_objects(10000, 2), lists:foreach( fun({_R, O, _S}) -> - testutil:book_riakdelete(Bookie0, - testutil:get_bucket(O), - testutil:get_key(O), - []) + testutil:book_riakdelete( + Bookie0, + testutil:get_bucket(O), + testutil:get_key(O), + [] + ) end, ObjListD ), - + %% Now replace all the other objects ObjList2 = testutil:generate_objects(40000, 10002), testutil:riakload(Bookie0, ObjList2), Bookie1 = - case Restart of + case Restart of true -> ok = leveled_bookie:book_close(Bookie0), {ok, RestartedB} = leveled_bookie:book_start(StartOpts1), RestartedB; false -> - Bookie0 + Bookie0 end, - - + WasteFP = RootPath ++ "/journal/journal_files/waste", % Start snapshot - should stop deletions - {ok, PclClone, InkClone} = + {ok, PclClone, InkClone} = leveled_bookie:book_snapshot(Bookie1, store, undefined, false), ok = leveled_bookie:book_compactjournal(Bookie1, 30000), testutil:wait_for_compaction(Bookie1), % Wait to cause delete_pending to be blocked by snapshot % timeout on switch to delete_pending is 10s timer:sleep(10100), - case WRP of - undefined -> + case WRP of + undefined -> ok; _ -> % Check nothing got deleted @@ -349,10 +403,11 @@ journal_compaction_tester(Restart, WRP) -> ok = leveled_penciller:pcl_close(PclClone), ok = leveled_inker:ink_close(InkClone), % Snapshot released so deletes should occur at next timeout - case WRP of + case WRP of undefined -> - timer:sleep(10100); % wait for delete_pending timeout - % Wait 2 seconds for files to be deleted + % wait for delete_pending timeout + timer:sleep(10100); + % Wait 2 seconds for files to be deleted _ -> FindDeletedFilesFun = fun(X, Found) -> @@ -371,70 +426,75 @@ journal_compaction_tester(Restart, WRP) -> end end end, - lists:foldl(FindDeletedFilesFun, - false, - [2000,2000,2000,2000,2000,2000]) + lists:foldl( + FindDeletedFilesFun, + false, + [2000, 2000, 2000, 2000, 2000, 2000] + ) end, {ok, ClearedJournals} = file:list_dir(WasteFP), io:format("~w ClearedJournals found~n", [length(ClearedJournals)]), - case is_integer(WRP) of + case is_integer(WRP) of true -> true = length(ClearedJournals) > 0; false -> true = length(ClearedJournals) == 0 end, - + ChkList3 = lists:sublist(lists:sort(ObjList2), 500), testutil:check_forlist(Bookie1, ChkList3), - + ok = leveled_bookie:book_close(Bookie1), - + % Restart {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), testutil:check_forobject(Bookie2, TestObject), testutil:check_forlist(Bookie2, ChkList3), - + ok = leveled_bookie:book_close(Bookie2), - - StartOpts2 = [{root_path, RootPath}, - {max_journalsize, 10000000}, - {max_run_length, 1}, - {waste_retention_period, 1}, - {sync_strategy, testutil:sync_strategy()}], + + StartOpts2 = [ + {root_path, RootPath}, + {max_journalsize, 10000000}, + 
{max_run_length, 1}, + {waste_retention_period, 1}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie3} = leveled_bookie:book_start(StartOpts2), ok = leveled_bookie:book_compactjournal(Bookie3, 30000), busy = leveled_bookie:book_compactjournal(Bookie3, 30000), testutil:wait_for_compaction(Bookie3), ok = leveled_bookie:book_close(Bookie3), - + {ok, ClearedJournalsPC} = file:list_dir(WasteFP), io:format("~w ClearedJournals found~n", [length(ClearedJournalsPC)]), - case is_integer(WRP) of + case is_integer(WRP) of true -> true = length(ClearedJournals) > 0; false -> true = length(ClearedJournals) == 0 end, - - testutil:reset_filestructure(10000). + testutil:reset_filestructure(10000). fetchput_snapshot(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 30000000}, - {cache_size, 2000}, - {max_pencillercachesize, 16000}, - {sync_strategy, none}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 30000000}, + {cache_size, 2000}, + {max_pencillercachesize, 16000}, + {sync_strategy, none} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), - + % Load up 5000 objects ObjList1 = testutil:generate_objects(5000, 2), testutil:riakload(Bookie1, ObjList1), - + % Now take a snapshot - check it has the same objects SnapOpts1 = [{snapshot_bookie, Bookie1}], @@ -453,7 +513,7 @@ fetchput_snapshot(_Config) -> io:format("Closed initial bookies~n"), % all now closed - + % Open a new store (to start with the previously loaded data) {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), @@ -463,7 +523,6 @@ fetchput_snapshot(_Config) -> {ok, SnapBookie2} = leveled_bookie:book_start(SnapOpts2), io:format("Bookies restarted~n"), - % Check both the newly opened store and its snapshot have the data @@ -471,21 +530,23 @@ fetchput_snapshot(_Config) -> io:format("Check active bookie still contains original data~n"), testutil:check_forlist(SnapBookie2, ChkList1), io:format("Check snapshot still contains original data~n"), - - % Generate some replacement objects, load them up - check the master + + % Generate some replacement objects, load them up - check the master % store has the replacement objects, but the snapshot still has the old % objects - + ObjList2 = testutil:generate_objects(5000, 2), testutil:riakload(Bookie2, ObjList2), io:format("Replacement objects put~n"), - + ChkList2 = lists:sublist(lists:sort(ObjList2), 100), testutil:check_forlist(Bookie2, ChkList2), testutil:check_forlist(SnapBookie2, ChkList1), - io:format("Checked for replacement objects in active bookie" ++ - ", old objects in snapshot~n"), - + io:format( + "Checked for replacement objects in active bookie" ++ + ", old objects in snapshot~n" + ), + % Check out how many ledger files we now have (should just be 1) ok = filelib:ensure_dir(RootPath ++ "/ledger/ledger_files"), @@ -503,13 +564,18 @@ fetchput_snapshot(_Config) -> % Now loads lots of new objects GenList = [20002, 40002, 60002, 80002, 100002, 120002, 140002, 160002], - CLs2 = testutil:load_objects(20000, GenList, Bookie2, TestObject, - fun testutil:generate_smallobjects/2), + CLs2 = testutil:load_objects( + 20000, + GenList, + Bookie2, + TestObject, + fun testutil:generate_smallobjects/2 + ), io:format("Loaded significant numbers of new objects~n"), - + testutil:check_forlist(Bookie2, lists:nth(length(CLs2), CLs2)), io:format("Checked active bookie has new objects~n"), - + % Start a 
second snapshot, which should have the new objects, whilst the % previous snapshot still doesn't @@ -523,19 +589,24 @@ fetchput_snapshot(_Config) -> io:format("Started new snapshot and check for new objects~n"), compare_foldwithsnap(Bookie2, SnapBookie3, ChkList3), - + % Load yet more objects, these are replacement objects for the last load - CLs3 = testutil:load_objects(20000, GenList, Bookie2, TestObject, - fun testutil:generate_smallobjects/2), + CLs3 = testutil:load_objects( + 20000, + GenList, + Bookie2, + TestObject, + fun testutil:generate_smallobjects/2 + ), testutil:check_forlist(Bookie2, lists:nth(length(CLs3), CLs3)), testutil:check_forlist(Bookie2, lists:nth(1, CLs3)), - + io:format("Starting 15s sleep in which snap2 should block deletion~n"), timer:sleep(15000), - % There should be lots of ledger files, as we have replaced the objects - % which has created new files, but the old files are still in demand from + % There should be lots of ledger files, as we have replaced the objects + % which has created new files, but the old files are still in demand from % the snapshot {ok, FNsB} = file:list_dir(RootPath ++ "/ledger/ledger_files"), @@ -543,14 +614,14 @@ fetchput_snapshot(_Config) -> io:format("Starting 15s sleep as snap2 close should unblock deletion~n"), timer:sleep(15000), io:format("Pause for deletion has ended~n"), - - % So the pause here is to allow for delete pendings to take effect after the + + % So the pause here is to allow for delete pendings to take effect after the % closing of the snapshot % Now check that any deletions haven't impacted the availability of data testutil:check_forlist(Bookie2, lists:nth(length(CLs3), CLs3)), - % Close the other snapshot, and pause - after the pause there should be a + % Close the other snapshot, and pause - after the pause there should be a % reduction in the number of ledger files due to the deletes ok = leveled_bookie:book_close(SnapBookie3), @@ -561,13 +632,15 @@ fetchput_snapshot(_Config) -> testutil:check_forlist(Bookie2, lists:nth(1, CLs3)), {ok, FNsC} = file:list_dir(RootPath ++ "/ledger/ledger_files"), - io:format("FNsA ~w FNsB ~w FNsC ~w~n", - [length(FNsA), length(FNsB), length(FNsC)]), + io:format( + "FNsA ~w FNsB ~w FNsC ~w~n", + [length(FNsA), length(FNsB), length(FNsC)] + ), true = length(FNsB) > length(FNsA), - true = length(FNsB) > length(FNsC), - % smaller due to replacements and files deleting - % This is dependent on the sleep though (yuk) - + true = length(FNsB) > length(FNsC), + % smaller due to replacements and files deleting + % This is dependent on the sleep though (yuk) + {B1Size, B1Count} = testutil:check_bucket_stats(Bookie2, <<"Bucket1">>), true = B1Size > 0, true = B1Count == 1, @@ -577,7 +650,7 @@ fetchput_snapshot(_Config) -> true = BCount == 180000, io:format("Shutdown with overhanging snapshot~n"), - + {ok, SnpPCL1, SnpJrnl1} = leveled_bookie:book_snapshot(Bookie2, store, undefined, true), {ok, SnpPCL2, SnpJrnl2} = @@ -588,7 +661,8 @@ fetchput_snapshot(_Config) -> fun() -> ok = leveled_bookie:book_close(Bookie2), TestPid ! ok - end), + end + ), timer:sleep(5000), ok = leveled_penciller:pcl_close(SnpPCL1), @@ -599,16 +673,17 @@ fetchput_snapshot(_Config) -> io:format("Time for close to complete is 2 * 10s~n"), io:format("Both Inker and Penciller will have snapshot delay~n"), - receive ok -> ok end, + receive + ok -> ok + end, false = is_process_alive(SnpPCL2), false = is_process_alive(SnpJrnl2), testutil:reset_filestructure(). 
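%% The shutdown at the end of fetchput_snapshot is spawned rather than called
%% inline because book_close/1 will wait out the overhanging snapshots (the
%% io:format lines above note the 2 * 10s delay on the Inker and Penciller).
%% The shape of the pattern, with illustrative names, is:
%%
%%   Self = self(),
%%   spawn(fun() -> ok = leveled_bookie:book_close(Bookie), Self ! closed end),
%%   ok = leveled_penciller:pcl_close(SnapPcl),
%%   ok = leveled_inker:ink_close(SnapJournal),
%%   receive closed -> ok end.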
- compare_foldwithsnap(Bookie, SnapBookie, ChkList) -> - HeadFoldFun = fun(B, K, _Hd, Acc) -> [{B, K}|Acc] end, - KeyFoldFun = fun(B, K, Acc) -> [{B, K}|Acc] end, + HeadFoldFun = fun(B, K, _Hd, Acc) -> [{B, K} | Acc] end, + KeyFoldFun = fun(B, K, Acc) -> [{B, K} | Acc] end, {async, HeadFoldDB} = leveled_bookie:book_headfold( Bookie, ?RIAK_TAG, {HeadFoldFun, []}, true, false, false @@ -627,7 +702,6 @@ compare_foldwithsnap(Bookie, SnapBookie, ChkList) -> ), true = HeadFoldSnap() == KeyFoldSnap(). - load_and_count(_Config) -> % Use artificially small files, and the load keys, counting they're all % present @@ -635,16 +709,17 @@ load_and_count(_Config) -> load_and_count(200000000, 50, 200000), load_and_count(50000000, 1000, 5000). - load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, JournalSize}, - {cache_size, BookiesMemSize}, - {max_pencillercachesize, PencillerMemSize}, - {sync_strategy, testutil:sync_strategy()}, - {stats_logfrequency, 5}, - {stats_probability, 80}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, JournalSize}, + {cache_size, BookiesMemSize}, + {max_pencillercachesize, PencillerMemSize}, + {sync_strategy, testutil:sync_strategy()}, + {stats_logfrequency, 5}, + {stats_probability, 80} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), @@ -653,15 +728,17 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> G1 = fun testutil:generate_smallobjects/2, lists:foldl( fun(_X, Acc) -> - testutil:load_objects( - 5000, [Acc + 2], Bookie1, TestObject, G1), - {_S, Count} = - testutil:check_bucket_stats(Bookie1, <<"Bucket">>), - if - Acc + 5000 == Count -> - ok + testutil:load_objects( + 5000, [Acc + 2], Bookie1, TestObject, G1 + ), + {_S, Count} = + testutil:check_bucket_stats(Bookie1, <<"Bucket">>), + if + Acc + 5000 == Count -> + ok + end, + Acc + 5000 end, - Acc + 5000 end, 0, lists:seq(1, 20) ), @@ -670,15 +747,17 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> G2 = fun testutil:generate_compressibleobjects/2, lists:foldl( fun(_X, Acc) -> - testutil:load_objects( - 5000, [Acc + 2], Bookie1, TestObject, G2), - {_S, Count} = - testutil:check_bucket_stats(Bookie1, <<"Bucket">>), - if - Acc + 5000 == Count -> - ok + testutil:load_objects( + 5000, [Acc + 2], Bookie1, TestObject, G2 + ), + {_S, Count} = + testutil:check_bucket_stats(Bookie1, <<"Bucket">>), + if + Acc + 5000 == Count -> + ok + end, + Acc + 5000 end, - Acc + 5000 end, 100000, lists:seq(1, 20) ), @@ -686,34 +765,38 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> io:format("Replacing small objects~n"), lists:foldl( fun(_X, Acc) -> - testutil:load_objects( - 5000, [Acc + 2], Bookie1, TestObject, G1), - {_S, Count} = - testutil:check_bucket_stats(Bookie1, <<"Bucket">>), - if - Count == 200000 -> - ok + testutil:load_objects( + 5000, [Acc + 2], Bookie1, TestObject, G1 + ), + {_S, Count} = + testutil:check_bucket_stats(Bookie1, <<"Bucket">>), + if + Count == 200000 -> + ok + end, + Acc + 5000 end, - Acc + 5000 end, 0, lists:seq(1, 20) ), testutil:check_forobject(Bookie1, TestObject), io:format("Loading more small objects~n"), io:format("Now with unused snapshot so deletions are blocked~n"), - {ok, PclClone, null} = + {ok, PclClone, null} = leveled_bookie:book_snapshot(Bookie1, ledger, undefined, true), lists:foldl( fun(_X, 
Acc) -> - testutil:load_objects( - 5000, [Acc + 2], Bookie1, TestObject, G2), - {_S, Count} = - testutil:check_bucket_stats(Bookie1, <<"Bucket">>), - if - Acc + 5000 == Count -> - ok + testutil:load_objects( + 5000, [Acc + 2], Bookie1, TestObject, G2 + ), + {_S, Count} = + testutil:check_bucket_stats(Bookie1, <<"Bucket">>), + if + Acc + 5000 == Count -> + ok + end, + Acc + 5000 end, - Acc + 5000 end, 200000, lists:seq(1, 20) ), @@ -723,12 +806,14 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> ok = leveled_bookie:book_close(Bookie1), {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), {_, 300000} = testutil:check_bucket_stats(Bookie2, <<"Bucket">>), - + ok = leveled_bookie:book_close(Bookie2), ManifestFP = - leveled_pmanifest:filepath(filename:join(RootPath, ?LEDGER_FP), - manifest), + leveled_pmanifest:filepath( + filename:join(RootPath, ?LEDGER_FP), + manifest + ), IsManifest = fun(FN) -> filename:extension(FN) == ".man" end, {ok, RawManList} = file:list_dir(ManifestFP), ManList = lists:filter(IsManifest, RawManList), @@ -738,9 +823,11 @@ load_and_count(JournalSize, BookiesMemSize, PencillerMemSize) -> load_and_count_withdelete(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 50000000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 50000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), @@ -749,14 +836,16 @@ load_and_count_withdelete(_Config) -> G1 = fun testutil:generate_smallobjects/2, lists:foldl( fun(_X, Acc) -> - testutil:load_objects( - 5000, [Acc + 2], Bookie1, TestObject, G1), - {_S, Count} = testutil:check_bucket_stats(Bookie1, <<"Bucket">>), - if - Acc + 5000 == Count -> - ok + testutil:load_objects( + 5000, [Acc + 2], Bookie1, TestObject, G1 + ), + {_S, Count} = testutil:check_bucket_stats(Bookie1, <<"Bucket">>), + if + Acc + 5000 == Count -> + ok + end, + Acc + 5000 end, - Acc + 5000 end, 0, lists:seq(1, 20) ), @@ -771,20 +860,22 @@ load_and_count_withdelete(_Config) -> G2 = fun testutil:generate_compressibleobjects/2, lists:foldl( fun(_X, Acc) -> - testutil:load_objects( - 5000, [Acc + 2], Bookie1, no_check, G2), - {_S, Count} = testutil:check_bucket_stats(Bookie1, <<"Bucket">>), - if - Acc + 5000 == Count -> - ok + testutil:load_objects( + 5000, [Acc + 2], Bookie1, no_check, G2 + ), + {_S, Count} = testutil:check_bucket_stats(Bookie1, <<"Bucket">>), + if + Acc + 5000 == Count -> + ok + end, + Acc + 5000 end, - Acc + 5000 end, 100000, lists:seq(1, 20) ), not_found = testutil:book_riakget(Bookie1, BucketD, KeyD), ok = leveled_bookie:book_close(Bookie1), - + {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), testutil:check_formissingobject(Bookie2, BucketD, KeyD), testutil:check_formissingobject(Bookie2, "Bookie1", "MissingKey0123"), @@ -793,12 +884,13 @@ load_and_count_withdelete(_Config) -> testutil:reset_filestructure(). 
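%% The bare `if` inside the loader folds above acts as an inline assertion:
%% there is no catch-all clause, so when the bucket count does not match the
%% expected total no clause matches and the fold crashes with an if_clause
%% error, failing the test at the first bad count. A minimal sketch of the
%% idiom (assert_count/2 is an illustrative name):
%%
%%   assert_count(Expected, Count) ->
%%       if
%%           Count == Expected ->
%%               ok
%%       end.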
- space_clear_ondelete(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 10000000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 10000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Book1} = leveled_bookie:book_start(StartOpts1), G2 = fun testutil:generate_compressibleobjects/2, testutil:load_objects( @@ -808,51 +900,61 @@ space_clear_ondelete(_Config) -> no_check, G2 ), - - FoldKeysFun = fun(B, K, Acc) -> [{B, K}|Acc] end, + + FoldKeysFun = fun(B, K, Acc) -> [{B, K} | Acc] end, {async, F1} = leveled_bookie:book_keylist(Book1, o_rkv, {FoldKeysFun, []}), SW1 = os:timestamp(), KL1 = F1(), - ok = case length(KL1) of - 80000 -> - io:format("Key list took ~w microseconds for 80K keys~n", - [timer:now_diff(os:timestamp(), SW1)]), - ok - end, - timer:sleep(10000), % Allow for any L0 file to be rolled + ok = + case length(KL1) of + 80000 -> + io:format( + "Key list took ~w microseconds for 80K keys~n", + [timer:now_diff(os:timestamp(), SW1)] + ), + ok + end, + % Allow for any L0 file to be rolled + timer:sleep(10000), {ok, FNsA_L} = file:list_dir(RootPath ++ "/ledger/ledger_files"), {ok, FNsA_J} = file:list_dir(RootPath ++ "/journal/journal_files"), - io:format("FNsA - Bookie created ~w journal files and ~w ledger files~n", - [length(FNsA_J), length(FNsA_L)]), + io:format( + "FNsA - Bookie created ~w journal files and ~w ledger files~n", + [length(FNsA_J), length(FNsA_L)] + ), % Get an iterator to lock the inker during compaction FoldObjectsFun = fun(B, K, ObjBin, Acc) -> - [{B, K, erlang:phash2(ObjBin)}|Acc] end, + [{B, K, erlang:phash2(ObjBin)} | Acc] + end, {async, HTreeF1} = leveled_bookie:book_objectfold( - Book1, ?RIAK_TAG, {FoldObjectsFun, []}, false), + Book1, ?RIAK_TAG, {FoldObjectsFun, []}, false + ), - % This query does not Snap PreFold - and so will not prevent - % pending deletes from prompting actual deletes + % This query does not Snap PreFold - and so will not prevent + % pending deletes from prompting actual deletes {async, KF1} = leveled_bookie:book_keylist(Book1, o_rkv, {FoldKeysFun, []}), - % This query does Snap PreFold, and so will prevent deletes from - % the ledger + % This query does Snap PreFold, and so will prevent deletes from + % the ledger % Delete the keys SW2 = os:timestamp(), lists:foreach( fun({Bucket, Key}) -> testutil:book_riakdelete(Book1, Bucket, Key, []) - end, - KL1), + end, + KL1 + ), io:format( "Deletion took ~w microseconds for 80K keys~n", - [timer:now_diff(os:timestamp(), SW2)]), - + [timer:now_diff(os:timestamp(), SW2)] + ), + ok = leveled_bookie:book_compactjournal(Book1, 30000), F = fun leveled_bookie:book_islastcompactionpending/1, lists:foldl( @@ -871,80 +973,98 @@ space_clear_ondelete(_Config) -> end end, true, - lists:seq(1, 15)), + lists:seq(1, 15) + ), io:format("Waiting for journal deletes - blocked~n"), timer:sleep(20000), - + io:format("Sleep over - Fold Objects query ~n"), - % for this query snapshot is made at fold time, and so the results are + % for this query snapshot is made at fold time, and so the results are % empty true = length(HTreeF1()) == 0, - + % This query uses a genuine async fold on a snasphot made at request time % and so the results should be non-empty io:format("Now Query 2 - Fold Keys query~n"), - true = length(KF1()) == 80000, - + true = length(KF1()) == 80000, + io:format("Waiting for journal deletes - unblocked~n"), timer:sleep(20000), {ok, FNsB_L} = file:list_dir(RootPath ++ 
"/ledger/ledger_files"), {ok, FNsB_J} = file:list_dir(RootPath ++ "/journal/journal_files"), - {ok, FNsB_PC} = file:list_dir(RootPath - ++ "/journal/journal_files/post_compact"), + {ok, FNsB_PC} = file:list_dir( + RootPath ++ + "/journal/journal_files/post_compact" + ), PointB_Journals = length(FNsB_J) + length(FNsB_PC), - io:format("FNsB - Bookie has ~w journal files and ~w ledger files " ++ - "after deletes~n", - [PointB_Journals, length(FNsB_L)]), - + io:format( + "FNsB - Bookie has ~w journal files and ~w ledger files " ++ + "after deletes~n", + [PointB_Journals, length(FNsB_L)] + ), + {async, F2} = leveled_bookie:book_keylist(Book1, o_rkv, {FoldKeysFun, []}), SW3 = os:timestamp(), KL2 = F2(), - ok = case length(KL2) of - 0 -> - io:format("Key list took ~w microseconds for no keys~n", - [timer:now_diff(os:timestamp(), SW3)]), - ok - end, + ok = + case length(KL2) of + 0 -> + io:format( + "Key list took ~w microseconds for no keys~n", + [timer:now_diff(os:timestamp(), SW3)] + ), + ok + end, ok = leveled_bookie:book_close(Book1), - + {ok, Book2} = leveled_bookie:book_start(StartOpts1), {async, F3} = leveled_bookie:book_keylist(Book2, o_rkv, {FoldKeysFun, []}), SW4 = os:timestamp(), KL3 = F3(), - ok = case length(KL3) of - 0 -> - io:format("Key list took ~w microseconds for no keys~n", - [timer:now_diff(os:timestamp(), SW4)]), - ok - end, + ok = + case length(KL3) of + 0 -> + io:format( + "Key list took ~w microseconds for no keys~n", + [timer:now_diff(os:timestamp(), SW4)] + ), + ok + end, ok = leveled_bookie:book_close(Book2), {ok, FNsC_L} = file:list_dir(RootPath ++ "/ledger/ledger_files"), - io:format("FNsC - Bookie has ~w ledger files " ++ - "after close~n", [length(FNsC_L)]), - + io:format( + "FNsC - Bookie has ~w ledger files " ++ + "after close~n", + [length(FNsC_L)] + ), + {ok, Book3} = leveled_bookie:book_start(StartOpts1), io:format("This should cause a final ledger merge event~n"), - io:format("Will require the penciller to resolve the issue of creating" ++ - " an empty file as all keys compact on merge~n"), - - CheckFun = + io:format( + "Will require the penciller to resolve the issue of creating" ++ + " an empty file as all keys compact on merge~n" + ), + + CheckFun = fun(X, FileCount) -> case FileCount of 0 -> 0; _ -> timer:sleep(X), - {ok, NewFC} = + {ok, NewFC} = file:list_dir(RootPath ++ "/ledger/ledger_files"), - io:format("Looping with ledger file count ~w~n", - [length(NewFC)]), + io:format( + "Looping with ledger file count ~w~n", + [length(NewFC)] + ), length(strip_nonsst(NewFC)) end end, - + FC = lists:foldl(CheckFun, infinity, [2000, 3000, 5000, 8000]), ok = leveled_bookie:book_close(Book3), - case FC of + case FC of 0 -> ok; _ -> @@ -954,95 +1074,104 @@ space_clear_ondelete(_Config) -> end, {ok, FNsD_L} = file:list_dir(RootPath ++ "/ledger/ledger_files"), - io:format("FNsD - Bookie has ~w ledger files " ++ - "after second close~n", [length(strip_nonsst(FNsD_L))]), - lists:foreach(fun(FN) -> - io:format("FNsD - Ledger file is ~s~n", [FN]) - end, - FNsD_L), + io:format( + "FNsD - Bookie has ~w ledger files " ++ + "after second close~n", + [length(strip_nonsst(FNsD_L))] + ), + lists:foreach( + fun(FN) -> + io:format("FNsD - Ledger file is ~s~n", [FN]) + end, + FNsD_L + ), true = PointB_Journals < length(FNsA_J), true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsA_L)), true = length(strip_nonsst(FNsD_L)) < length(strip_nonsst(FNsB_L)), true = length(strip_nonsst(FNsD_L)) =< length(strip_nonsst(FNsC_L)), true = length(strip_nonsst(FNsD_L)) == 0. 
- strip_nonsst(FileList) -> - SSTOnlyFun = + SSTOnlyFun = fun(FN, Acc) -> - case filename:extension(FN) of - ".sst" -> - [FN|Acc]; + case filename:extension(FN) of + ".sst" -> + [FN | Acc]; _ -> - Acc + Acc end end, lists:foldl(SSTOnlyFun, [], FileList). - is_empty_test(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts1 = [ + {root_path, RootPath}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), - - {B1, K1, V1, Spec, MD} = {term_to_binary("Bucket1"), - term_to_binary("Key1"), - "Value1", - [], - [{"MDK1", "MDV1"}]}, + + {B1, K1, V1, Spec, MD} = { + term_to_binary("Bucket1"), term_to_binary("Key1"), "Value1", [], [ + {"MDK1", "MDV1"} + ] + }, {TestObject1, TestSpec1} = testutil:generate_testobject(B1, K1, V1, Spec, MD), - {B1, K2, V2, Spec, MD} = {term_to_binary("Bucket1"), - term_to_binary("Key2"), - "Value2", - [], - [{"MDK1", "MDV1"}]}, + {B1, K2, V2, Spec, MD} = { + term_to_binary("Bucket1"), term_to_binary("Key2"), "Value2", [], [ + {"MDK1", "MDV1"} + ] + }, {TestObject2, TestSpec2} = testutil:generate_testobject(B1, K2, V2, Spec, MD), - {B2, K3, V3, Spec, MD} = {term_to_binary("Bucket2"), - term_to_binary("Key3"), - "Value3", - [], - [{"MDK1", "MDV1"}]}, + {B2, K3, V3, Spec, MD} = { + term_to_binary("Bucket2"), term_to_binary("Key3"), "Value3", [], [ + {"MDK1", "MDV1"} + ] + }, {TestObject3, TestSpec3} = testutil:generate_testobject(B2, K3, V3, Spec, MD), ok = testutil:book_riakput(Bookie1, TestObject1, TestSpec1), ok = testutil:book_riakput(Bookie1, TestObject2, TestSpec2), ok = testutil:book_riakput(Bookie1, TestObject3, TestSpec3), - + FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end, - BucketListQuery = {bucket_list, - ?RIAK_TAG, - {FoldBucketsFun, sets:new()}}, + BucketListQuery = {bucket_list, ?RIAK_TAG, {FoldBucketsFun, sets:new()}}, {async, BL} = leveled_bookie:book_returnfolder(Bookie1, BucketListQuery), true = sets:size(BL()) == 2, - + ok = leveled_bookie:book_put(Bookie1, B2, K3, delete, [], ?RIAK_TAG), {async, BLpd1} = leveled_bookie:book_returnfolder(Bookie1, BucketListQuery), true = sets:size(BLpd1()) == 1, - + ok = leveled_bookie:book_put(Bookie1, B1, K2, delete, [], ?RIAK_TAG), {async, BLpd2} = leveled_bookie:book_returnfolder(Bookie1, BucketListQuery), true = sets:size(BLpd2()) == 1, - + ok = leveled_bookie:book_put(Bookie1, B1, K1, delete, [], ?RIAK_TAG), {async, BLpd3} = leveled_bookie:book_returnfolder(Bookie1, BucketListQuery), true = sets:size(BLpd3()) == 0, - - ok = leveled_bookie:book_close(Bookie1). + ok = leveled_bookie:book_close(Bookie1). 
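%% In is_empty_test the keys are removed by putting the atom delete as the
%% value for each key, and a bucket drops out of the bucket_list fold only
%% once every key in it has been tombstoned. That is why the sets:size/1
%% assertions step 2 -> 1 -> 1 -> 0: deleting K3 empties Bucket2, deleting K2
%% still leaves K1 in Bucket1, and only deleting K1 empties the last bucket.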
remove_journal_test(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_pencillercachesize, 16000}, - {sync_strategy, testutil:sync_strategy()}, - {compression_point, on_compact}], + StartOpts1 = [ + {root_path, RootPath}, + {max_pencillercachesize, 16000}, + {sync_strategy, testutil:sync_strategy()}, + {compression_point, on_compact} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), GenList = [1, 20001, 40001, 60001], - CLs = testutil:load_objects(20000, GenList, Bookie1, no_check, - fun testutil:generate_smallobjects/2), + CLs = testutil:load_objects( + 20000, + GenList, + Bookie1, + no_check, + fun testutil:generate_smallobjects/2 + ), CheckList1 = lists:sublist(lists:nth(1, CLs), 100, 1000), CheckList2 = lists:sublist(lists:nth(2, CLs), 100, 1000), CheckList3 = lists:sublist(lists:nth(3, CLs), 100, 1000), @@ -1051,23 +1180,27 @@ remove_journal_test(_Config) -> testutil:check_forlist(Bookie1, CheckList2), testutil:check_forlist(Bookie1, CheckList3), testutil:check_forlist(Bookie1, CheckList4), - + ok = leveled_bookie:book_close(Bookie1), leveled_inker:clean_testdir(RootPath ++ "/journal"), {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), - % If we're not careful here new data will be added, and we + % If we're not careful here new data will be added, and we % won't be able to read it - [NewCheckList] = - testutil:load_objects(1000, [80001], Bookie2, no_check, - fun testutil:generate_smallobjects/2), - + [NewCheckList] = + testutil:load_objects( + 1000, + [80001], + Bookie2, + no_check, + fun testutil:generate_smallobjects/2 + ), + ok = leveled_bookie:book_close(Bookie2), {ok, Bookie3} = leveled_bookie:book_start(StartOpts1), testutil:check_forlist(Bookie3, NewCheckList), ok = leveled_bookie:book_destroy(Bookie3). 
- many_put_fetch_switchcompression(_Config) -> {T0, ok} = timer:tc(fun many_put_fetch_switchcompression_tester/1, [native]), @@ -1079,27 +1212,32 @@ many_put_fetch_switchcompression(_Config) -> many_put_fetch_switchcompression_tester(CompressionMethod) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_pencillercachesize, 16000}, - {max_journalobjectcount, 30000}, - {compression_level, 3}, - {sync_strategy, testutil:sync_strategy()}, - {compression_method, native}, - {ledger_compression, none}], - StartOpts2 = [{root_path, RootPath}, - {max_pencillercachesize, 24000}, - {max_journalobjectcount, 30000}, - {sync_strategy, testutil:sync_strategy()}, - {compression_method, CompressionMethod}, - {ledger_compression, as_store}], - StartOpts3 = [{root_path, RootPath}, - {max_pencillercachesize, 16000}, - {max_journalobjectcount, 30000}, - {sync_strategy, testutil:sync_strategy()}, - {compression_method, none}, - {ledger_compression, as_store}], - - + StartOpts1 = [ + {root_path, RootPath}, + {max_pencillercachesize, 16000}, + {max_journalobjectcount, 30000}, + {compression_level, 3}, + {sync_strategy, testutil:sync_strategy()}, + {compression_method, native}, + {ledger_compression, none} + ], + StartOpts2 = [ + {root_path, RootPath}, + {max_pencillercachesize, 24000}, + {max_journalobjectcount, 30000}, + {sync_strategy, testutil:sync_strategy()}, + {compression_method, CompressionMethod}, + {ledger_compression, as_store} + ], + StartOpts3 = [ + {root_path, RootPath}, + {max_pencillercachesize, 16000}, + {max_journalobjectcount, 30000}, + {sync_strategy, testutil:sync_strategy()}, + {compression_method, none}, + {ledger_compression, as_store} + ], + {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), @@ -1110,17 +1248,20 @@ many_put_fetch_switchcompression_tester(CompressionMethod) -> [2, 40002], Bookie1, TestObject, - fun testutil:generate_smallobjects/2), - + fun testutil:generate_smallobjects/2 + ), + lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie1, CL) end, CL1s), + fun(CL) -> ok = testutil:check_forlist(Bookie1, CL) end, CL1s + ), ok = leveled_bookie:book_close(Bookie1), - + %% Change compression method -> lz4 {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), testutil:check_forobject(Bookie2, TestObject), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie2, CL) end, CL1s), + fun(CL) -> ok = testutil:check_forlist(Bookie2, CL) end, CL1s + ), CL2s = testutil:load_objects( @@ -1128,21 +1269,26 @@ many_put_fetch_switchcompression_tester(CompressionMethod) -> [80002, 120002], Bookie2, TestObject, - fun testutil:generate_smallobjects/2), + fun testutil:generate_smallobjects/2 + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie2, CL) end, CL2s), + fun(CL) -> ok = testutil:check_forlist(Bookie2, CL) end, CL2s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie2, CL) end, CL1s), + fun(CL) -> ok = testutil:check_forlist(Bookie2, CL) end, CL1s + ), ok = leveled_bookie:book_close(Bookie2), %% Change method back again {ok, Bookie3} = leveled_bookie:book_start(StartOpts1), testutil:check_formissingobject(Bookie3, <<"Bookie1">>, "MissingKey0123"), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie3, CL) end, CL2s), + fun(CL) -> ok = testutil:check_forlist(Bookie3, CL) end, CL2s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie3, CL) end, CL1s), - + fun(CL) 
-> ok = testutil:check_forlist(Bookie3, CL) end, CL1s + ), + CL3s = testutil:load_objects( 40000, @@ -1153,18 +1299,22 @@ many_put_fetch_switchcompression_tester(CompressionMethod) -> 30000 ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie3, CL) end, CL3s), + fun(CL) -> ok = testutil:check_forlist(Bookie3, CL) end, CL3s + ), ok = leveled_bookie:book_close(Bookie3), % Change method to no compression {ok, Bookie4} = leveled_bookie:book_start(StartOpts3), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL2s), + fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL2s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL1s), + fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL1s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL3s), - + fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL3s + ), + CL4s = testutil:load_objects( 40000, @@ -1174,9 +1324,11 @@ many_put_fetch_switchcompression_tester(CompressionMethod) -> fun testutil:generate_smallobjects/2 ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL3s), + fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL3s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL4s), + fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL4s + ), testutil:delete_some_objects(Bookie4, lists:flatten(CL3s), 60000), CL5s = testutil:load_objects( @@ -1189,51 +1341,61 @@ many_put_fetch_switchcompression_tester(CompressionMethod) -> ok = leveled_bookie:book_compactjournal(Bookie4, 30000), testutil:wait_for_compaction(Bookie4), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL4s), + fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL4s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL5s), - + fun(CL) -> ok = testutil:check_forlist(Bookie4, CL) end, CL5s + ), + ok = leveled_bookie:book_close(Bookie4), %% Change compression method -> lz4 {ok, Bookie5} = leveled_bookie:book_start(StartOpts2), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie5, CL) end, CL1s), + fun(CL) -> ok = testutil:check_forlist(Bookie5, CL) end, CL1s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie5, CL) end, CL4s), + fun(CL) -> ok = testutil:check_forlist(Bookie5, CL) end, CL4s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie5, CL) end, CL5s), + fun(CL) -> ok = testutil:check_forlist(Bookie5, CL) end, CL5s + ), ok = leveled_bookie:book_close(Bookie5), %% Change compression method -> native {ok, Bookie6} = leveled_bookie:book_start(StartOpts1), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie6, CL) end, CL1s), + fun(CL) -> ok = testutil:check_forlist(Bookie6, CL) end, CL1s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie6, CL) end, CL4s), + fun(CL) -> ok = testutil:check_forlist(Bookie6, CL) end, CL4s + ), lists:foreach( - fun(CL) -> ok = testutil:check_forlist(Bookie6, CL) end, CL5s), + fun(CL) -> ok = testutil:check_forlist(Bookie6, CL) end, CL5s + ), ok = leveled_bookie:book_destroy(Bookie6). 
safereaderror_startup(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {compression_point, on_compact}, - {max_journalsize, 1000}, {cache_size, 2060}], + StartOpts1 = [ + {root_path, RootPath}, + {compression_point, on_compact}, + {max_journalsize, 1000}, + {cache_size, 2060} + ], {ok, Bookie1} = leveled_bookie:book_plainstart(StartOpts1), B1 = <<98, 117, 99, 107, 101, 116, 51>>, - K1 = - <<38, 50, 201, 47, 167, 125, 57, 232, 84, 38, 14, 114, 24, 62, - 12, 74>>, - Obj1 = - <<87, 150, 217, 230, 4, 81, 170, 68, 181, 224, 60, 232, 4, 74, - 159, 12, 156, 56, 194, 181, 18, 158, 195, 207, 106, 191, 80, - 111, 100, 81, 252, 248>>, - Obj2 = - <<86, 201, 253, 149, 213, 10, 32, 166, 33, 136, 42, 79, 103, 250, - 139, 95, 42, 143, 161, 3, 185, 74, 149, 226, 232, 214, 183, 64, - 69, 56, 167, 78>>, + K1 = + <<38, 50, 201, 47, 167, 125, 57, 232, 84, 38, 14, 114, 24, 62, 12, 74>>, + Obj1 = + <<87, 150, 217, 230, 4, 81, 170, 68, 181, 224, 60, 232, 4, 74, 159, 12, + 156, 56, 194, 181, 18, 158, 195, 207, 106, 191, 80, 111, 100, 81, + 252, 248>>, + Obj2 = + <<86, 201, 253, 149, 213, 10, 32, 166, 33, 136, 42, 79, 103, 250, 139, + 95, 42, 143, 161, 3, 185, 74, 149, 226, 232, 214, 183, 64, 69, 56, + 167, 78>>, ok = leveled_bookie:book_put(Bookie1, B1, K1, Obj1, []), ok = leveled_bookie:book_put(Bookie1, B1, K1, Obj2, []), exit(Bookie1, kill), @@ -1255,23 +1417,29 @@ bigpcl_bucketlist(_Config) -> RootPath = testutil:reset_filestructure(), BucketCount = 500, ObjectCount = 100, - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 50000000}, - {cache_size, 4000}, - {max_pencillercachesize, 128000}, - {max_sstslots, 256}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 50000000}, + {cache_size, 4000}, + {max_pencillercachesize, 128000}, + {max_sstslots, 256}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), BucketList = - lists:map(fun(I) -> list_to_binary(integer_to_list(I)) end, - lists:seq(1, BucketCount)), + lists:map( + fun(I) -> list_to_binary(integer_to_list(I)) end, + lists:seq(1, BucketCount) + ), MapFun = fun(B) -> testutil:generate_objects( - ObjectCount, 1, [], - crypto:strong_rand_bytes(100), - fun() -> [] end, + ObjectCount, + 1, + [], + crypto:strong_rand_bytes(100), + fun() -> [] end, B ) end, @@ -1288,7 +1456,7 @@ bigpcl_bucketlist(_Config) -> end, FBAccT = {BucketFold, sets:new()}, - {async, BucketFolder1} = + {async, BucketFolder1} = leveled_bookie:book_headfold( Bookie1, ?RIAK_TAG, @@ -1304,8 +1472,8 @@ bigpcl_bucketlist(_Config) -> ok = leveled_bookie:book_close(Bookie1), {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), - - {async, BucketFolder2} = + + {async, BucketFolder2} = leveled_bookie:book_headfold( Bookie2, ?RIAK_TAG, @@ -1318,11 +1486,13 @@ bigpcl_bucketlist(_Config) -> {FoldTime2, BucketList2} = timer:tc(BucketFolder2, []), true = BucketCount == sets:size(BucketList2), - io:format("Fold pre-close ~w ms post-close ~w ms~n", - [FoldTime1 div 1000, FoldTime2 div 1000]), + io:format( + "Fold pre-close ~w ms post-close ~w ms~n", + [FoldTime1 div 1000, FoldTime2 div 1000] + ), true = FoldTime1 < 10 * FoldTime2, %% The fold in-memory should be the same order of magnitude of response %% time as the fold post-persistence - ok = leveled_bookie:book_destroy(Bookie2). \ No newline at end of file + ok = leveled_bookie:book_destroy(Bookie2). 
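%% The closing assertion in bigpcl_bucketlist (FoldTime1 < 10 * FoldTime2)
%% bounds the cost of folding over unpersisted data: with 500 buckets of 100
%% objects (50000 keys) and max_pencillercachesize at 128000, the first head
%% fold runs largely against the penciller's in-memory cache, whereas the
%% second runs after a restart against persisted SST files. The test only
%% requires the in-memory fold to stay within an order of magnitude of the
%% persisted one, as the comment above states.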
diff --git a/test/end_to_end/iterator_SUITE.erl b/test/end_to_end/iterator_SUITE.erl index 19cd34dd..bc92b4c3 100644 --- a/test/end_to_end/iterator_SUITE.erl +++ b/test/end_to_end/iterator_SUITE.erl @@ -5,33 +5,35 @@ -define(KEY_ONLY, {false, undefined}). -export([all/0, init_per_suite/1, end_per_suite/1]). --export([expiring_indexes/1, - breaking_folds/1, - single_object_with2i/1, - small_load_with2i/1, - query_count/1, - multibucket_fold/1, - foldobjects_bybucket_range/1, - rotating_objects/1, - capture_and_filter_terms/1, - complex_queries/1 - ]). - -all() -> [ - expiring_indexes, - breaking_folds, - single_object_with2i, - small_load_with2i, - query_count, - multibucket_fold, - rotating_objects, - foldobjects_bybucket_range, - capture_and_filter_terms, - complex_queries - ]. +-export([ + expiring_indexes/1, + breaking_folds/1, + single_object_with2i/1, + small_load_with2i/1, + query_count/1, + multibucket_fold/1, + foldobjects_bybucket_range/1, + rotating_objects/1, + capture_and_filter_terms/1, + complex_queries/1 +]). + +all() -> + [ + expiring_indexes, + breaking_folds, + single_object_with2i, + small_load_with2i, + query_count, + multibucket_fold, + rotating_objects, + foldobjects_bybucket_range, + capture_and_filter_terms, + complex_queries + ]. init_per_suite(Config) -> - testutil:init_per_suite([{suite, "iterator"}|Config]), + testutil:init_per_suite([{suite, "iterator"} | Config]), Config. end_per_suite(Config) -> @@ -45,14 +47,16 @@ expiring_indexes(_Config) -> % the IndexSpecs are updated as part of the request. KeyCount = 50000, Future = 120, - % 2 minutes - if running tests on a slow machine, may need to increase - % this value + % 2 minutes - if running tests on a slow machine, may need to increase + % this value RootPath = testutil:reset_filestructure(), - StartOpts1 = - [{root_path, RootPath}, + StartOpts1 = + [ + {root_path, RootPath}, {max_pencillercachesize, 16000}, {max_journalobjectcount, 30000}, - {sync_strategy, testutil:sync_strategy()}], + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), SW1 = os:timestamp(), @@ -62,34 +66,36 @@ expiring_indexes(_Config) -> Indexes9 = testutil:get_randomindexes_generator(2), TempRiakObjects = testutil:generate_objects( - KeyCount, binary_uuid, [], V9, Indexes9, <<"riakBucket">>), - + KeyCount, binary_uuid, [], V9, Indexes9, <<"riakBucket">> + ), + IBKL1 = testutil:stdload_expiring(Bookie1, KeyCount, Future), lists:foreach( fun({_RN, Obj, Spc}) -> testutil:book_tempriakput( - Bookie1, Obj, Spc, leveled_util:integer_now() + Future) + Bookie1, Obj, Spc, leveled_util:integer_now() + Future + ) end, TempRiakObjects ), timer:sleep(1000), - % Wait a second after last key so that none loaded in the last second - LoadTime = timer:now_diff(os:timestamp(), SW1)/1000000, - io:format("Load of ~w std objects in ~w seconds~n", [KeyCount, LoadTime]), - + % Wait a second after last key so that none loaded in the last second + LoadTime = timer:now_diff(os:timestamp(), SW1) / 1000000, + io:format("Load of ~w std objects in ~w seconds~n", [KeyCount, LoadTime]), + timer:sleep(1000), SW2 = os:timestamp(), FilterFun = fun({I, _B, _K}) -> lists:member(I, [5, 6, 7, 8]) end, LoadedEntriesInRange = lists:sort(lists:filter(FilterFun, IBKL1)), - true = LoadTime < (Future - 20), - % need 20 seconds spare to run query - % and add replacements + true = LoadTime < (Future - 20), + % need 20 seconds spare to run query + % and add replacements {I0, B0, K0} = hd(IBKL1), - false = FilterFun(hd(IBKL1)), - % The 
head entry should not have index between 5 and 8 + false = FilterFun(hd(IBKL1)), + % The head entry should not have index between 5 and 8 CountI0Fold = fun() -> @@ -98,7 +104,8 @@ expiring_indexes(_Config) -> B0, {fun(_BF, _KT, Acc) -> Acc + 1 end, 0}, {<<"temp_int">>, I0, I0}, - {true, undefined}) + {true, undefined} + ) end, {async, I0Counter1} = CountI0Fold(), I0Count1 = I0Counter1(), @@ -109,10 +116,14 @@ expiring_indexes(_Config) -> Bookie1, ?RIAK_TAG, {range, <<"riakBucket">>, all}, - {fun(_B, _K, _V, Acc) -> Acc + 1 end, 0}, - false, true, false, - {testutil:convert_to_seconds(LowTS), - testutil:convert_to_seconds(HighTS)}, + {fun(_B, _K, _V, Acc) -> Acc + 1 end, 0}, + false, + true, + false, + { + testutil:convert_to_seconds(LowTS), + testutil:convert_to_seconds(HighTS) + }, false ) end, @@ -122,16 +133,17 @@ expiring_indexes(_Config) -> io:format("HeadCounts ~w before expiry~n", [HeadCounts]), {KeyCount, 0} = HeadCounts, - FoldFun = fun(BF, {IdxV, KeyF}, Acc) -> [{IdxV, BF, KeyF}|Acc] end, + FoldFun = fun(BF, {IdxV, KeyF}, Acc) -> [{IdxV, BF, KeyF} | Acc] end, InitAcc = [], - IndexFold = + IndexFold = fun() -> leveled_bookie:book_indexfold( Bookie1, B0, {FoldFun, InitAcc}, {<<"temp_int">>, 5, 8}, - {true, undefined}) + {true, undefined} + ) end, {async, Folder1} = IndexFold(), @@ -139,22 +151,26 @@ expiring_indexes(_Config) -> true = lists:sort(QR1) == LoadedEntriesInRange, % Replace object with one with an index value of 6 testutil:stdload_object( - Bookie1, B0, K0, 6, <<"value">>, leveled_util:integer_now() + 600), + Bookie1, B0, K0, 6, <<"value">>, leveled_util:integer_now() + 600 + ), % Confirm that this has reduced the index entries in I0 by 1 {async, I0Counter2} = CountI0Fold(), I0Count2 = I0Counter2(), - io:format("Count with index value ~w changed from ~w to ~w~n", - [I0, I0Count1, I0Count2]), + io:format( + "Count with index value ~w changed from ~w to ~w~n", + [I0, I0Count1, I0Count2] + ), true = I0Count2 == (I0Count1 - 1), % Now replace again, shortening the timeout to 10s, % this time index value of 6 testutil:stdload_object( - Bookie1, B0, K0, 5, <<"value">>, leveled_util:integer_now() + 10), + Bookie1, B0, K0, 5, <<"value">>, leveled_util:integer_now() + 10 + ), timer:sleep(1000), {async, Folder2} = IndexFold(), QR2 = Folder2(), io:format("Query with additional entry length ~w~n", [length(QR2)]), - true = lists:sort(QR2) == lists:sort([{5, B0, K0}|LoadedEntriesInRange]), + true = lists:sort(QR2) == lists:sort([{5, B0, K0} | LoadedEntriesInRange]), % Wait for a 10s timeout plus a second to be sure timer:sleep(10000 + 1000), {async, Folder3} = IndexFold(), @@ -164,10 +180,12 @@ expiring_indexes(_Config) -> io:format("Query results length ~w following sleep~n", [length(QR3)]), true = lists:sort(QR3) == LoadedEntriesInRange, - FoldTime = timer:now_diff(os:timestamp(), SW1)/1000000 - LoadTime, - io:format("Query returned ~w entries in ~w seconds - 3 queries + 10s wait~n", - [length(QR3), FoldTime]), - + FoldTime = timer:now_diff(os:timestamp(), SW1) / 1000000 - LoadTime, + io:format( + "Query returned ~w entries in ~w seconds - 3 queries + 10s wait~n", + [length(QR3), FoldTime] + ), + SleepTime = (Future - (timer:now_diff(os:timestamp(), SW2) div (1000 * 1000))) + 1, @@ -182,7 +200,7 @@ expiring_indexes(_Config) -> {async, Folder5} = IndexFold(), QR5 = Folder5(), io:format("QR5 Unexpired indexes of length ~w~n", [length(QR5)]), - + true = QR4 == [], true = QR5 == [], @@ -195,22 +213,24 @@ expiring_indexes(_Config) -> ok = leveled_bookie:book_close(Bookie1), 
testutil:reset_filestructure(). - breaking_folds(_Config) -> % Run various iterators and show that they can be broken by throwing an % exception from within the fold KeyCount = 10000, RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 10000000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 10000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = testutil:get_randomindexes_generator(8), ObjL1 = testutil:generate_objects( - KeyCount, binary_uuid, [], ObjectGen, IndexGen), + KeyCount, binary_uuid, [], ObjectGen, IndexGen + ), testutil:riakload(Bookie1, ObjL1), % Find all keys index, and then same again but stop at a midpoint using a @@ -218,33 +238,34 @@ breaking_folds(_Config) -> {async, IdxFolder} = leveled_bookie:book_indexfold( Bookie1, - list_to_binary("Bucket"), - {fun testutil:foldkeysfun/3, []}, + list_to_binary("Bucket"), + {fun testutil:foldkeysfun/3, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, - {true, undefined}), + {true, undefined} + ), KeyList1 = lists:reverse(IdxFolder()), io:format("Index fold with result size ~w~n", [length(KeyList1)]), true = KeyCount == length(KeyList1), - {MidTerm, MidKey} = lists:nth(KeyCount div 2, KeyList1), - + FoldKeyThrowFun = fun(_B, {Term, Key}, Acc) -> case {Term, Key} > {MidTerm, MidKey} of true -> throw({stop_fold, Acc}); false -> - [{Term, Key}|Acc] + [{Term, Key} | Acc] end end, {async, IdxFolderToMidK} = leveled_bookie:book_indexfold( Bookie1, - list_to_binary("Bucket"), - {FoldKeyThrowFun, []}, + list_to_binary("Bucket"), + {FoldKeyThrowFun, []}, {<<"idx1_bin">>, <<"#">>, <<"|">>}, - {true, undefined}), + {true, undefined} + ), CatchingFold = fun(AsyncFolder) -> try @@ -259,14 +280,15 @@ breaking_folds(_Config) -> io:format("Index fold with result size ~w~n", [length(KeyList2)]), true = KeyCount div 2 == length(KeyList2), - HeadFoldFun = + HeadFoldFun = fun(_B, K, PO, Acc) -> {proxy_object, _MDBin, Size, _FF} = binary_to_term(PO), - [{K, Size}|Acc] + [{K, Size} | Acc] end, - {async, HeadFolder} = + {async, HeadFolder} = leveled_bookie:book_headfold( - Bookie1, ?RIAK_TAG, {HeadFoldFun, []}, true, true, false), + Bookie1, ?RIAK_TAG, {HeadFoldFun, []}, true, true, false + ), KeySizeList1 = lists:reverse(HeadFolder()), io:format("Head fold with result size ~w~n", [length(KeySizeList1)]), true = KeyCount == length(KeySizeList1), @@ -283,7 +305,7 @@ breaking_folds(_Config) -> end end end, - {async, HeadFolderToMidK} = + {async, HeadFolderToMidK} = leveled_bookie:book_headfold( Bookie1, ?RIAK_TAG, @@ -294,13 +316,13 @@ breaking_folds(_Config) -> ), KeySizeList2 = lists:reverse(CatchingFold(HeadFolderToMidK)), io:format("Head fold with result size ~w~n", [length(KeySizeList2)]), - true = KeyCount div 2 == length(KeySizeList2), + true = KeyCount div 2 == length(KeySizeList2), - ObjFoldFun = + ObjFoldFun = fun(_B, K, V, Acc) -> - [{K,byte_size(V)}|Acc] + [{K, byte_size(V)} | Acc] end, - {async, ObjectFolderKO} = + {async, ObjectFolderKO} = leveled_bookie:book_objectfold( Bookie1, ?RIAK_TAG, @@ -312,23 +334,23 @@ breaking_folds(_Config) -> io:format("Obj fold with result size ~w~n", [length(ObjSizeList1)]), true = KeyCount == length(ObjSizeList1), - {async, ObjFolderToMidK} = + {async, ObjFolderToMidK} = leveled_bookie:book_objectfold( Bookie1, - ?RIAK_TAG, - {FoldThrowFun(ObjFoldFun), []}, + ?RIAK_TAG, + 
{FoldThrowFun(ObjFoldFun), []}, false, key_order ), ObjSizeList2 = lists:reverse(CatchingFold(ObjFolderToMidK)), io:format("Object fold with result size ~w~n", [length(ObjSizeList2)]), - true = KeyCount div 2 == length(ObjSizeList2), + true = KeyCount div 2 == length(ObjSizeList2), % Object folds which are SQN order use a different path through the code, % so testing that here. Note that it would not make sense to have a fold % that was terminated by reaching a point in the key range .. as results % will not be passed to the fold function in key order - {async, ObjectFolderSO} = + {async, ObjectFolderSO} = leveled_bookie:book_objectfold( Bookie1, ?RIAK_TAG, @@ -352,11 +374,11 @@ breaking_folds(_Config) -> end end end, - {async, ObjFolderTo1K} = + {async, ObjFolderTo1K} = leveled_bookie:book_objectfold( Bookie1, - ?RIAK_TAG, - {FoldThrowThousandFun(ObjFoldFun), []}, + ?RIAK_TAG, + {FoldThrowThousandFun(ObjFoldFun), []}, false, sqn_order ), @@ -366,19 +388,22 @@ breaking_folds(_Config) -> ObjL2 = testutil:generate_objects( - 10, binary_uuid, [], ObjectGen, IndexGen, <<"B2">>), + 10, binary_uuid, [], ObjectGen, IndexGen, <<"B2">> + ), ObjL3 = testutil:generate_objects( - 10, binary_uuid, [], ObjectGen, IndexGen, <<"B3">>), + 10, binary_uuid, [], ObjectGen, IndexGen, <<"B3">> + ), ObjL4 = testutil:generate_objects( - 10, binary_uuid, [], ObjectGen, IndexGen, <<"B4">>), + 10, binary_uuid, [], ObjectGen, IndexGen, <<"B4">> + ), testutil:riakload(Bookie1, ObjL2), testutil:riakload(Bookie1, ObjL3), testutil:riakload(Bookie1, ObjL4), - FBAccT = {fun(B, Acc) -> [B|Acc] end, []}, - {async, BucketFolder} = + FBAccT = {fun(B, Acc) -> [B | Acc] end, []}, + {async, BucketFolder} = leveled_bookie:book_bucketlist(Bookie1, ?RIAK_TAG, FBAccT, all), BucketList1 = lists:reverse(BucketFolder()), io:format("bucket list with result size ~w~n", [length(BucketList1)]), @@ -386,7 +411,7 @@ breaking_folds(_Config) -> StopAt3Fun = fun(B, Acc) -> - Acc0 = [B|Acc], + Acc0 = [B | Acc], case B of <<"B3">> -> throw({stop_fold, Acc0}); @@ -394,10 +419,11 @@ breaking_folds(_Config) -> Acc0 end end, - - {async, StopAt3BucketFolder} = + + {async, StopAt3BucketFolder} = leveled_bookie:book_bucketlist( - Bookie1, ?RIAK_TAG, {StopAt3Fun, []}, all), + Bookie1, ?RIAK_TAG, {StopAt3Fun, []}, all + ), BucketListSA3 = lists:reverse(CatchingFold(StopAt3BucketFolder)), io:format("bucket list with result ~w~n", [BucketListSA3]), true = [<<"B2">>, <<"B3">>] == BucketListSA3, @@ -405,19 +431,22 @@ breaking_folds(_Config) -> ok = leveled_bookie:book_close(Bookie1), testutil:reset_filestructure(). 
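%% A minimal sketch of the stop_fold pattern exercised by breaking_folds/1
%% above: the fold fun throws {stop_fold, Acc} once it has seen enough, and
%% the caller catches that around the async runner. Bookie, the bucket and
%% the index field are placeholders assumed to refer to a loaded store.
Limit = 100,
LimitedFoldFun =
    fun
        (_B, {_Term, _Key}, Acc) when length(Acc) >= Limit ->
            throw({stop_fold, Acc});
        (_B, {Term, Key}, Acc) ->
            [{Term, Key} | Acc]
    end,
{async, LimitedFolder} =
    leveled_bookie:book_indexfold(
        Bookie,
        <<"Bucket">>,
        {LimitedFoldFun, []},
        {<<"idx1_bin">>, <<"#">>, <<"|">>},
        {true, undefined}
    ),
CappedResults =
    try
        LimitedFolder()
    catch
        throw:{stop_fold, Acc0} ->
            Acc0
    end.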
- single_object_with2i(_Config) -> % Load a single object with an integer and a binary % index and query for it RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 5000000}, - {sync_strategy, testutil:sync_strategy()}], - % low journal size to make sure > 1 created + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 5000000}, + {sync_strategy, testutil:sync_strategy()} + ], + % low journal size to make sure > 1 created {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, _TestSpec} = testutil:generate_testobject(), - TestSpec = [{add, list_to_binary("integer_int"), 100}, - {add, list_to_binary("binary_bin"), <<100:32/integer>>}], + TestSpec = [ + {add, list_to_binary("integer_int"), 100}, + {add, list_to_binary("binary_bin"), <<100:32/integer>>} + ], ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), %% @TODO replace all index queries with new Top-Level API if tests @@ -427,13 +456,13 @@ single_object_with2i(_Config) -> Bookie1, <<"Bucket1">>, {fun testutil:foldkeysfun/3, []}, - {list_to_binary("binary_bin"), - <<99:32/integer>>, <<101:32/integer>>}, - {true, undefined}), + {list_to_binary("binary_bin"), <<99:32/integer>>, <<101:32/integer>>}, + {true, undefined} + ), R1 = IdxFolder1(), io:format("R1 of ~w~n", [R1]), true = [{<<100:32/integer>>, <<"Key1">>}] == R1, - + IdxQ2 = { index_query, @@ -446,7 +475,7 @@ single_object_with2i(_Config) -> R2 = IdxFolder2(), io:format("R2 of ~w~n", [R2]), true = [{100, <<"Key1">>}] == R2, - + IdxQ3 = { index_query, @@ -459,16 +488,18 @@ single_object_with2i(_Config) -> R3 = IdxFolder3(), io:format("R2 of ~w~n", [R3]), true = [{100, <<"Key1">>}] == R3, - + ok = leveled_bookie:book_close(Bookie1), testutil:reset_filestructure(). small_load_with2i(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 5000000}, - {sync_strategy, testutil:sync_strategy()}], - % low journal size to make sure > 1 created + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 5000000}, + {sync_strategy, testutil:sync_strategy()} + ], + % low journal size to make sure > 1 created {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), @@ -479,12 +510,13 @@ small_load_with2i(_Config) -> IndexGen = testutil:get_randomindexes_generator(8), ObjL1 = testutil:generate_objects( - 10000, binary_uuid, [], ObjectGen, IndexGen), + 10000, binary_uuid, [], ObjectGen, IndexGen + ), testutil:riakload(Bookie1, ObjL1), ChkList1 = lists:sublist(lists:sort(ObjL1), 100), testutil:check_forlist(Bookie1, ChkList1), testutil:check_forobject(Bookie1, TestObject), - + % Find all keys index, and then just the last key IdxQ1 = { @@ -510,24 +542,25 @@ small_load_with2i(_Config) -> KeyList2 = lists:usort(IdxFolderLK()), io:format("List should be last key ~w ~w~n", [LastKey, KeyList2]), true = 1 == length(KeyList2), - + %% Delete the objects from the ChkList removing the indexes lists:foreach( fun({_RN, Obj, Spc}) -> DSpc = - lists:map(fun({add, F, T}) -> {remove, F, T} end, Spc), + lists:map(fun({add, F, T}) -> {remove, F, T} end, Spc), {B, K} = {testutil:get_bucket(Obj), testutil:get_key(Obj)}, testutil:book_riakdelete(Bookie1, B, K, DSpc) - end, + end, ChkList1 ), %% Get the Buckets Keys and Hashes for the whole bucket FoldObjectsFun = - fun(B, K, V, Acc) -> [{B, K, erlang:phash2(V)}|Acc] end, + fun(B, K, V, Acc) -> [{B, K, erlang:phash2(V)} | 
Acc] end, {async, HTreeF1} = leveled_bookie:book_objectfold( - Bookie1, ?RIAK_TAG, {FoldObjectsFun, []}, false), + Bookie1, ?RIAK_TAG, {FoldObjectsFun, []}, false + ), KeyHashList1 = HTreeF1(), {async, HTreeF2} = @@ -542,65 +575,74 @@ small_load_with2i(_Config) -> <<"Bucket">>, {<<"idx1_bin">>, <<"#">>, <<"|">>}, {FoldObjectsFun, []}, - false), + false + ), KeyHashList3 = HTreeF3(), - true = 9901 == length(KeyHashList1), % also includes the test object + % also includes the test object + true = 9901 == length(KeyHashList1), true = 9900 == length(KeyHashList2), true = 9900 == length(KeyHashList3), - + SumIntFun = fun(_B, _K, Obj, Acc) -> {I, _Bin} = testutil:get_value(Obj), Acc + I end, - BucketObjQ = - {foldobjects_bybucket, ?RIAK_TAG, <<"Bucket">>, all, {SumIntFun, 0}, true}, + BucketObjQ = + {foldobjects_bybucket, ?RIAK_TAG, <<"Bucket">>, all, {SumIntFun, 0}, + true}, {async, Sum1} = leveled_bookie:book_returnfolder(Bookie1, BucketObjQ), Total1 = Sum1(), io:format("Total from summing all I is ~w~n", [Total1]), - SumFromObjLFun = + SumFromObjLFun = fun(Obj, Acc) -> {I, _Bin} = testutil:get_value_from_objectlistitem(Obj), Acc + I end, - ObjL1Total = + ObjL1Total = lists:foldl(SumFromObjLFun, 0, ObjL1), - ChkList1Total = + ChkList1Total = lists:foldl(SumFromObjLFun, 0, ChkList1), io:format( - "Total in original object list ~w and from removed list ~w~n", - [ObjL1Total, ChkList1Total]), + "Total in original object list ~w and from removed list ~w~n", + [ObjL1Total, ChkList1Total] + ), + + Total1 = ObjL1Total - ChkList1Total, - Total1 = ObjL1Total - ChkList1Total, - ok = leveled_bookie:book_close(Bookie1), - + {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), - + {async, Sum2} = leveled_bookie:book_returnfolder(Bookie2, BucketObjQ), Total2 = Sum2(), - true = Total2 == Total1, - + true = Total2 == Total1, + FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end, - % this should find Bucket and Bucket1 - as we can now find string-based + % this should find Bucket and Bucket1 - as we can now find string-based % buckets using bucket_list - i.e. it isn't just binary buckets now - {async, BL} = leveled_bookie:book_bucketlist(Bookie2, ?RIAK_TAG, {FoldBucketsFun, sets:new()}, all), + {async, BL} = leveled_bookie:book_bucketlist( + Bookie2, ?RIAK_TAG, {FoldBucketsFun, sets:new()}, all + ), true = sets:size(BL()) == 2, ok = leveled_bookie:book_close(Bookie2), testutil:reset_filestructure(). 
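%% The index-removal pattern used in small_load_with2i/1 above, as a short
%% sketch: each {add, Field, Term} spec written with the object is replayed
%% as {remove, Field, Term} when the object is deleted. Bookie, Bucket, Key
%% and Spec are placeholders for bindings from a real test.
RemoveSpecs =
    lists:map(fun({add, Field, Term}) -> {remove, Field, Term} end, Spec),
testutil:book_riakdelete(Bookie, Bucket, Key, RemoveSpecs).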
- query_count(_Config) -> RootPath = testutil:reset_filestructure(), {ok, Book1} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), BucketBin = list_to_binary("Bucket"), {TestObject, TestSpec} = testutil:generate_testobject( - BucketBin, term_to_binary("Key1"), <<"Value1">>, [], [{<<"MDK1">>, <<"MDV1">>}]), + BucketBin, term_to_binary("Key1"), <<"Value1">>, [], [ + {<<"MDK1">>, <<"MDV1">>} + ] + ), ok = testutil:book_riakput(Book1, TestObject, TestSpec), testutil:check_forobject(Book1, TestObject), testutil:check_formissingobject(Book1, <<"Bucket1">>, <<"Key2">>), @@ -616,9 +658,11 @@ query_count(_Config) -> io:format( "Put of 10000 objects with 8 index entries " "each completed in ~w microseconds~n", - [timer:now_diff(os:timestamp(), SW)]) + [timer:now_diff(os:timestamp(), SW)] + ) end, - lists:seq(1, 8)), + lists:seq(1, 8) + ), testutil:check_forobject(Book1, TestObject), Total = lists:foldl( @@ -626,24 +670,27 @@ query_count(_Config) -> IdxF = "idx" ++ integer_to_list(X) ++ "_bin", T = count_termsonindex( - BucketBin, list_to_binary(IdxF), Book1, ?KEY_ONLY), + BucketBin, list_to_binary(IdxF), Book1, ?KEY_ONLY + ), io:format("~w terms found on index ~s~n", [T, IdxF]), Acc + T end, 0, - lists:seq(1, 8)), + lists:seq(1, 8) + ), true = Total == 640000, Index1Count = count_termsonindex( - BucketBin, <<"idx1_bin">>, Book1, ?KEY_ONLY), + BucketBin, <<"idx1_bin">>, Book1, ?KEY_ONLY + ), - TermCountFun = + TermCountFun = fun(_B, {T, _K}, Acc) -> Cnt = maps:get(T, Acc, 0), maps:put(T, Cnt, Acc) end, - {async, TermRunner} = + {async, TermRunner} = leveled_bookie:book_returnfolder( Book1, { @@ -659,7 +706,7 @@ query_count(_Config) -> io:format("TermCounts ~0p", [TermCounts]), lists:foreach( fun(T) -> - {async, TR0} = + {async, TR0} = leveled_bookie:book_returnfolder( Book1, { @@ -680,10 +727,12 @@ query_count(_Config) -> ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start( - RootPath, 1000, 50000000, testutil:sync_strategy()), + RootPath, 1000, 50000000, testutil:sync_strategy() + ), Index1Count = count_termsonindex( - BucketBin, <<"idx1_bin">>, Book2, ?KEY_ONLY), + BucketBin, <<"idx1_bin">>, Book2, ?KEY_ONLY + ), NameList = testutil:name_list(), TotalNameByName = lists:foldl( @@ -696,33 +745,30 @@ query_count(_Config) -> BucketBin, list_to_binary("idx1_bin"), Book2, - {false, Regex}), + {false, Regex} + ), TD = timer:now_diff(os:timestamp(), SW), io:format( "~w terms found on index idx1 with a " "regex in ~w microseconds~n", - [T, TD]), + [T, TD] + ), Acc + T end, 0, - NameList), + NameList + ), true = TotalNameByName == Index1Count, {ok, RegMia} = re:compile("[0-9]+Mia"), - Query1 = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {<<"idx2_bin">>, <<"2000">>, <<"2000|">>}, - {false, RegMia}}, - {async, - Mia2KFolder1} = leveled_bookie:book_returnfolder(Book2, Query1), + Query1 = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {<<"idx2_bin">>, <<"2000">>, <<"2000|">>}, {false, RegMia}}, + {async, Mia2KFolder1} = leveled_bookie:book_returnfolder(Book2, Query1), Mia2000Count1 = length(Mia2KFolder1()), - Query2 = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {<<"idx2_bin">>, <<"2000">>, <<"2001">>}, - {true, undefined}}, - {async, - Mia2KFolder2} = leveled_bookie:book_returnfolder(Book2, Query2), + Query2 = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {<<"idx2_bin">>, <<"2000">>, <<"2001">>}, {true, undefined}}, + 
{async, Mia2KFolder2} = leveled_bookie:book_returnfolder(Book2, Query2), Mia2000Count2 = lists:foldl( fun({Term, _Key}, Acc) -> @@ -734,46 +780,44 @@ query_count(_Config) -> end end, 0, - Mia2KFolder2()), - ok = case Mia2000Count2 of - Mia2000Count1 when Mia2000Count1 > 0 -> - io:format("Mia2000 counts match at ~w~n", - [Mia2000Count1]), - ok - end, + Mia2KFolder2() + ), + ok = + case Mia2000Count2 of + Mia2000Count1 when Mia2000Count1 > 0 -> + io:format( + "Mia2000 counts match at ~w~n", + [Mia2000Count1] + ), + ok + end, {ok, RxMia2K} = leveled_util:regex_compile("^2000[0-9]+Mia"), - Query3 = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, - {false, RxMia2K}}, - {async, Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), + Query3 = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, {false, RxMia2K}}, + {async, Mia2KFolder3} = leveled_bookie:book_returnfolder(Book2, Query3), Mia2000Count1 = length(Mia2KFolder3()), {ok, RxMia2KPCRE} = re:compile("^2000[0-9]+Mia"), Query3PCRE = - {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, - {false, RxMia2KPCRE}}, + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {<<"idx2_bin">>, <<"1980">>, <<"2100">>}, {false, RxMia2KPCRE}}, {async, Mia2KFolder3PCRE} = leveled_bookie:book_returnfolder(Book2, Query3PCRE), Mia2000Count1 = length(Mia2KFolder3PCRE()), - + V9 = testutil:get_compressiblevalue(), Indexes9 = testutil:get_randomindexes_generator(8), [{_RN, Obj9, Spc9}] = testutil:generate_objects( - 1, binary_uuid, [], V9, Indexes9), + 1, binary_uuid, [], V9, Indexes9 + ), ok = testutil:book_riakput(Book2, Obj9, Spc9), R9 = lists:map( fun({add, IdxF, IdxT}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, + Q = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, ?KEY_ONLY}, R = leveled_bookie:book_returnfolder(Book2, Q), {async, Fldr} = R, case length(Fldr()) of @@ -781,37 +825,37 @@ query_count(_Config) -> {IdxF, IdxT, X} end end, - Spc9), - Spc9Del = lists:map(fun({add, IdxF, IdxT}) -> {remove, IdxF, IdxT} end, - Spc9), + Spc9 + ), + Spc9Del = lists:map( + fun({add, IdxF, IdxT}) -> {remove, IdxF, IdxT} end, + Spc9 + ), ok = testutil:book_riakput(Book2, Obj9, Spc9Del), lists:foreach( fun({IdxF, IdxT, X}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, + Q = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, ?KEY_ONLY}, R = leveled_bookie:book_returnfolder(Book2, Q), {async, Fldr} = R, case length(Fldr()) of Y -> Y = X - 1 end - end, + end, R9 ), ok = leveled_bookie:book_close(Book2), {ok, Book3} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), lists:foreach( fun({IdxF, IdxT, X}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, + Q = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, ?KEY_ONLY}, R = leveled_bookie:book_returnfolder(Book3, Q), {async, Fldr} = R, case length(Fldr()) of @@ -819,19 +863,19 @@ query_count(_Config) -> Y = X - 1 end end, - R9), + R9 + ), ok = testutil:book_riakput(Book3, Obj9, Spc9), ok = leveled_bookie:book_close(Book3), {ok, Book4} = leveled_bookie:book_start( - RootPath, 2000, 
50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), lists:foreach( fun({IdxF, IdxT, X}) -> - Q = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {IdxF, IdxT, IdxT}, - ?KEY_ONLY}, + Q = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {IdxF, IdxT, IdxT}, ?KEY_ONLY}, R = leveled_bookie:book_returnfolder(Book4, Q), {async, Fldr} = R, case length(Fldr()) of @@ -839,56 +883,64 @@ query_count(_Config) -> ok end end, - R9), + R9 + ), testutil:check_forobject(Book4, TestObject), - + FoldBucketsFun = fun(B, Acc) -> sets:add_element(B, Acc) end, - BucketListQuery = {bucket_list, - ?RIAK_TAG, - {FoldBucketsFun, sets:new()}}, + BucketListQuery = {bucket_list, ?RIAK_TAG, {FoldBucketsFun, sets:new()}}, {async, BLF1} = leveled_bookie:book_returnfolder(Book4, BucketListQuery), SW_QA = os:timestamp(), BucketSet1 = BLF1(), - io:format("Bucket set returned in ~w microseconds", - [timer:now_diff(os:timestamp(), SW_QA)]), - - true = sets:size(BucketSet1) == 1, - + io:format( + "Bucket set returned in ~w microseconds", + [timer:now_diff(os:timestamp(), SW_QA)] + ), + + true = sets:size(BucketSet1) == 1, + ObjList10A = testutil:generate_objects( - 5000, binary_uuid, [], V9, Indexes9, <<"BucketA">>), + 5000, binary_uuid, [], V9, Indexes9, <<"BucketA">> + ), ObjList10B = testutil:generate_objects( - 5000, binary_uuid, [], V9, Indexes9, <<"BucketB">>), + 5000, binary_uuid, [], V9, Indexes9, <<"BucketB">> + ), ObjList10C = testutil:generate_objects( - 5000, binary_uuid, [], V9, Indexes9, <<"BucketC">>), + 5000, binary_uuid, [], V9, Indexes9, <<"BucketC">> + ), testutil:riakload(Book4, ObjList10A), testutil:riakload(Book4, ObjList10B), testutil:riakload(Book4, ObjList10C), {async, BLF2} = leveled_bookie:book_returnfolder(Book4, BucketListQuery), SW_QB = os:timestamp(), BucketSet2 = BLF2(), - io:format("Bucket set returned in ~w microseconds", - [timer:now_diff(os:timestamp(), SW_QB)]), + io:format( + "Bucket set returned in ~w microseconds", + [timer:now_diff(os:timestamp(), SW_QB)] + ), true = sets:size(BucketSet2) == 4, - + ok = leveled_bookie:book_close(Book4), - + {ok, Book5} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), {async, BLF3} = leveled_bookie:book_returnfolder(Book5, BucketListQuery), SW_QC = os:timestamp(), BucketSet3 = BLF3(), - io:format("Bucket set returned in ~w microseconds", - [timer:now_diff(os:timestamp(), SW_QC)]), + io:format( + "Bucket set returned in ~w microseconds", + [timer:now_diff(os:timestamp(), SW_QC)] + ), true = sets:size(BucketSet3) == 4, - + ok = leveled_bookie:book_close(Book5), - - testutil:reset_filestructure(). + testutil:reset_filestructure(). 
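%% The index_query tuple passed to book_returnfolder/2 throughout
%% query_count/1 has the shape sketched below. The final element pairs a
%% return-terms boolean with an optional term filter: with false only keys
%% reach the fold fun, whereas with {true, undefined} it receives
%% {Term, Key} pairs. Book, the bucket, field and range are placeholders.
{ok, SketchRegex} = re:compile("Mia"),
SketchQuery =
    {index_query,
        <<"Bucket">>,
        {fun testutil:foldkeysfun/3, []},
        {<<"idx2_bin">>, <<"1980">>, <<"2100">>},
        {false, SketchRegex}},
{async, SketchFolder} = leveled_bookie:book_returnfolder(Book, SketchQuery),
SketchMatches = SketchFolder().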
capture_and_filter_terms(_Config) -> RootPath = testutil:reset_filestructure(), @@ -896,7 +948,8 @@ capture_and_filter_terms(_Config) -> IdxName = <<"people_bin">>, {ok, Book1} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), V1 = <<"V1">>, IndexGen = fun() -> @@ -904,7 +957,8 @@ capture_and_filter_terms(_Config) -> end, ObjL1 = testutil:generate_objects( - 100000, binary_uuid, [], V1, IndexGen, Bucket), + 100000, binary_uuid, [], V1, IndexGen, Bucket + ), testutil:riakload(Book1, ObjL1), StartDoB = <<"19740301">>, @@ -916,13 +970,10 @@ capture_and_filter_terms(_Config) -> SW0 = os:timestamp(), {ok, WillowLeedsPCRE} = re:compile(WillowLeedsFinder), - + QueryPCRE0 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, - {IdxName, <<"M">>, <<"Z">>}, - {true, WillowLeedsPCRE}}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, + {IdxName, <<"M">>, <<"Z">>}, {true, WillowLeedsPCRE}}, {async, Runner0} = leveled_bookie:book_returnfolder(Book1, QueryPCRE0), Results0 = Runner0(), BornMid70s0 = @@ -946,8 +997,8 @@ capture_and_filter_terms(_Config) -> ), SW1 = os:timestamp(), - - WillowLeedsExtractor = + + WillowLeedsExtractor = "[^\\|]*\\|(?P[0-9]{8})\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|" "[^\\|]*#LS[^\\|]*", FilterFun1 = @@ -960,14 +1011,11 @@ capture_and_filter_terms(_Config) -> "regex($term, :regex, pcre, ($dob))", #{<<"regex">> => list_to_binary(WillowLeedsExtractor)} ), - + QueryPCRE1 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {false, {query, EvalFunPCRE, FilterFun1}} - }, + {false, {query, EvalFunPCRE, FilterFun1}}}, {async, RunnerPCRE1} = leveled_bookie:book_returnfolder(Book1, QueryPCRE1), BornMid70sPCRE1 = RunnerPCRE1(), @@ -979,12 +1027,9 @@ capture_and_filter_terms(_Config) -> #{<<"regex">> => list_to_binary(WillowLeedsExtractor)} ), QueryRE2_2 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {false, {query, EvalFunRE2, FilterFun1}} - }, + {false, {query, EvalFunRE2, FilterFun1}}}, {async, RunnerRE2_2} = leveled_bookie:book_returnfolder(Book1, QueryRE2_2), BornMid70sRE2_2 = RunnerRE2_2(), @@ -992,12 +1037,9 @@ capture_and_filter_terms(_Config) -> AllFun = fun(_) -> true end, QueryRE2_3 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {<<"dob">>, {query, EvalFunRE2, AllFun}} - }, + {<<"dob">>, {query, EvalFunRE2, AllFun}}}, {async, RunnerRE2_3} = leveled_bookie:book_returnfolder(Book1, QueryRE2_3), Results3 = RunnerRE2_3(), BornMid70sRE2_3 = @@ -1015,7 +1057,7 @@ capture_and_filter_terms(_Config) -> SW4 = os:timestamp(), - WillowLeedsDoubleExtractor = + WillowLeedsDoubleExtractor = "[^\\|]*\\|(?P[0-9]{8})\\|(?P[0-9]{0,8})\\|" "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*", EvalFunRE2_2 = @@ -1023,31 +1065,25 @@ capture_and_filter_terms(_Config) -> "regex($term, :regex, pcre, ($dob, $dod))", #{<<"regex">> => list_to_binary(WillowLeedsDoubleExtractor)} ), - + FilterFun2 = fun(Captures) -> DoB = maps:get(<<"dob">>, Captures, notfound), (DoB >= StartDoB) andalso (DoB =< EndDoB) end, QueryRE2_4 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun 
testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {false, {query, EvalFunRE2_2, FilterFun2}} - }, + {false, {query, EvalFunRE2_2, FilterFun2}}}, {async, RunnerRE2_4} = leveled_bookie:book_returnfolder(Book1, QueryRE2_4), BornMid70sRE2_4 = RunnerRE2_4(), - + SW5 = os:timestamp(), QueryRE2_5 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {true, {query, EvalFunRE2, FilterFun1}} - }, + {true, {query, EvalFunRE2, FilterFun1}}}, {async, RunnerRE2_5} = leveled_bookie:book_returnfolder(Book1, QueryRE2_5), {ok, WillowLeedsExtractorRE} = re:compile(WillowLeedsExtractor), BornMid70sRE2_5 = @@ -1057,21 +1093,19 @@ capture_and_filter_terms(_Config) -> leveled_util:regex_run(T, WillowLeedsExtractorRE, []), {true, K} end, - RunnerRE2_5()), + RunnerRE2_5() + ), SW8 = os:timestamp(), - + FilterExpression1 = "($dob BETWEEN \"19740301\" AND \"19761030\")", FilterFun5 = leveled_filter:generate_filter_function(FilterExpression1, maps:new()), - + QueryRE2_8 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {false, {query, EvalFunRE2, FilterFun5}} - }, + {false, {query, EvalFunRE2, FilterFun5}}}, {async, RunnerRE2_8} = leveled_bookie:book_returnfolder(Book1, QueryRE2_8), BornMid70sRE2_8 = RunnerRE2_8(), @@ -1086,20 +1120,17 @@ capture_and_filter_terms(_Config) -> "regex($term, :regex, pcre, ($dob))", #{<<"regex">> => list_to_binary(PreFilterRE)} ), - + QueryRE2_9 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {false, {query, PreFilterEvalFun, FilterFun5}} - }, + {false, {query, PreFilterEvalFun, FilterFun5}}}, {async, RunnerRE2_9} = leveled_bookie:book_returnfolder(Book1, QueryRE2_9), BornMid70sRE2_9 = RunnerRE2_9(), SW10 = os:timestamp(), - WillowLeedsExtractor = + WillowLeedsExtractor = "[^\\|]*\\|(?P[0-9]{8})\\|[0-9]{0,8}\\|[^\\|]*#Willow[^\\|]*\\|" "[^\\|]*#LS[^\\|]*", @@ -1114,13 +1145,12 @@ capture_and_filter_terms(_Config) -> maps:new() ), QueryRE2_10 = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {false, {query, EvalFun2, FilterFun6}} - }, - {async, RunnerRE2_10} = leveled_bookie:book_returnfolder(Book1, QueryRE2_10), + {false, {query, EvalFun2, FilterFun6}}}, + {async, RunnerRE2_10} = leveled_bookie:book_returnfolder( + Book1, QueryRE2_10 + ), BornMid70sRE2_10 = RunnerRE2_10(), SW11 = os:timestamp(), @@ -1138,43 +1168,47 @@ capture_and_filter_terms(_Config) -> maybe_log_toscreen( "~nFilter outside took ~w ms~n", - [timer:now_diff(SW1, SW0) div 1000]), + [timer:now_diff(SW1, SW0) div 1000] + ), maybe_log_toscreen( "~nPCRE Capture filter inside took ~w ms~n", - [timer:now_diff(SW2, SW1) div 1000]), + [timer:now_diff(SW2, SW1) div 1000] + ), maybe_log_toscreen( "~nRE2 Capture filter inside took ~w ms~n", - [timer:now_diff(SW3, SW2) div 1000]), + [timer:now_diff(SW3, SW2) div 1000] + ), maybe_log_toscreen( "~nRE2 Capture filter outside took ~w ms~n", - [timer:now_diff(SW4, SW3) div 1000]), + [timer:now_diff(SW4, SW3) div 1000] + ), maybe_log_toscreen( "~nRE2 double-capture filter outside took ~w ms~n", - [timer:now_diff(SW5, SW4) div 1000]), + [timer:now_diff(SW5, SW4) div 1000] + ), maybe_log_toscreen( "~nRE2 
single-capture filter with parsed filter expression took ~w ms~n", - [timer:now_diff(SW9, SW8) div 1000]), + [timer:now_diff(SW9, SW8) div 1000] + ), maybe_log_toscreen( "~nRE2 single-capture pre-filter with parsed query string took ~w ms~n", - [timer:now_diff(SW10, SW9) div 1000]), + [timer:now_diff(SW10, SW9) div 1000] + ), maybe_log_toscreen( "~nEval processed index with parsed filter expression took ~w ms~n", - [timer:now_diff(SW11, SW10) div 1000]), - + [timer:now_diff(SW11, SW10) div 1000] + ), QueryRE2_3_WrongCapture = - {index_query, - {Bucket, null}, - {fun testutil:foldkeysfun/3, []}, + {index_query, {Bucket, null}, {fun testutil:foldkeysfun/3, []}, {IdxName, <<"M">>, <<"Z">>}, - {<<"gns">>, {query, EvalFunRE2, FilterFun6}} - }, + {<<"gns">>, {query, EvalFunRE2, FilterFun6}}}, {async, RunnerRE2_3_WC} = leveled_bookie:book_returnfolder(Book1, QueryRE2_3_WrongCapture), true = [] == RunnerRE2_3_WC(), ok = leveled_bookie:book_close(Book1), - + testutil:reset_filestructure(). maybe_log_toscreen(Log, Subs) -> @@ -1194,7 +1228,8 @@ complex_queries(_Config) -> IdxFullData = <<"fulldata_bin">>, {ok, Book1} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), V1 = <<"V1">>, IndexGen = fun() -> @@ -1216,7 +1251,8 @@ complex_queries(_Config) -> PCIdx3 = set_index_term(PC3, DoB, DoD), FullIdx = set_full_index_term( - FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3), + FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3 + ), [ {add, IdxFamilyName, FNIdx1}, {add, IdxGivenName, GNIdx1}, @@ -1230,7 +1266,8 @@ complex_queries(_Config) -> end, ObjL1 = testutil:generate_objects( - KeyCount, binary_uuid, [], V1, IndexGen, Bucket), + KeyCount, binary_uuid, [], V1, IndexGen, Bucket + ), testutil:riakload(Book1, ObjL1), DoBLow = <<"19730930">>, @@ -1242,18 +1279,21 @@ complex_queries(_Config) -> FullIndexEvalFun = leveled_eval:generate_eval_function( "delim($term, \"|\", ($fn, $dob, $dod, $gns, $pcs))", - maps:new()), + maps:new() + ), FilterString = "($dob BETWEEN :doblow AND :dobhigh) AND (contains($gcs, :givenname) " "OR contains($pcs, :postcode))", FullIndexFilterFun = leveled_filter:generate_filter_function( FilterString, - #{<<"doblow">> => DoBLow, + #{ + <<"doblow">> => DoBLow, <<"dobhigh">> => DobHigh, <<"givenname">> => GivenName, <<"postcode">> => PostCode - }), + } + ), {async, FullR0} = leveled_bookie:book_indexfold( Book1, @@ -1269,24 +1309,23 @@ complex_queries(_Config) -> SplitIndexEvalFun = leveled_eval:generate_eval_function( "delim($term, \"|\", ($sk, $dob, $dod))", - maps:new()), + maps:new() + ), SplitIndexFilterFun = leveled_filter:generate_filter_function( "$dob BETWEEN :doblow AND :dobhigh", - #{<<"doblow">> => DoBLow, <<"dobhigh">> => DobHigh}), + #{<<"doblow">> => DoBLow, <<"dobhigh">> => DobHigh} + ), Q1 = - {IdxFamilyName, - <<"Sm">>, <<"Sm~">>, + {IdxFamilyName, <<"Sm">>, <<"Sm~">>, {query, SplitIndexEvalFun, SplitIndexFilterFun}}, Q2 = - {IdxGivenName, - <<"Willow">>, <<"Willow#">>, + {IdxGivenName, <<"Willow">>, <<"Willow#">>, {query, SplitIndexEvalFun, SplitIndexFilterFun}}, Q3 = - {IdxPostCode, - <<"LS8 ">>, <<"LS8#">>, + {IdxPostCode, <<"LS8 ">>, <<"LS8#">>, {query, SplitIndexEvalFun, SplitIndexFilterFun}}, - + ComboFun = leveled_setop:generate_setop_function("$1 INTERSECT ($2 UNION $3)"), @@ -1296,7 +1335,8 @@ complex_queries(_Config) -> Bucket, {fun testutil:foldkeysfun/3, []}, [{1, Q1}, {2, Q2}, {3, Q3}], - ComboFun), + ComboFun + ), STSplit0 = os:system_time(millisecond), SplitKL0 = 
lists:sort(SplitR0()), print_query_results(STSplit0, multi_index, SplitKL0), @@ -1304,7 +1344,7 @@ complex_queries(_Config) -> true = FullKL0 == SplitKL0, ok = leveled_bookie:book_close(Book1), - + testutil:reset_filestructure(). print_query_results(ST, QT, Results) -> @@ -1319,16 +1359,20 @@ set_index_term(SortKey, DoB, DoD) -> lists:flatten( io_lib:format( "~s|~s|~s", - [SortKey, DoB, DoD]) - )). + [SortKey, DoB, DoD] + ) + ) + ). set_full_index_term(FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3) -> list_to_binary( lists:flatten( io_lib:format( "~s|~s|~s|#~s#~s#~s|#~s#~s#~s", - [FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3]) - )). + [FN, DoB, DoD, GN1, GN2, GN3, PC1, PC2, PC3] + ) + ) + ). count_termsonindex(Bucket, IdxField, Book, QType) -> lists:foldl( @@ -1337,27 +1381,28 @@ count_termsonindex(Bucket, IdxField, Book, QType) -> ST = list_to_binary(integer_to_list(X)), Pipe = <<"|">>, ET = <>, - Q = {index_query, - Bucket, - {fun testutil:foldkeysfun/3, []}, - {IdxField, ST, ET}, - QType}, + Q = + {index_query, Bucket, {fun testutil:foldkeysfun/3, []}, + {IdxField, ST, ET}, QType}, R = leveled_bookie:book_returnfolder(Book, Q), {async, Folder} = R, Items = length(Folder()), io:format( "2i query from term ~s on index ~s took ~w microseconds~n", - [ST, IdxField, timer:now_diff(os:timestamp(), SW)]), + [ST, IdxField, timer:now_diff(os:timestamp(), SW)] + ), Acc + Items end, 0, - lists:seq(190, 221)). + lists:seq(190, 221) + ). multibucket_fold(_Config) -> RootPath = testutil:reset_filestructure(), {ok, Bookie1} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), ObjectGen = <<"V1">>, IndexGen = fun() -> [] end, B1 = {<<"Type1">>, <<"Bucket1">>}, @@ -1366,32 +1411,37 @@ multibucket_fold(_Config) -> B4 = {<<"Type2">>, <<"Bucket4">>}, ObjL1 = testutil:generate_objects( - 13000, binary_uuid, [], ObjectGen, IndexGen, B1), + 13000, binary_uuid, [], ObjectGen, IndexGen, B1 + ), testutil:riakload(Bookie1, ObjL1), ObjL2 = testutil:generate_objects( - 17000, binary_uuid, [], ObjectGen, IndexGen, B2), + 17000, binary_uuid, [], ObjectGen, IndexGen, B2 + ), testutil:riakload(Bookie1, ObjL2), ObjL3 = testutil:generate_objects( - 7000, binary_uuid, [], ObjectGen, IndexGen, B3), + 7000, binary_uuid, [], ObjectGen, IndexGen, B3 + ), testutil:riakload(Bookie1, ObjL3), ObjL4 = testutil:generate_objects( - 23000, binary_uuid, [], ObjectGen, IndexGen, B4), + 23000, binary_uuid, [], ObjectGen, IndexGen, B4 + ), testutil:riakload(Bookie1, ObjL4), FF = fun(B, K, _PO, Acc) -> - [{B, K}|Acc] - end, + [{B, K} | Acc] + end, FoldAccT = {FF, []}, - {async, R1} = + {async, R1} = leveled_bookie:book_headfold( Bookie1, ?RIAK_TAG, - {bucket_list, - [{<<"Type1">>, <<"Bucket1">>}, {<<"Type2">>, <<"Bucket4">>}]}, + {bucket_list, [ + {<<"Type1">>, <<"Bucket1">>}, {<<"Type2">>, <<"Bucket4">>} + ]}, FoldAccT, false, true, @@ -1400,13 +1450,13 @@ multibucket_fold(_Config) -> O1 = length(R1()), io:format("Result R1 of length ~w~n", [O1]), - - {async, R2} = + + {async, R2} = leveled_bookie:book_headfold( Bookie1, ?RIAK_TAG, - {bucket_list, [<<"Bucket2">>, <<"Bucket3">>]}, - {fun(_B, _K, _PO, Acc) -> Acc +1 end, 0}, + {bucket_list, [<<"Bucket2">>, <<"Bucket3">>]}, + {fun(_B, _K, _PO, Acc) -> Acc + 1 end, 0}, false, true, false @@ -1417,14 +1467,19 @@ multibucket_fold(_Config) -> true = 36000 == O1, true = 24000 == O2, - FoldBucketsFun = fun(B, Acc) -> [B|Acc] end, - {async, Folder} = + FoldBucketsFun = fun(B, Acc) -> [B | Acc] end, + {async, Folder} 
= leveled_bookie:book_bucketlist( - Bookie1, ?RIAK_TAG, {FoldBucketsFun, []}, all), + Bookie1, ?RIAK_TAG, {FoldBucketsFun, []}, all + ), BucketList = lists:reverse(Folder()), - ExpectedBucketList = - [{<<"Type1">>, <<"Bucket1">>}, {<<"Type2">>, <<"Bucket4">>}, - <<"Bucket2">>, <<"Bucket3">>], + ExpectedBucketList = + [ + {<<"Type1">>, <<"Bucket1">>}, + {<<"Type2">>, <<"Bucket4">>}, + <<"Bucket2">>, + <<"Bucket3">> + ], io:format("BucketList ~w", [BucketList]), true = ExpectedBucketList == BucketList, @@ -1445,15 +1500,17 @@ foldobjects_bybucket_range(_Config) -> RootPath = testutil:reset_filestructure(), {ok, Bookie1} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = fun() -> [] end, ObjL1 = testutil:generate_objects( - 1300, {fixed_binary, 1}, [], ObjectGen, IndexGen, <<"Bucket1">>), + 1300, {fixed_binary, 1}, [], ObjectGen, IndexGen, <<"Bucket1">> + ), testutil:riakload(Bookie1, ObjL1), - FoldKeysFun = fun(_B, K, _V, Acc) -> [ K |Acc] end, + FoldKeysFun = fun(_B, K, _V, Acc) -> [K | Acc] end, StartKey = testutil:fixed_bin_key(123), EndKey = testutil:fixed_bin_key(779), diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl index 462c020f..30beb87f 100644 --- a/test/end_to_end/perf_SUITE.erl +++ b/test/end_to_end/perf_SUITE.erl @@ -15,24 +15,25 @@ get_random_givenname/0, get_random_surname/0, get_random_postcode/0 - ]). + ] +). -ifdef(test_filter_expression). - -define(TEST_FE, true). +-define(TEST_FE, true). -else. - -define(TEST_FE, false). +-define(TEST_FE, false). -endif. -ifndef(performance). - -define(performance, riak_ctperf). +-define(performance, riak_ctperf). -endif. all() -> [?performance]. -if(?performance == riak_profileperf andalso ?OTP_RELEASE >= 24). - % Requires map functions from OTP 24 - -define(ACCOUNTING, true). +% Requires map functions from OTP 24 +-define(ACCOUNTING, true). -else. - -define(ACCOUNTING, false). +-define(ACCOUNTING, false). -endif. -define(PEOPLE_INDEX, <<"people_bin">>). @@ -43,7 +44,7 @@ all() -> [?performance]. suite() -> [{timetrap, {hours, 16}}]. init_per_suite(Config) -> - testutil:init_per_suite([{suite, "perf"}|Config]), + testutil:init_per_suite([{suite, "perf"} | Config]), Config. end_per_suite(Config) -> @@ -70,15 +71,25 @@ riak_fullperf(ObjSize, PM, LC) -> R5B = riak_load_tester(Bucket, 5000000, ObjSize, [], PM, LC), output_result(R5B), R10 = riak_load_tester(Bucket, 8000000, ObjSize, [], PM, LC), - output_result(R10) - . + output_result(R10). riak_profileperf(_Config) -> riak_load_tester( {<<"SensibleBucketTypeName">>, <<"SensibleBucketName0">>}, 1200000, 2048, - [load, head, get, query, mini_query, regex_query, full, guess, estimate, update], + [ + load, + head, + get, + query, + mini_query, + regex_query, + full, + guess, + estimate, + update + ], zstd, as_store ). 
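%% riak_load_tester/6, as driven by riak_fullperf/3 and riak_profileperf/1
%% above, takes a bucket, a key count, an object size, a list of profile
%% stages, and the journal and ledger compression settings. A hypothetical
%% reduced run (all values below are placeholders) might look like this:
SketchResult =
    riak_load_tester(
        {<<"Type0">>, <<"Bucket0">>},
        100000,
        2048,
        [load, head, get],
        zstd,
        as_store
    ),
output_result(SketchResult).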
@@ -102,13 +113,13 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> RootPath = testutil:reset_filestructure("riakLoad"), StartOpts1 = - [{root_path, RootPath}, + [ + {root_path, RootPath}, {sync_strategy, testutil:sync_strategy()}, {log_level, warn}, {compression_method, PM}, {ledger_compression, LC}, - {forced_logs, - [b0015, b0016, b0017, b0018, p0032, sst12]} + {forced_logs, [b0015, b0016, b0017, b0018, p0032, sst12]} ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), @@ -119,11 +130,15 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> RandInt = rand:uniform(IndexCount - 1), IntIndex = ["integer", integer_to_list(ListID), "_int"], BinIndex = ["binary", integer_to_list(ListID), "_bin"], - [{add, iolist_to_binary(IntIndex), RandInt}, - {add, ?PEOPLE_INDEX, list_to_binary(random_people_index())}, - {add, iolist_to_binary(IntIndex), RandInt + 1}, - {add, iolist_to_binary(BinIndex), <>}, - {add, iolist_to_binary(BinIndex), <<(RandInt + 1):32/integer>>}] + [ + {add, iolist_to_binary(IntIndex), RandInt}, + {add, ?PEOPLE_INDEX, list_to_binary(random_people_index())}, + {add, iolist_to_binary(IntIndex), RandInt + 1}, + {add, iolist_to_binary(BinIndex), <>}, + {add, iolist_to_binary(BinIndex), << + (RandInt + 1):32/integer + >>} + ] end end, @@ -149,7 +164,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> "Load time per group ~w ~w ~w ~w ~w ~w ~w ~w ~w ~w ms", lists:map( fun(T) -> T div 1000 end, - [TC4, TC1, TC9, TC8, TC5, TC2, TC6, TC3, TC7, TC10]) + [TC4, TC1, TC9, TC8, TC5, TC2, TC6, TC3, TC7, TC10] + ) ), TotalLoadTime = (TC1 + TC2 + TC3 + TC4 + TC5 + TC6 + TC7 + TC8 + TC9 + TC10) div 1000, @@ -157,14 +173,14 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> HeadMemoryTracker = memory_tracking(head, 1000), HeadAccountant = accounting(head, 2000, ProfileList), - TotalHeadTime = + TotalHeadTime = random_fetches(head, Bookie1, Bucket, KeyCount, HeadFetches), ok = stop_accounting(HeadAccountant), {MT1, MP1, MB1} = stop_tracker(HeadMemoryTracker), GetMemoryTracker = memory_tracking(get, 1000), GetAccountant = accounting(get, 3000, ProfileList), - TotalGetTime = + TotalGetTime = random_fetches(riakget, Bookie1, Bucket, KeyCount div 2, GetFetches), ok = stop_accounting(GetAccountant), {MT2, MP2, MB2} = stop_tracker(GetMemoryTracker), @@ -179,7 +195,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> 10, IndexCount, QuerySize, - IndexesReturned), + IndexesReturned + ), ok = stop_accounting(QueryAccountant), {MT3a, MP3a, MB3a} = stop_tracker(QueryMemoryTracker), @@ -193,7 +210,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> 10, IndexCount, MiniQuerySize, - IndexesReturned div ?MINI_QUERY_DIVISOR), + IndexesReturned div ?MINI_QUERY_DIVISOR + ), ok = stop_accounting(MiniQueryAccountant), {MT3b, MP3b, MB3b} = stop_tracker(MiniQueryMemoryTracker), @@ -203,7 +221,8 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> random_people_queries( Bookie1, Bucket, - IndexesReturned div ?RGEX_QUERY_DIVISOR), + IndexesReturned div ?RGEX_QUERY_DIVISOR + ), ok = stop_accounting(RegexQueryAccountant), {MT3c, MP3c, MB3c} = stop_tracker(RegexQueryMemoryTracker), @@ -236,7 +255,7 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> {MT4b, MP4b, MB4b} = stop_tracker(EstimateMemoryTracker), SegFoldTime = (GuessTime + EstimateTime) div 1000, - + FullFoldMemoryTracker = memory_tracking(full, 1000), FullFoldAccountant = accounting(full, 2000, 
ProfileList), {FullFoldTime, FullFoldCount} = @@ -294,24 +313,23 @@ riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PM, LC) -> ProFun = profile_fun(P0, ProfileData), profile_test(Bookie1, ProFun, P) end, - ProfileList), + ProfileList + ), {_Inker, _Pcl, SSTPids, _PClerk, CDBPids, _IClerk} = get_pids(Bookie1), leveled_bookie:book_destroy(Bookie1), - - {KeyCount, ObjSize, {PM, LC}, - TotalLoadTime, - TotalHeadTime, TotalGetTime, + + {KeyCount, ObjSize, {PM, LC}, TotalLoadTime, TotalHeadTime, TotalGetTime, TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, - FullFoldTime div 1000, SegFoldTime, - TotalUpdateTime, - DiskSpace, - {(MT0 + MT1 + MT2 + MT3a + MT3b + MT3c + MT4a + MT4b + MT5 + MT6) - div 9, - (MP0 + MP1 + MP2 + MP3a + MP3b + MP3c + MP4a + MP4b + MP5 + MP6) - div 9, - (MB0 + MB1 + MB2 + MB3a + MB3b + MB3c + MB4a + MB4b + MB5 + MB6) - div 9}, + FullFoldTime div 1000, SegFoldTime, TotalUpdateTime, DiskSpace, + { + (MT0 + MT1 + MT2 + MT3a + MT3b + MT3c + MT4a + MT4b + MT5 + MT6) div + 9, + (MP0 + MP1 + MP2 + MP3a + MP3b + MP3c + MP4a + MP4b + MP5 + MP6) div + 9, + (MB0 + MB1 + MB2 + MB3a + MB3b + MB3c + MB4a + MB4b + MB5 + MB6) div + 9 + }, SSTPids, CDBPids}. profile_test(Bookie, ProfileFun, P) -> @@ -332,15 +350,10 @@ get_pids(Bookie) -> {Inker, Pcl, SSTPids, PClerk, CDBPids, IClerk}. output_result( - {KeyCount, ObjSize, PressMethod, - TotalLoadTime, - TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, - TotalFullFoldTime, TotalSegFoldTime, - TotalUpdateTime, - DiskSpace, - {TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB}, - SSTPids, CDBPids} + {KeyCount, ObjSize, PressMethod, TotalLoadTime, TotalHeadTime, TotalGetTime, + TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, TotalFullFoldTime, + TotalSegFoldTime, TotalUpdateTime, DiskSpace, + {TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB}, SSTPids, CDBPids} ) -> %% TODO ct:pal not working? even with rebar3 ct --verbose? io:format( @@ -360,24 +373,37 @@ output_result( "Average Memory usage for test - Total ~p Proc ~p Bin ~p MB~n" "Closing count of SST Files - ~w~n" "Closing count of CDB Files - ~w~n", - [KeyCount, ObjSize, PressMethod, - TotalLoadTime, TotalHeadTime, TotalGetTime, - TotalQueryTime, TotalMiniQueryTime, RegexQueryTime, - TotalFullFoldTime, TotalSegFoldTime, + [ + KeyCount, + ObjSize, + PressMethod, + TotalLoadTime, + TotalHeadTime, + TotalGetTime, + TotalQueryTime, + TotalMiniQueryTime, + RegexQueryTime, + TotalFullFoldTime, + TotalSegFoldTime, TotalUpdateTime, DiskSpace, - TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB, - length(SSTPids), length(CDBPids)] + TotalMemoryMB, + ProcessMemoryMB, + BinaryMemoryMB, + length(SSTPids), + length(CDBPids) + ] ). memory_usage() -> MemoryUsage = erlang:memory(), - {element(2, lists:keyfind(total, 1, MemoryUsage)), + { + element(2, lists:keyfind(total, 1, MemoryUsage)), element(2, lists:keyfind(processes, 1, MemoryUsage)), - element(2, lists:keyfind(binary, 1, MemoryUsage))}. + element(2, lists:keyfind(binary, 1, MemoryUsage)) + }. profile_app(Pids, ProfiledFun, P) -> - MinTime = case P of P when P == query; P == mini_query -> @@ -398,25 +424,25 @@ profile_app(Pids, ProfiledFun, P) -> eprof:analyze(total, [{filter, [{time, MinTime}]}]), eprof:stop(), {ok, Analysis} = file:read_file(atom_to_list(P) ++ ".log"), - io:format(user, "~n~s~n", [Analysis]) - . + io:format(user, "~n~s~n", [Analysis]). 
rotate_chunk(Bookie, Bucket, KeyCount, ObjSize, IdxCount) -> ct:log( ?INFO, "Rotating an ObjList ~w - " "time includes object generation", - [KeyCount]), - {TC, ok} = + [KeyCount] + ), + {TC, ok} = timer:tc( fun() -> rotation_withnocheck( Bookie, Bucket, KeyCount, ObjSize, IdxCount ) - end), + end + ), TC div 1000. - rotation_with_prefetch(_Book, _B, 0, _Value, _IdxCnt) -> garbage_collect(), ok; @@ -433,8 +459,8 @@ rotation_with_prefetch(Book, B, Count, Value, IdxCnt) -> not_found -> []; {ok, Head} -> - {{SibMetaBin, _Vclock, _Hash, size}, _LMS} - = leveled_head:riak_extract_metadata(Head, size), + {{SibMetaBin, _Vclock, _Hash, size}, _LMS} = + leveled_head:riak_extract_metadata(Head, size), lists:map( fun({Fld, Trm}) -> {add, Fld, Trm} end, leveled_head:get_indexes_from_siblingmetabin( @@ -453,7 +479,6 @@ rotation_with_prefetch(Book, B, Count, Value, IdxCnt) -> end, rotation_with_prefetch(Book, B, Count - 1, Value, IdxCnt). - rotation_withnocheck(Book, B, NumberOfObjects, ObjSize, IdxCnt) -> rotation_with_prefetch( Book, @@ -494,8 +519,9 @@ rotation_withnocheck(Book, B, NumberOfObjects, ObjSize, IdxCnt) -> generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> testutil:generate_objects( - CountPerList, - {fixed_binary, (Chunk - 1) * CountPerList + 1}, [], + CountPerList, + {fixed_binary, (Chunk - 1) * CountPerList + 1}, + [], base64:encode(crypto:strong_rand_bytes(ObjSize)), IndexGenFun(Chunk), Bucket @@ -505,7 +531,8 @@ load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]), time_load_chunk( Bookie, - {Bucket, base64:encode(crypto:strong_rand_bytes(ObjSize)), IndexGenFun(Chunk)}, + {Bucket, base64:encode(crypto:strong_rand_bytes(ObjSize)), + IndexGenFun(Chunk)}, (Chunk - 1) * CountPerList + 1, Chunk * CountPerList, 0, @@ -513,8 +540,10 @@ load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) -> ). time_load_chunk( - _Bookie, _ObjDetails, KeyNumber, TopKey, TotalTime, PC) - when KeyNumber > TopKey -> + _Bookie, _ObjDetails, KeyNumber, TopKey, TotalTime, PC +) when + KeyNumber > TopKey +-> garbage_collect(), timer:sleep(2000), ct:log( @@ -524,20 +553,26 @@ time_load_chunk( ), TotalTime; time_load_chunk( - Bookie, {Bucket, Value, IndexGen}, KeyNumber, TopKey, TotalTime, PC) -> + Bookie, {Bucket, Value, IndexGen}, KeyNumber, TopKey, TotalTime, PC +) -> ThisProcess = self(), spawn( fun() -> {RiakObj, IndexSpecs} = testutil:set_object( - Bucket, testutil:fixed_bin_key(KeyNumber), Value, IndexGen, []), + Bucket, + testutil:fixed_bin_key(KeyNumber), + Value, + IndexGen, + [] + ), {TC, R} = timer:tc( testutil, book_riakput, [Bookie, RiakObj, IndexSpecs] ), case R of ok -> - ThisProcess! {TC, 0}; + ThisProcess ! {TC, 0}; pause -> timer:sleep(?PUT_PAUSE), ThisProcess ! 
{TC + 40000, 1} @@ -547,7 +582,7 @@ time_load_chunk( receive {PutTime, Pause} -> time_load_chunk( - Bookie, + Bookie, {Bucket, Value, IndexGen}, KeyNumber + 1, TopKey, @@ -561,7 +596,7 @@ counter(Bookie, full) -> leveled_bookie:book_headfold( Bookie, ?RIAK_TAG, - {fun(_B, _K, _V, AccC) -> AccC + 1 end, 0}, + {fun(_B, _K, _V, AccC) -> AccC + 1 end, 0}, false, true, false @@ -574,7 +609,7 @@ counter(Bookie, guess) -> leveled_bookie:book_headfold( Bookie, ?RIAK_TAG, - {fun(_B, _K, _V, AccC) -> AccC + 1024 end, 0}, + {fun(_B, _K, _V, AccC) -> AccC + 1024 end, 0}, false, true, lists:seq(RandomSegment, RandomSegment + 31) @@ -587,22 +622,22 @@ counter(Bookie, estimate) -> leveled_bookie:book_headfold( Bookie, ?RIAK_TAG, - {fun(_B, _K, _V, AccC) -> AccC + 256 end, 0}, + {fun(_B, _K, _V, AccC) -> AccC + 256 end, 0}, false, true, lists:seq(RandomSegment, RandomSegment + 127) ), timer:tc(DataSizeEstimater). - random_fetches(FetchType, Bookie, Bucket, ObjCount, Fetches) -> Twenty = ObjCount div 5, - KeyFun = - fun(I) -> + KeyFun = + fun(I) -> case I rem 5 of 1 -> testutil:fixed_bin_key( - Twenty + rand:uniform(ObjCount - Twenty)); + Twenty + rand:uniform(ObjCount - Twenty) + ); _ -> testutil:fixed_bin_key(rand:uniform(Twenty)) end @@ -637,11 +672,11 @@ random_fetches(FetchType, Bookie, Bucket, ObjCount, Fetches) -> [FetchType, Fetches, TC div 1000] ), TC div 1000. - + random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) -> QueryFun = fun() -> - ID = rand:uniform(IDs), + ID = rand:uniform(IDs), BinIndex = iolist_to_binary(["binary", integer_to_list(ID), "_bin"]), Twenty = IdxCnt div 5, @@ -655,17 +690,18 @@ random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) -> R0 = rand:uniform(Twenty - RI), [R0, R0 + RI] end, - FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, + FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, {async, R} = leveled_bookie:book_indexfold( Bookie, - {Bucket, <<>>}, + {Bucket, <<>>}, {FoldKeysFun, 0}, {BinIndex, <>, <>}, - {true, undefined}), + {true, undefined} + ), R() end, - + {TC, {QC, EF}} = timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end), ct:log( @@ -684,7 +720,8 @@ random_people_queries(true, Bookie, Bucket, IndexesReturned) -> "AND (contains($gns, \"#Willow\") AND contains($pcs, \"#LS\"))", {ok, ParsedFilter} = leveled_filter:generate_filter_expression( - FilterExpression, maps:new()), + FilterExpression, maps:new() + ), FilterFun = fun(AttrMap) -> leveled_filter:apply_filter(ParsedFilter, AttrMap) end, EvalExpression = "delim($term, \"|\", ($surname, $dob, $dod, $gns, $pcs))", @@ -694,27 +731,24 @@ random_people_queries(true, Bookie, Bucket, IndexesReturned) -> fun(Term, Key) -> leveled_eval:apply_eval(ParsedEval, Term, Key, maps:new()) end, - + QueryFun = fun() -> Surname = get_random_surname(), Range = - {?PEOPLE_INDEX, - Surname, - <> - }, - FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, + {?PEOPLE_INDEX, Surname, <>}, + FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, {async, R} = leveled_bookie:book_indexfold( Bookie, - {Bucket, <<>>}, + {Bucket, <<>>}, {FoldKeysFun, 0}, Range, - {true, {eval, EvalFun, FilterFun} - }), + {true, {eval, EvalFun, FilterFun}} + ), R() end, - + {TC, {QC, EF}} = timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end), ct:log( @@ -728,27 +762,25 @@ random_people_queries(false, Bookie, Bucket, IndexesReturned) -> SeventiesWillowRegex = "[^\\|]*\\|197[0-9]{5}\\|[^\\|]*\\|" "[^\\|]*#Willow[^\\|]*\\|[^\\|]*#LS[^\\|]*", - %% born in the 70s with Willow as a given name + %% born in the 70s 
with Willow as a given name QueryFun = fun() -> Surname = get_random_surname(), Range = - {?PEOPLE_INDEX, - Surname, - <> - }, + {?PEOPLE_INDEX, Surname, <>}, {ok, TermRegex} = leveled_util:regex_compile(SeventiesWillowRegex), - FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, + FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end, {async, R} = leveled_bookie:book_indexfold( Bookie, - {Bucket, <<>>}, + {Bucket, <<>>}, {FoldKeysFun, 0}, Range, - {true, TermRegex}), + {true, TermRegex} + ), R() end, - + {TC, {QC, EF}} = timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end), ct:log( @@ -759,68 +791,84 @@ random_people_queries(false, Bookie, Bucket, IndexesReturned) -> ), TC div 1000. - -run_queries(_QueryFun, QueryCount, EntriesFound, TargetEntries) - when EntriesFound >= TargetEntries -> +run_queries(_QueryFun, QueryCount, EntriesFound, TargetEntries) when + EntriesFound >= TargetEntries +-> {QueryCount, EntriesFound}; run_queries(QueryFun, QueryCount, EntriesFound, TargetEntries) -> Matches = QueryFun(), run_queries( - QueryFun, QueryCount + 1, EntriesFound + Matches, TargetEntries). + QueryFun, QueryCount + 1, EntriesFound + Matches, TargetEntries + ). profile_fun(false, _ProfileData) -> fun() -> ok end; profile_fun( - {mini_query, QuerySize}, - {Bookie, Bucket, _KeyCount, _ObjSize, IndexCount, IndexesReturned}) -> + {mini_query, QuerySize}, + {Bookie, Bucket, _KeyCount, _ObjSize, IndexCount, IndexesReturned} +) -> fun() -> random_queries( - Bookie, Bucket, 10, IndexCount, QuerySize, - (IndexesReturned * 2) div ?MINI_QUERY_DIVISOR) + Bookie, + Bucket, + 10, + IndexCount, + QuerySize, + (IndexesReturned * 2) div ?MINI_QUERY_DIVISOR + ) end; profile_fun( - {query, QuerySize}, - {Bookie, Bucket, _KeyCount, _ObjSize, IndexCount, IndexesReturned}) -> + {query, QuerySize}, + {Bookie, Bucket, _KeyCount, _ObjSize, IndexCount, IndexesReturned} +) -> fun() -> random_queries( - Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned * 2) + Bookie, Bucket, 10, IndexCount, QuerySize, IndexesReturned * 2 + ) end; profile_fun( - regex_query, - {Bookie, Bucket, _KeyCount, _ObjSize, _IndexCount, IndexesReturned}) -> + regex_query, + {Bookie, Bucket, _KeyCount, _ObjSize, _IndexCount, IndexesReturned} +) -> fun() -> random_people_queries( - Bookie, Bucket, (IndexesReturned * 2) div ?RGEX_QUERY_DIVISOR) + Bookie, Bucket, (IndexesReturned * 2) div ?RGEX_QUERY_DIVISOR + ) end; profile_fun( - {head, HeadFetches}, - {Bookie, Bucket, KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) -> + {head, HeadFetches}, + {Bookie, Bucket, KeyCount, _ObjSize, _IndexCount, _IndexesReturned} +) -> fun() -> random_fetches(head, Bookie, Bucket, KeyCount, HeadFetches) end; profile_fun( - {get, GetFetches}, - {Bookie, Bucket, KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) -> + {get, GetFetches}, + {Bookie, Bucket, KeyCount, _ObjSize, _IndexCount, _IndexesReturned} +) -> fun() -> random_fetches(get, Bookie, Bucket, KeyCount, GetFetches) end; profile_fun( - {load, IndexGenFun}, - {Bookie, Bucket, KeyCount, ObjSize, _IndexCount, _IndexesReturned}) -> + {load, IndexGenFun}, + {Bookie, Bucket, KeyCount, ObjSize, _IndexCount, _IndexesReturned} +) -> ObjList11 = generate_chunk(KeyCount div 10, ObjSize, IndexGenFun, Bucket, 11), fun() -> testutil:riakload(Bookie, ObjList11) end; profile_fun( - update, - {Bookie, _Bucket, KeyCount, ObjSize, _IndexCount, _IndexesReturned}) -> + update, + {Bookie, _Bucket, KeyCount, ObjSize, _IndexCount, _IndexesReturned} +) -> fun() -> rotate_chunk(Bookie, <<"ProfileB">>, KeyCount div 100, 
ObjSize, 2) end; profile_fun( - CounterFold, - {Bookie, _Bucket, _KeyCount, _ObjSize, _IndexCount, _IndexesReturned}) -> + CounterFold, + {Bookie, _Bucket, _KeyCount, _ObjSize, _IndexCount, _IndexesReturned} +) -> Runs = case CounterFold of full -> @@ -842,47 +890,152 @@ profile_fun( random_people_index() -> io_lib:format( "~s|~s|~s|#~s#~s#~s|#~s#~s#~s", - [get_random_surname(), + [ + get_random_surname(), get_random_dob(), get_random_dod(), - get_random_givenname(), get_random_givenname(), get_random_givenname(), - get_random_postcode(), get_random_postcode(), get_random_postcode() + get_random_givenname(), + get_random_givenname(), + get_random_givenname(), + get_random_postcode(), + get_random_postcode(), + get_random_postcode() ] ). get_random_surname() -> lists:nth( rand:uniform(100), - [<<"Smith">>, <<"Jones">>, <<"Taylor">>, <<"Brown">>, <<"Williams">>, - <<"Wilson">>, <<"Johnson">>, <<"Davies">>, <<"Patel">>, <<"Robinson">>, - <<"Wright">>, <<"Thompson">>, <<"Evans">>, <<"Walker">>, <<"White">>, - <<"Roberts">>, <<"Green">>, <<"Hall">>, <<"Thomas">>, <<"Clarke">>, - <<"Jackson">>, <<"Wood">>, <<"Harris">>, <<"Edwards">>, <<"Turner">>, - <<"Martin">>, <<"Cooper">>, <<"Hill">>, <<"Ward">>, <<"Hughes">>, - <<"Moore">>, <<"Clark">>, <<"King">>, <<"Harrison">>, <<"Lewis">>, - <<"Baker">>, <<"Lee">>, <<"Allen">>, <<"Morris">>, <<"Khan">>, - <<"Scott">>, <<"Watson">>, <<"Davis">>, <<"Parker">>, <<"James">>, - <<"Bennett">>, <<"Young">>, <<"Phillips">>, <<"Richardson">>, <<"Mitchell">>, - <<"Bailey">>, <<"Carter">>, <<"Cook">>, <<"Singh">>, <<"Shaw">>, - <<"Bell">>, <<"Collins">>, <<"Morgan">>, <<"Kelly">>, <<"Begum">>, - <<"Miller">>, <<"Cox">>, <<"Hussain">>, <<"Marshall">>, <<"Simpson">>, - <<"Price">>, <<"Anderson">>, <<"Adams">>, <<"Wilkinson">>, <<"Ali">>, - <<"Ahmed">>, <<"Foster">>, <<"Ellis">>, <<"Murphy">>, <<"Chapman">>, - <<"Mason">>, <<"Gray">>, <<"Richards">>, <<"Webb">>, <<"Griffiths">>, - <<"Hunt">>, <<"Palmer">>, <<"Campbell">>, <<"Holmes">>, <<"Mills">>, - <<"Rogers">>, <<"Barnes">>, <<"Knight">>, <<"Matthews">>, <<"Barker">>, - <<"Powell">>, <<"Stevens">>, <<"Kaur">>, <<"Fisher">>, <<"Butler">>, - <<"Dixon">>, <<"Russell">>, <<"Harvey">>, <<"Pearson">>, <<"Graham">>] + [ + <<"Smith">>, + <<"Jones">>, + <<"Taylor">>, + <<"Brown">>, + <<"Williams">>, + <<"Wilson">>, + <<"Johnson">>, + <<"Davies">>, + <<"Patel">>, + <<"Robinson">>, + <<"Wright">>, + <<"Thompson">>, + <<"Evans">>, + <<"Walker">>, + <<"White">>, + <<"Roberts">>, + <<"Green">>, + <<"Hall">>, + <<"Thomas">>, + <<"Clarke">>, + <<"Jackson">>, + <<"Wood">>, + <<"Harris">>, + <<"Edwards">>, + <<"Turner">>, + <<"Martin">>, + <<"Cooper">>, + <<"Hill">>, + <<"Ward">>, + <<"Hughes">>, + <<"Moore">>, + <<"Clark">>, + <<"King">>, + <<"Harrison">>, + <<"Lewis">>, + <<"Baker">>, + <<"Lee">>, + <<"Allen">>, + <<"Morris">>, + <<"Khan">>, + <<"Scott">>, + <<"Watson">>, + <<"Davis">>, + <<"Parker">>, + <<"James">>, + <<"Bennett">>, + <<"Young">>, + <<"Phillips">>, + <<"Richardson">>, + <<"Mitchell">>, + <<"Bailey">>, + <<"Carter">>, + <<"Cook">>, + <<"Singh">>, + <<"Shaw">>, + <<"Bell">>, + <<"Collins">>, + <<"Morgan">>, + <<"Kelly">>, + <<"Begum">>, + <<"Miller">>, + <<"Cox">>, + <<"Hussain">>, + <<"Marshall">>, + <<"Simpson">>, + <<"Price">>, + <<"Anderson">>, + <<"Adams">>, + <<"Wilkinson">>, + <<"Ali">>, + <<"Ahmed">>, + <<"Foster">>, + <<"Ellis">>, + <<"Murphy">>, + <<"Chapman">>, + <<"Mason">>, + <<"Gray">>, + <<"Richards">>, + <<"Webb">>, + <<"Griffiths">>, + <<"Hunt">>, + <<"Palmer">>, + <<"Campbell">>, + <<"Holmes">>, 
+ <<"Mills">>, + <<"Rogers">>, + <<"Barnes">>, + <<"Knight">>, + <<"Matthews">>, + <<"Barker">>, + <<"Powell">>, + <<"Stevens">>, + <<"Kaur">>, + <<"Fisher">>, + <<"Butler">>, + <<"Dixon">>, + <<"Russell">>, + <<"Harvey">>, + <<"Pearson">>, + <<"Graham">> + ] ). get_random_givenname() -> lists:nth( rand:uniform(20), - [<<"Noah">>, <<"Oliver">>, <<"George">>, <<"Arthur">>, <<"Muhammad">>, - <<"Leo">>, <<"Harry">>, <<"Oscar">> , <<"Archie">>, <<"Henry">>, - <<"Olivia">>, <<"Amelia">>, <<"Isla">>, <<"Ava">>, <<"Ivy">>, - <<"Freya">>, <<"Lily">>, <<"Florence">>, <<"Mia">>, <<"Willow">> - ]). + [ + <<"Noah">>, + <<"Oliver">>, + <<"George">>, + <<"Arthur">>, + <<"Muhammad">>, + <<"Leo">>, + <<"Harry">>, + <<"Oscar">>, + <<"Archie">>, + <<"Henry">>, + <<"Olivia">>, + <<"Amelia">>, + <<"Isla">>, + <<"Ava">>, + <<"Ivy">>, + <<"Freya">>, + <<"Lily">>, + <<"Florence">>, + <<"Mia">>, + <<"Willow">> + ] + ). get_random_dob() -> io_lib:format( @@ -901,7 +1054,6 @@ get_random_postcode() -> "LS~w ~wXX", [rand:uniform(26), rand:uniform(9)] ). - memory_tracking(Phase, Timeout) -> spawn( fun() -> @@ -921,10 +1073,10 @@ memory_tracking(Phase, Timeout, {TAcc, PAcc, BAcc}, Loops) -> after Timeout -> {T, P, B} = memory_usage(), memory_tracking( - Phase, Timeout, {TAcc + T, PAcc + P, BAcc + B}, Loops + 1) + Phase, Timeout, {TAcc + T, PAcc + P, BAcc + B}, Loops + 1 + ) end. - -if(?performance == riak_ctperf). print_memory_stats(_Phase, _TAvg, _PAvg, _BAvg) -> ok. @@ -938,17 +1090,25 @@ print_memory_stats(Phase, TAvg, PAvg, BAvg) -> -endif. dummy_accountant() -> - spawn(fun() -> receive {stop, Caller} -> Caller ! ok end end). - + spawn(fun() -> + receive + {stop, Caller} -> Caller ! ok + end + end). + stop_accounting(Accountant) -> Accountant ! {stop, self()}, - receive ok -> ok end. + receive + ok -> ok + end. stop_tracker(Tracker) -> garbage_collect(), - % Garbage collect the test process, before getting the memory stats + % Garbage collect the test process, before getting the memory stats Tracker ! {stop, self()}, - receive MemStats -> MemStats end. + receive + MemStats -> MemStats + end. -if(?ACCOUNTING). @@ -1019,19 +1179,24 @@ accounting(Phase, Timeout, Counters, Loops) -> scheduler_output(Scheduler, CounterMap) -> Total = maps:get(emulator, CounterMap) + - maps:get(aux, CounterMap) + - maps:get(check_io, CounterMap) + - maps:get(gc, CounterMap) + - maps:get(other, CounterMap), + maps:get(aux, CounterMap) + + maps:get(check_io, CounterMap) + + maps:get(gc, CounterMap) + + maps:get(other, CounterMap), GC = maps:get(gc, CounterMap), - GCperc = case Total > 0 of true -> GC/Total; false -> 0.0 end, + GCperc = + case Total > 0 of + true -> GC / Total; + false -> 0.0 + end, io:format( user, "~nFor ~w:~n" "emulator=~w, aux=~w, check_io=~w, gc=~w, other=~w~n" "total ~w~n" "percentage_gc ~.2f %~n", - [Scheduler, + [ + Scheduler, maps:get(emulator, CounterMap), maps:get(aux, CounterMap), maps:get(check_io, CounterMap), @@ -1047,4 +1212,4 @@ scheduler_output(Scheduler, CounterMap) -> accounting(_Phase, _Timeout, _ProfileList) -> dummy_accountant(). --endif. \ No newline at end of file +-endif. diff --git a/test/end_to_end/recovery_SUITE.erl b/test/end_to_end/recovery_SUITE.erl index 93e8cbd4..e97aa5c5 100644 --- a/test/end_to_end/recovery_SUITE.erl +++ b/test/end_to_end/recovery_SUITE.erl @@ -4,43 +4,44 @@ -export([all/0, init_per_suite/1, end_per_suite/1]). 
-export([ - recovery_with_samekeyupdates/1, - same_key_rotation_withindexes/1, - hot_backup_changes/1, - retain_strategy/1, - recalc_strategy/1, - recalc_transition_strategy/1, - recovr_strategy/1, - stdtag_recalc/1, - aae_missingjournal/1, - aae_bustedjournal/1, - journal_compaction_bustedjournal/1, - close_duringcompaction/1, - recompact_keydeltas/1, - simple_cachescoring/1, - replace_everything/1 - ]). - -all() -> [ - recovery_with_samekeyupdates, - same_key_rotation_withindexes, - hot_backup_changes, - retain_strategy, - recalc_strategy, - recalc_transition_strategy, - recovr_strategy, - aae_missingjournal, - aae_bustedjournal, - journal_compaction_bustedjournal, - close_duringcompaction, - recompact_keydeltas, - stdtag_recalc, - simple_cachescoring, - replace_everything - ]. + recovery_with_samekeyupdates/1, + same_key_rotation_withindexes/1, + hot_backup_changes/1, + retain_strategy/1, + recalc_strategy/1, + recalc_transition_strategy/1, + recovr_strategy/1, + stdtag_recalc/1, + aae_missingjournal/1, + aae_bustedjournal/1, + journal_compaction_bustedjournal/1, + close_duringcompaction/1, + recompact_keydeltas/1, + simple_cachescoring/1, + replace_everything/1 +]). + +all() -> + [ + recovery_with_samekeyupdates, + same_key_rotation_withindexes, + hot_backup_changes, + retain_strategy, + recalc_strategy, + recalc_transition_strategy, + recovr_strategy, + aae_missingjournal, + aae_bustedjournal, + journal_compaction_bustedjournal, + close_duringcompaction, + recompact_keydeltas, + stdtag_recalc, + simple_cachescoring, + replace_everything + ]. init_per_suite(Config) -> - testutil:init_per_suite([{suite, "recovery"}|Config]), + testutil:init_per_suite([{suite, "recovery"} | Config]), Config. end_per_suite(Config) -> @@ -56,13 +57,15 @@ replace_everything(_Config) -> CompPath = filename:join(RootPath, "journal/journal_files/post_compact"), SmallJournalCount = 7000, StdJournalCount = 20000, - BookOpts = + BookOpts = fun(JournalObjectCount) -> - [{root_path, RootPath}, - {cache_size, 2000}, - {max_journalobjectcount, JournalObjectCount}, - {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?RIAK_TAG, recalc}]}] + [ + {root_path, RootPath}, + {cache_size, 2000}, + {max_journalobjectcount, JournalObjectCount}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?RIAK_TAG, recalc}]} + ] end, {ok, Book1} = leveled_bookie:book_start(BookOpts(StdJournalCount)), BKT = <<"ReplaceAll">>, @@ -72,33 +75,33 @@ replace_everything(_Config) -> {KSpcL1, V1} = testutil:put_indexed_objects(Book1, BKT, 50000), ok = testutil:check_indexed_objects(Book1, BKT, KSpcL1, V1), - {KSpcL2, V2} = + {KSpcL2, V2} = testutil:put_altered_indexed_objects(Book1, BKT, KSpcL1), ok = testutil:check_indexed_objects(Book1, BKT, KSpcL2, V2), - {ok, FileList0} = file:list_dir(JournalPath), + {ok, FileList0} = file:list_dir(JournalPath), io:format( - "Number of journal files before compaction ~w~n", + "Number of journal files before compaction ~w~n", [length(FileList0)] ), - {ok, FileList1} = file:list_dir(CompPath), + {ok, FileList1} = file:list_dir(CompPath), FileList2 = check_compaction(Book1, CompPath), true = FileList1 =< FileList2, - %% There will normally be 5 journal files after 50K write then alter - %% That may be two files with entirely altered objects - which will be - %% compacted, and will be compacted to nothing. - %% The "middle" file - which will be 50% compactable may be scored to - %% be part of the first run, or may end up in the second run. 
If in - %% the first run, the second run will not compact and FL1 == FL2. - %% Otherwise FL1 could be 0 and FL2 1. Hard to control this as there - %% is randomisation in both the scoring and the journal size (due to - %% jittering of parameters). + %% There will normally be 5 journal files after 50K write then alter + %% That may be two files with entirely altered objects - which will be + %% compacted, and will be compacted to nothing. + %% The "middle" file - which will be 50% compactable may be scored to + %% be part of the first run, or may end up in the second run. If in + %% the first run, the second run will not compact and FL1 == FL2. + %% Otherwise FL1 could be 0 and FL2 1. Hard to control this as there + %% is randomisation in both the scoring and the journal size (due to + %% jittering of parameters). compact_and_wait(Book1, 1000), - {ok, FileList3a} = file:list_dir(CompPath), + {ok, FileList3a} = file:list_dir(CompPath), io:format("Number of files after compaction ~w~n", [length(FileList3a)]), compact_and_wait(Book1, 1000), - {ok, FileList3b} = file:list_dir(CompPath), + {ok, FileList3b} = file:list_dir(CompPath), io:format("Number of files after compaction ~w~n", [length(FileList3b)]), - %% By the fourth compaction there should be no further changes + %% By the fourth compaction there should be no further changes true = FileList3a == FileList3b, true = 0 < FileList3b, {async, BackupFun} = leveled_bookie:book_hotbackup(Book1), @@ -107,30 +110,34 @@ replace_everything(_Config) -> io:format("Restarting without key store~n"), ok = leveled_bookie:book_close(Book1), - BookOptsBackup = [{root_path, BackupPath}, - {cache_size, 2000}, - {sync_strategy, testutil:sync_strategy()}], + BookOptsBackup = [ + {root_path, BackupPath}, + {cache_size, 2000}, + {sync_strategy, testutil:sync_strategy()} + ], SW1 = os:timestamp(), {ok, Book2} = leveled_bookie:book_start(BookOptsBackup), - + io:format( "Opened backup with no ledger in ~w ms~n", - [timer:now_diff(os:timestamp(), SW1) div 1000]), + [timer:now_diff(os:timestamp(), SW1) div 1000] + ), ok = testutil:check_indexed_objects(Book2, BKT, KSpcL2, V2), ok = leveled_bookie:book_close(Book2), - + SW2 = os:timestamp(), {ok, Book3} = leveled_bookie:book_start(BookOptsBackup), io:format( "Opened backup with ledger in ~w ms~n", - [timer:now_diff(os:timestamp(), SW2) div 1000]), + [timer:now_diff(os:timestamp(), SW2) div 1000] + ), ok = testutil:check_indexed_objects(Book3, BKT, KSpcL2, V2), ok = leveled_bookie:book_destroy(Book3), {ok, Book4} = leveled_bookie:book_start(BookOpts(StdJournalCount)), {KSpcL3, V3} = testutil:put_indexed_objects(Book4, BKT1, 1000), {KSpcL4, _V4} = testutil:put_indexed_objects(Book4, BKT2, 50000), - {KSpcL5, V5} = + {KSpcL5, V5} = testutil:put_altered_indexed_objects(Book4, BKT2, KSpcL4), compact_and_wait(Book4), {async, BackupFun4} = leveled_bookie:book_hotbackup(Book4), @@ -139,10 +146,11 @@ replace_everything(_Config) -> io:format("Restarting without key store~n"), SW5 = os:timestamp(), - {ok, Book5} = leveled_bookie:book_start(BookOptsBackup), + {ok, Book5} = leveled_bookie:book_start(BookOptsBackup), io:format( "Opened backup with no ledger in ~w ms~n", - [timer:now_diff(os:timestamp(), SW5) div 1000]), + [timer:now_diff(os:timestamp(), SW5) div 1000] + ), ok = testutil:check_indexed_objects(Book5, BKT, KSpcL2, V2), ok = testutil:check_indexed_objects(Book5, BKT1, KSpcL3, V3), ok = testutil:check_indexed_objects(Book5, BKT2, KSpcL5, V5), @@ -151,16 +159,17 @@ replace_everything(_Config) -> io:format("Testing with 
sparse distribution after update~n"), io:format( "Also use smaller Journal files and confirm value used " - "in compaction~n"), + "in compaction~n" + ), {ok, Book6} = leveled_bookie:book_start(BookOpts(SmallJournalCount)), {KSpcL6, _V6} = testutil:put_indexed_objects(Book6, BKT3, 60000), {OSpcL6, RSpcL6} = lists:split(200, lists:ukeysort(1, KSpcL6)), - {KSpcL7, V7} = + {KSpcL7, V7} = testutil:put_altered_indexed_objects(Book6, BKT3, RSpcL6), - {ok, FileList4} = file:list_dir(CompPath), + {ok, FileList4} = file:list_dir(CompPath), compact_and_wait(Book6), - {ok, FileList5} = file:list_dir(CompPath), - {OSpcL6A, V7} = + {ok, FileList5} = file:list_dir(CompPath), + {OSpcL6A, V7} = testutil:put_altered_indexed_objects(Book6, BKT3, OSpcL6, true, V7), {async, BackupFun6} = leveled_bookie:book_hotbackup(Book6), ok = BackupFun6(BackupPath), @@ -183,27 +192,29 @@ replace_everything(_Config) -> io:format("Restarting without key store~n"), SW7 = os:timestamp(), - {ok, Book7} = leveled_bookie:book_start(BookOptsBackup), + {ok, Book7} = leveled_bookie:book_start(BookOptsBackup), io:format( "Opened backup with no ledger in ~w ms~n", - [timer:now_diff(os:timestamp(), SW7) div 1000]), + [timer:now_diff(os:timestamp(), SW7) div 1000] + ), ok = testutil:check_indexed_objects(Book7, BKT3, KSpcL7 ++ OSpcL6A, V7), ok = leveled_bookie:book_destroy(Book7), testutil:reset_filestructure(BackupPath), testutil:reset_filestructure(). - close_duringcompaction(_Config) -> - % Prompt a compaction, and close immedately - confirm that the close + % Prompt a compaction, and close immedately - confirm that the close % happens without error. % This should trigger the iclerk to receive a close during the file % scoring stage RootPath = testutil:reset_filestructure(), - BookOpts = [{root_path, RootPath}, - {cache_size, 2000}, - {max_journalsize, 2000000}, - {sync_strategy, testutil:sync_strategy()}], + BookOpts = [ + {root_path, RootPath}, + {cache_size, 2000}, + {max_journalsize, 2000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Spcl1, LastV1} = rotating_object_check(BookOpts, <<"Bucket1">>, 6400), {ok, Book1} = leveled_bookie:book_start(BookOpts), ok = leveled_bookie:book_compactjournal(Book1, 30000), @@ -217,15 +228,20 @@ recovery_with_samekeyupdates(_Config) -> % run a test that involves many updates to the same key, and check that % this doesn't cause performance to flatline in either the normal "PUT" % case, or in the case of the recovery from a lost keystore - AcceptableDuration = 180, % 3 minutes - E2E_SW = os:timestamp(), % Used to track time for overall job - + + % 3 minutes + AcceptableDuration = 180, + % Used to track time for overall job + E2E_SW = os:timestamp(), + RootPath = testutil:reset_filestructure(), BackupPath = testutil:reset_filestructure("backupSKU"), - BookOpts = [{root_path, RootPath}, - {cache_size, 2000}, - {max_journalsize, 20000000}, - {sync_strategy, testutil:sync_strategy()}], + BookOpts = [ + {root_path, RootPath}, + {cache_size, 2000}, + {max_journalsize, 20000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Book1} = leveled_bookie:book_start(BookOpts), % Load in 5K different keys @@ -234,32 +250,38 @@ recovery_with_samekeyupdates(_Config) -> io:format("Commence object generation and load~n"), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = fun() -> [] end, - ObjL1 = testutil:generate_objects(5000, - {fixed_binary, 1}, - [], - ObjectGen, - IndexGen, - <<"Bucket1">>), + ObjL1 = testutil:generate_objects( + 5000, + {fixed_binary, 1}, + [], + 
ObjectGen, + IndexGen, + <<"Bucket1">> + ), testutil:riakload(Book1, ObjL1), RepeatedLoadFun = fun(_I, _Acc) -> ObjRL = - testutil:generate_objects(5, - {fixed_binary, 5001}, - [], - ObjectGen, - IndexGen, - <<"Bucket1">>), + testutil:generate_objects( + 5, + {fixed_binary, 5001}, + [], + ObjectGen, + IndexGen, + <<"Bucket1">> + ), testutil:riakload(Book1, ObjRL), ObjRL end, FinalObjRL = lists:foldl(RepeatedLoadFun, [], lists:seq(1, 5000)), - ObjL2 = testutil:generate_objects(5000, - {fixed_binary, 6001}, - [], - ObjectGen, - IndexGen, - <<"Bucket1">>), + ObjL2 = testutil:generate_objects( + 5000, + {fixed_binary, 6001}, + [], + ObjectGen, + IndexGen, + <<"Bucket1">> + ), testutil:riakload(Book1, ObjL2), % Fetch all of ObjL1 @@ -269,7 +291,7 @@ recovery_with_samekeyupdates(_Config) -> ok = testutil:checkhead_forlist(Book1, ObjL2), io:format("Check for presence of repeated objects~n"), % Fetch repeated objects 200 times each - CheckFun1 = + CheckFun1 = fun(_I) -> ok = testutil:checkhead_forlist(Book1, FinalObjRL) end, lists:foreach(CheckFun1, lists:seq(1, 200)), io:format("Checks complete~n"), @@ -281,10 +303,12 @@ recovery_with_samekeyupdates(_Config) -> io:format("Restarting without key store~n"), ok = leveled_bookie:book_close(Book1), - BookOptsBackup = [{root_path, BackupPath}, - {cache_size, 2000}, - {max_journalsize, 20000000}, - {sync_strategy, testutil:sync_strategy()}], + BookOptsBackup = [ + {root_path, BackupPath}, + {cache_size, 2000}, + {max_journalsize, 20000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Book2} = leveled_bookie:book_start(BookOptsBackup), % Fetch all of ObjL1 @@ -294,12 +318,12 @@ recovery_with_samekeyupdates(_Config) -> ok = testutil:checkhead_forlist(Book2, ObjL2), io:format("Check for presence of repeated objects~n"), % Fetch repeated objects 200 times each - CheckFun2 = + CheckFun2 = fun(_I) -> ok = testutil:checkhead_forlist(Book2, FinalObjRL) end, lists:foreach(CheckFun2, lists:seq(1, 200)), io:format("Checks complete from backup~n"), - - DurationOfTest = timer:now_diff(os:timestamp(), E2E_SW)/(1000 * 1000), + + DurationOfTest = timer:now_diff(os:timestamp(), E2E_SW) / (1000 * 1000), io:format("Duration of test was ~w s~n", [DurationOfTest]), true = DurationOfTest < AcceptableDuration, @@ -309,7 +333,7 @@ recovery_with_samekeyupdates(_Config) -> check_compaction(Book, CompPath) -> compact_and_wait(Book, 1000), - {ok, FileList} = file:list_dir(CompPath), + {ok, FileList} = file:list_dir(CompPath), io:format("Number of files after compaction ~w~n", [length(FileList)]), case FileList > 0 of true -> @@ -323,11 +347,13 @@ same_key_rotation_withindexes(_Config) -> % recalc the indexes correctly, even when the key exists multiple times % in the loader's mock ledger cache RootPath = testutil:reset_filestructure(), - BookOpts = [{root_path, RootPath}, - {cache_size, 2000}, - {max_journalsize, 20000000}, - {reload_strategy, [{?RIAK_TAG, recalc}]}, - {sync_strategy, testutil:sync_strategy()}], + BookOpts = [ + {root_path, RootPath}, + {cache_size, 2000}, + {max_journalsize, 20000000}, + {reload_strategy, [{?RIAK_TAG, recalc}]}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Book1} = leveled_bookie:book_start(BookOpts), IndexGenFun = fun(ID) -> @@ -335,7 +361,7 @@ same_key_rotation_withindexes(_Config) -> [{add, list_to_binary("binary_bin"), <>}] end end, - + Bucket = <<"TestBucket">>, ObjectGenFun = @@ -343,27 +369,34 @@ same_key_rotation_withindexes(_Config) -> Key = list_to_binary("Key" ++ integer_to_list(KeyID)), Value = <>, GenRemoveFun = 
IndexGenFun(IndexID - 1), - testutil:set_object(Bucket, - Key, - Value, - IndexGenFun(IndexID), - GenRemoveFun()) + testutil:set_object( + Bucket, + Key, + Value, + IndexGenFun(IndexID), + GenRemoveFun() + ) end, - + IdxCnt = 8, KeyCnt = 50, Sequence = - lists:map(fun(K) -> lists:map(fun(I) -> {K, I} end, lists:seq(1, IdxCnt)) end, - lists:seq(1, KeyCnt)), + lists:map( + fun(K) -> lists:map(fun(I) -> {K, I} end, lists:seq(1, IdxCnt)) end, + lists:seq(1, KeyCnt) + ), ObjList = - lists:map(fun({K, I}) -> ObjectGenFun(K, I) end, lists:flatten(Sequence)), + lists:map( + fun({K, I}) -> ObjectGenFun(K, I) end, lists:flatten(Sequence) + ), lists:foreach( - fun({Obj, SpcL}) -> testutil:book_riakput(Book1, Obj, SpcL) end, - ObjList), + fun({Obj, SpcL}) -> testutil:book_riakput(Book1, Obj, SpcL) end, + ObjList + ), - FoldKeysFun = fun(_B, K, Acc) -> [K|Acc] end, + FoldKeysFun = fun(_B, K, Acc) -> [K | Acc] end, CheckFun = fun(Bookie) -> {async, R} = @@ -371,15 +404,20 @@ same_key_rotation_withindexes(_Config) -> Bookie, {Bucket, <<>>}, {FoldKeysFun, []}, - {list_to_binary("binary_bin"), - <<0:32/integer>>, - <<255:32/integer>>}, - {true, undefined}), + {list_to_binary("binary_bin"), <<0:32/integer>>, << + 255:32/integer + >>}, + {true, undefined} + ), QR = R(), BadAnswers = - lists:filter(fun({I, _K}) -> I =/= <> end, QR), - io:format("Results ~w BadAnswers ~w~n", - [length(QR), length(BadAnswers)]), + lists:filter( + fun({I, _K}) -> I =/= <> end, QR + ), + io:format( + "Results ~w BadAnswers ~w~n", + [length(QR), length(BadAnswers)] + ), true = length(QR) == KeyCnt, true = [] == BadAnswers end, @@ -393,29 +431,30 @@ same_key_rotation_withindexes(_Config) -> testutil:reset_filestructure(). - hot_backup_changes(_Config) -> RootPath = testutil:reset_filestructure(), BackupPath = testutil:reset_filestructure("backup0"), - BookOpts = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalsize, 10000000}, - {sync_strategy, testutil:sync_strategy()}], - B = <<"Bucket0">>, + BookOpts = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalsize, 10000000}, + {sync_strategy, testutil:sync_strategy()} + ], + B = <<"Bucket0">>, {ok, Book1} = leveled_bookie:book_start(BookOpts), {KSpcL1, _V1} = testutil:put_indexed_objects(Book1, B, 20000), - + {async, BackupFun1} = leveled_bookie:book_hotbackup(Book1), ok = BackupFun1(BackupPath), - {ok, FileList1} = + {ok, FileList1} = file:list_dir(filename:join(BackupPath, "journal/journal_files/")), - + {KSpcL2, V2} = testutil:put_altered_indexed_objects(Book1, B, KSpcL1), {async, BackupFun2} = leveled_bookie:book_hotbackup(Book1), ok = BackupFun2(BackupPath), - {ok, FileList2} = + {ok, FileList2} = file:list_dir(filename:join(BackupPath, "journal/journal_files/")), ok = testutil:check_indexed_objects(Book1, B, KSpcL2, V2), @@ -423,12 +462,12 @@ hot_backup_changes(_Config) -> {async, BackupFun3} = leveled_bookie:book_hotbackup(Book1), ok = BackupFun3(BackupPath), - {ok, FileList3} = + {ok, FileList3} = file:list_dir(filename:join(BackupPath, "journal/journal_files/")), % Confirm null impact of backing up twice in a row {async, BackupFun4} = leveled_bookie:book_hotbackup(Book1), ok = BackupFun4(BackupPath), - {ok, FileList4} = + {ok, FileList4} = file:list_dir(filename:join(BackupPath, "journal/journal_files/")), true = length(FileList2) > length(FileList1), @@ -438,10 +477,12 @@ hot_backup_changes(_Config) -> ok = leveled_bookie:book_close(Book1), RootPath = testutil:reset_filestructure(), - BookOptsBackup = [{root_path, BackupPath}, - {cache_size, 2000}, - 
{max_journalsize, 20000000}, - {sync_strategy, testutil:sync_strategy()}], + BookOptsBackup = [ + {root_path, BackupPath}, + {cache_size, 2000}, + {max_journalsize, 20000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, BookBackup} = leveled_bookie:book_start(BookOptsBackup), ok = testutil:check_indexed_objects(BookBackup, B, KSpcL2, V2), @@ -451,7 +492,6 @@ hot_backup_changes(_Config) -> testutil:reset_filestructure("backup0"), testutil:reset_filestructure(). - retain_strategy(_Config) -> rotate_wipe_compact(retain, retain). @@ -461,20 +501,23 @@ recalc_strategy(_Config) -> recalc_transition_strategy(_Config) -> rotate_wipe_compact(retain, recalc). - rotate_wipe_compact(Strategy1, Strategy2) -> RootPath = testutil:reset_filestructure(), - BookOpts = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalobjectcount, 5000}, - {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?RIAK_TAG, Strategy1}]}], - BookOptsAlt = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalobjectcount, 2000}, - {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?RIAK_TAG, Strategy2}]}, - {max_run_length, 8}], + BookOpts = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 5000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?RIAK_TAG, Strategy1}]} + ], + BookOptsAlt = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 2000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?RIAK_TAG, Strategy2}]}, + {max_run_length, 8} + ], {ok, Spcl3, LastV3} = rotating_object_check(BookOpts, <<"Bucket3">>, 400), ok = restart_from_blankledger(BookOpts, [{<<"Bucket3">>, Spcl3, LastV3}]), @@ -482,13 +525,13 @@ rotate_wipe_compact(Strategy1, Strategy2) -> rotating_object_check(BookOpts, <<"Bucket4">>, 800), ok = restart_from_blankledger( - BookOpts, + BookOpts, [{<<"Bucket3">>, Spcl3, LastV3}, {<<"Bucket4">>, Spcl4, LastV4}] ), {ok, Spcl5, LastV5} = rotating_object_check(BookOpts, <<"Bucket5">>, 1600), ok = restart_from_blankledger( - BookOpts, + BookOpts, [{<<"Bucket3">>, Spcl3, LastV3}, {<<"Bucket5">>, Spcl5, LastV5}] ), {ok, Spcl6, LastV6} = rotating_object_check(BookOpts, <<"Bucket6">>, 3200), @@ -528,12 +571,9 @@ rotate_wipe_compact(Strategy1, Strategy2) -> {KSpcL2, _V2} = testutil:put_indexed_objects(Book3, <<"AltBucket6">>, 3000), Q2 = - fun(RT) -> - {index_query, - <<"AltBucket6">>, - {fun testutil:foldkeysfun/3, []}, - {<<"idx1_bin">>, <<"#">>, <<"|">>}, - {RT, undefined}} + fun(RT) -> + {index_query, <<"AltBucket6">>, {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} end, {async, KFolder2A} = leveled_bookie:book_returnfolder(Book3, Q2(false)), KeyList2A = lists:usort(KFolder2A()), @@ -562,7 +602,7 @@ rotate_wipe_compact(Strategy1, Strategy2) -> io:format("Compact after deletions~n"), compact_and_wait(Book4), - + {async, KFolder4AD} = leveled_bookie:book_returnfolder(Book4, Q2(false)), KeyList4AD = lists:usort(KFolder4AD()), true = length(KeyList4AD) == 0, @@ -571,7 +611,6 @@ rotate_wipe_compact(Strategy1, Strategy2) -> testutil:reset_filestructure(). 
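%% Note on rotate_wipe_compact/2 (above): rotating object checks are run
%% under the first reload strategy, with restart_from_blankledger/2 used to
%% rebuild the ledger from the journal between loads; index folds are then
%% re-checked after further updates, deletes and journal compaction.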
- stdtag_recalc(_Config) -> %% Setting the ?STD_TAG to do recalc, should result in the ?STD_TAG %% behaving like recovr - as no recalc is done for ?STD_TAG @@ -581,20 +620,29 @@ stdtag_recalc(_Config) -> RootPath = testutil:reset_filestructure(), B0 = <<"B0">>, KeyCount = 7000, - BookOpts = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalobjectcount, 5000}, - {max_pencillercachesize, 10000}, - {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?STD_TAG, recalc}]}], + BookOpts = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 5000}, + {max_pencillercachesize, 10000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?STD_TAG, recalc}]} + ], {ok, Book1} = leveled_bookie:book_start(BookOpts), LoadFun = fun(Book) -> fun(I) -> - testutil:stdload_object(Book, - B0, erlang:phash2(I rem KeyCount), - I, erlang:phash2({value, I}), - infinity, ?STD_TAG, false, false) + testutil:stdload_object( + Book, + B0, + erlang:phash2(I rem KeyCount), + I, + erlang:phash2({value, I}), + infinity, + ?STD_TAG, + false, + false + ) end end, lists:foreach(LoadFun(Book1), lists:seq(1, KeyCount)), @@ -602,18 +650,21 @@ stdtag_recalc(_Config) -> CountFold = fun(Book, CurrentCount) -> - leveled_bookie:book_indexfold(Book, - B0, - {fun(_BF, _KT, Acc) -> Acc + 1 end, - 0}, - {<<"temp_int">>, 0, CurrentCount}, - {true, undefined}) + leveled_bookie:book_indexfold( + Book, + B0, + {fun(_BF, _KT, Acc) -> Acc + 1 end, 0}, + {<<"temp_int">>, 0, CurrentCount}, + {true, undefined} + ) end, {async, FolderA} = CountFold(Book1, 2 * KeyCount), CountA = FolderA(), - io:format("Counted double index entries ~w - everything loaded OK~n", - [CountA]), + io:format( + "Counted double index entries ~w - everything loaded OK~n", + [CountA] + ), true = 2 * KeyCount == CountA, ok = leveled_bookie:book_close(Book1), @@ -623,8 +674,10 @@ stdtag_recalc(_Config) -> {async, FolderB} = CountFold(Book2, 3 * KeyCount), CountB = FolderB(), - io:format("Maybe counted less index entries ~w - everything not loaded~n", - [CountB]), + io:format( + "Maybe counted less index entries ~w - everything not loaded~n", + [CountB] + ), true = 3 * KeyCount >= CountB, compact_and_wait(Book2), @@ -632,35 +685,41 @@ stdtag_recalc(_Config) -> io:format("Restart from blank ledger"), - leveled_penciller:clean_testdir(proplists:get_value(root_path, BookOpts) ++ - "/ledger"), + leveled_penciller:clean_testdir( + proplists:get_value(root_path, BookOpts) ++ + "/ledger" + ), {ok, Book3} = leveled_bookie:book_start(BookOpts), {async, FolderC} = CountFold(Book3, 3 * KeyCount), CountC = FolderC(), - io:format("Missing index entries ~w - recalc not supported on ?STD_TAG~n", - [CountC]), + io:format( + "Missing index entries ~w - recalc not supported on ?STD_TAG~n", + [CountC] + ), true = 3 * KeyCount > CountC, ok = leveled_bookie:book_close(Book3), - - testutil:reset_filestructure(). + testutil:reset_filestructure(). 
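%% recovr_strategy/1 (below) uses the recovr reload strategy: after loading
%% rotating objects into Bucket6 the ledger directory is cleaned and the
%% bookie restarted, with surviving objects checked by HEAD/GET (values and
%% vclocks) plus index folds, before restarting with smaller journal files
%% and checking deletes against AltBucket6.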
recovr_strategy(_Config) -> RootPath = testutil:reset_filestructure(), - BookOpts = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalobjectcount, 8000}, - {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?RIAK_TAG, recovr}]}], - + BookOpts = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 8000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?RIAK_TAG, recovr}]} + ], + R6 = rotating_object_check(BookOpts, <<"Bucket6">>, 6400), {ok, AllSpcL, V4} = R6, leveled_penciller:clean_testdir( - proplists:get_value(root_path, BookOpts) ++ "/ledger"), + proplists:get_value(root_path, BookOpts) ++ "/ledger" + ), {ok, Book1} = leveled_bookie:book_start(BookOpts), - + {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Book1, TestObject, TestSpec), ok = @@ -670,9 +729,9 @@ recovr_strategy(_Config) -> testutil:get_key(TestObject), [] ), - + lists:foreach( - fun({K, _SpcL}) -> + fun({K, _SpcL}) -> {ok, OH} = testutil:book_riakhead(Book1, <<"Bucket6">>, K), VCH = testutil:get_vclock(OH), {ok, OG} = testutil:book_riakget(Book1, <<"Bucket6">>, K), @@ -681,32 +740,34 @@ recovr_strategy(_Config) -> true = V == V4, true = VCH == VCG end, - lists:nthtail(6400, AllSpcL)), + lists:nthtail(6400, AllSpcL) + ), Q = fun(RT) -> - {index_query, - <<"Bucket6">>, - {fun testutil:foldkeysfun/3, []}, - {<<"idx1_bin">>, <<"#">>, <<"|">>}, - {RT, undefined}} + {index_query, <<"Bucket6">>, {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} end, {async, TFolder} = leveled_bookie:book_returnfolder(Book1, Q(true)), KeyTermList = TFolder(), {async, KFolder} = leveled_bookie:book_returnfolder(Book1, Q(false)), KeyList = lists:usort(KFolder()), - io:format("KeyList ~w KeyTermList ~w~n", - [length(KeyList), length(KeyTermList)]), + io:format( + "KeyList ~w KeyTermList ~w~n", + [length(KeyList), length(KeyTermList)] + ), true = length(KeyList) == 6400, true = length(KeyList) < length(KeyTermList), true = length(KeyTermList) < 25600, ok = leveled_bookie:book_close(Book1), - RevisedOpts = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalobjectcount, 2000}, - {sync_strategy, testutil:sync_strategy()}, - {reload_strategy, [{?RIAK_TAG, recovr}]}], + RevisedOpts = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalobjectcount, 2000}, + {sync_strategy, testutil:sync_strategy()}, + {reload_strategy, [{?RIAK_TAG, recovr}]} + ], {ok, Book2} = leveled_bookie:book_start(RevisedOpts), @@ -717,11 +778,8 @@ recovr_strategy(_Config) -> Q2 = fun(RT) -> - {index_query, - <<"AltBucket6">>, - {fun testutil:foldkeysfun/3, []}, - {<<"idx1_bin">>, <<"#">>, <<"|">>}, - {RT, undefined}} + {index_query, <<"AltBucket6">>, {fun testutil:foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"#">>, <<"|">>}, {RT, undefined}} end, {async, KFolder2A} = leveled_bookie:book_returnfolder(Book2, Q2(false)), KeyList2A = lists:usort(KFolder2A()), @@ -731,7 +789,8 @@ recovr_strategy(_Config) -> fun({DK, [{add, DIdx, DTerm}]}) -> ok = testutil:book_riakdelete( - Book2, <<"AltBucket6">>, DK, [{remove, DIdx, DTerm}]) + Book2, <<"AltBucket6">>, DK, [{remove, DIdx, DTerm}] + ) end, lists:foreach(DeleteFun, KSpcL2), @@ -753,76 +812,92 @@ recovr_strategy(_Config) -> testutil:reset_filestructure(). 
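%% aae_missingjournal/1 (below) loads objects, deletes one journal file from
%% disk, restarts, and confirms that a foldheads_allkeys fold now returns
%% fewer heads than before the deletion, but more than zero.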
- aae_missingjournal(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts = [{root_path, RootPath}, - {max_journalsize, 20000000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts = [ + {root_path, RootPath}, + {max_journalsize, 20000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), testutil:check_forobject(Bookie1, TestObject), GenList = [2], - _CLs = testutil:load_objects(20000, GenList, Bookie1, TestObject, - fun testutil:generate_objects/2), - + _CLs = testutil:load_objects( + 20000, + GenList, + Bookie1, + TestObject, + fun testutil:generate_objects/2 + ), + FoldHeadsFun = - fun(B, K, _V, Acc) -> [{B, K}|Acc] end, - + fun(B, K, _V, Acc) -> [{B, K} | Acc] end, + {async, AllHeadF1} = - leveled_bookie:book_headfold(Bookie1, - ?RIAK_TAG, - {FoldHeadsFun, []}, - true, - true, - false), + leveled_bookie:book_headfold( + Bookie1, + ?RIAK_TAG, + {FoldHeadsFun, []}, + true, + true, + false + ), HeadL1 = length(AllHeadF1()), io:format("Fold head returned ~w objects~n", [HeadL1]), - + ok = leveled_bookie:book_close(Bookie1), CDBFiles = testutil:find_journals(RootPath), - [HeadF|_Rest] = CDBFiles, + [HeadF | _Rest] = CDBFiles, io:format("Selected Journal for removal of ~s~n", [HeadF]), ok = file:delete(RootPath ++ "/journal/journal_files/" ++ HeadF), - + {ok, Bookie2} = leveled_bookie:book_start(StartOpts), % Check that fold heads picks up on the missing file {async, AllHeadF2} = - leveled_bookie:book_returnfolder(Bookie2, - {foldheads_allkeys, - ?RIAK_TAG, - FoldHeadsFun, - true, true, false, - false, false}), + leveled_bookie:book_returnfolder( + Bookie2, + {foldheads_allkeys, ?RIAK_TAG, FoldHeadsFun, true, true, false, + false, false} + ), HeadL2 = length(AllHeadF2()), io:format("Fold head returned ~w objects~n", [HeadL2]), true = HeadL2 < HeadL1, true = HeadL2 > 0, - + ok = leveled_bookie:book_close(Bookie2), testutil:reset_filestructure(). 
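%% simple_cachescoring/1 (below) compares repeated journal compaction runs
%% with and without {journalcompaction_scoreonein, 8}: with score caching,
%% the first run (which must score every file) should be slower relative to
%% the later runs than when no caching is configured.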
simple_cachescoring(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts = [{root_path, RootPath}, - {max_journalobjectcount, 2000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts = [ + {root_path, RootPath}, + {max_journalobjectcount, 2000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = - leveled_bookie:book_start(StartOpts ++ - [{journalcompaction_scoreonein, 8}]), + leveled_bookie:book_start( + StartOpts ++ + [{journalcompaction_scoreonein, 8}] + ), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), testutil:check_forobject(Bookie1, TestObject), GenList = [2, 32002, 64002, 96002], - _CLs = testutil:load_objects(32000, GenList, Bookie1, TestObject, - fun testutil:generate_objects/2), - + _CLs = testutil:load_objects( + 32000, + GenList, + Bookie1, + TestObject, + fun testutil:generate_objects/2 + ), + F = fun leveled_bookie:book_islastcompactionpending/1, WaitForCompaction = - fun(B) -> + fun(B) -> fun(X, Pending) -> - case X of + case X of 1 -> leveled_bookie:book_compactjournal(B, 30000); _ -> @@ -832,8 +907,11 @@ simple_cachescoring(_Config) -> false -> false; true -> - io:format("Loop ~w waiting for journal " - ++ "compaction to complete~n", [X]), + io:format( + "Loop ~w waiting for journal " ++ + "compaction to complete~n", + [X] + ), timer:sleep(100), F(B) end @@ -847,11 +925,13 @@ simple_cachescoring(_Config) -> {TC2, false} = timer:tc(lists, foldl, Args1), {TC3, false} = timer:tc(lists, foldl, Args1), {TC4, false} = timer:tc(lists, foldl, Args1), - + ok = leveled_bookie:book_close(Bookie1), {ok, Bookie2} = leveled_bookie:book_start(StartOpts), - io:format("Re-opened bookie withour caching - re-compare compaction time~n"), + io:format( + "Re-opened bookie withour caching - re-compare compaction time~n" + ), io:format("Scoring for first time - every file should need scoring~n"), Args2 = [WaitForCompaction(Bookie2), true, lists:seq(1, 300)], {TN0, false} = timer:tc(lists, foldl, Args2), @@ -860,285 +940,338 @@ simple_cachescoring(_Config) -> {TN2, false} = timer:tc(lists, foldl, Args2), {TN3, false} = timer:tc(lists, foldl, Args2), {TN4, false} = timer:tc(lists, foldl, Args2), - - AvgSecondRunCache = (TC1 + TC2 +TC3 + TC4) div 4000, - AvgSecondRunNoCache = (TN1 + TN2 +TN3 + TN4) div 4000, - - io:format("With caching ~w first run ~w average other runs~n", - [TC0 div 1000, AvgSecondRunCache]), - io:format("Without caching ~w first run ~w average other runs~n", - [TN0 div 1000, AvgSecondRunNoCache]), + + AvgSecondRunCache = (TC1 + TC2 + TC3 + TC4) div 4000, + AvgSecondRunNoCache = (TN1 + TN2 + TN3 + TN4) div 4000, + + io:format( + "With caching ~w first run ~w average other runs~n", + [TC0 div 1000, AvgSecondRunCache] + ), + io:format( + "Without caching ~w first run ~w average other runs~n", + [TN0 div 1000, AvgSecondRunNoCache] + ), true = (TC0 > AvgSecondRunCache), - true = (TC0/AvgSecondRunCache) > (TN0/AvgSecondRunNoCache), + true = (TC0 / AvgSecondRunCache) > (TN0 / AvgSecondRunNoCache), ok = leveled_bookie:book_close(Bookie2), io:format("Exit having proven simply that caching score is faster~n"), testutil:reset_filestructure(). 
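%% aae_bustedjournal/1 (below) corrupts a journal file and confirms that key
%% and head folds still return every key, while GET-based and object folds
%% lose at most a bounded number of entries; the file is then restored and
%% corrupted again at a different offset to repeat the checks.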
- aae_bustedjournal(_Config) -> RootPath = testutil:reset_filestructure(), - StartOpts = [{root_path, RootPath}, - {max_journalsize, 20000000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts = [ + {root_path, RootPath}, + {max_journalsize, 20000000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), testutil:check_forobject(Bookie1, TestObject), GenList = [2], - _CLs = testutil:load_objects(16000, GenList, Bookie1, TestObject, - fun testutil:generate_objects/2), + _CLs = testutil:load_objects( + 16000, + GenList, + Bookie1, + TestObject, + fun testutil:generate_objects/2 + ), ok = leveled_bookie:book_close(Bookie1), CDBFiles = testutil:find_journals(RootPath), - [HeadF|_Rest] = CDBFiles, - % Select the file to corrupt before completing the load - so as - % not to corrupt the journal required on startup + [HeadF | _Rest] = CDBFiles, + % Select the file to corrupt before completing the load - so as + % not to corrupt the journal required on startup {ok, TempB} = leveled_bookie:book_start(StartOpts), - % Load the remaining objects which may be reloaded on startup due to + % Load the remaining objects which may be reloaded on startup due to % non-writing of L0 - _CLsAdd = testutil:load_objects(4000, - [16002], - TempB, - TestObject, - fun testutil:generate_objects/2), + _CLsAdd = testutil:load_objects( + 4000, + [16002], + TempB, + TestObject, + fun testutil:generate_objects/2 + ), ok = leveled_bookie:book_close(TempB), io:format("Selected Journal for corruption of ~s~n", [HeadF]), testutil:corrupt_journal(RootPath, HeadF, 1000, 2048, 1000), {ok, Bookie2} = leveled_bookie:book_start(StartOpts), - - FoldKeysFun = fun(B, K, Acc) -> [{B, K}|Acc] end, + + FoldKeysFun = fun(B, K, Acc) -> [{B, K} | Acc] end, AllKeyQuery = {keylist, o_rkv, {FoldKeysFun, []}}, {async, KeyF} = leveled_bookie:book_returnfolder(Bookie2, AllKeyQuery), KeyList = KeyF(), 20001 = length(KeyList), - HeadCount = lists:foldl(fun({B, K}, Acc) -> - case testutil:book_riakhead(Bookie2, - B, - K) of - {ok, _} -> Acc + 1; - not_found -> Acc - end - end, - 0, - KeyList), + HeadCount = lists:foldl( + fun({B, K}, Acc) -> + case + testutil:book_riakhead( + Bookie2, + B, + K + ) + of + {ok, _} -> Acc + 1; + not_found -> Acc + end + end, + 0, + KeyList + ), 20001 = HeadCount, - GetCount = lists:foldl(fun({B, K}, Acc) -> - case testutil:book_riakget(Bookie2, - B, - K) of - {ok, _} -> Acc + 1; - not_found -> Acc - end - end, - 0, - KeyList), + GetCount = lists:foldl( + fun({B, K}, Acc) -> + case + testutil:book_riakget( + Bookie2, + B, + K + ) + of + {ok, _} -> Acc + 1; + not_found -> Acc + end + end, + 0, + KeyList + ), true = GetCount > 19000, true = GetCount < HeadCount, - - {async, HashTreeF1} = leveled_bookie:book_returnfolder(Bookie2, - {hashlist_query, - ?RIAK_TAG, - false}), + + {async, HashTreeF1} = leveled_bookie:book_returnfolder( + Bookie2, + {hashlist_query, ?RIAK_TAG, false} + ), KeyHashList1 = HashTreeF1(), 20001 = length(KeyHashList1), - {async, HashTreeF2} = leveled_bookie:book_returnfolder(Bookie2, - {hashlist_query, - ?RIAK_TAG, - true}), + {async, HashTreeF2} = leveled_bookie:book_returnfolder( + Bookie2, + {hashlist_query, ?RIAK_TAG, true} + ), KeyHashList2 = HashTreeF2(), % The file is still there, and the hashtree is not corrupted KeyHashList2 = KeyHashList1, % Will need to remove the file or corrupt the hashtree to get presence to % fail - 
- FoldObjectsFun = - fun(B, K, V, Acc) -> + + FoldObjectsFun = + fun(B, K, V, Acc) -> VC = testutil:get_vclock(V), H = erlang:phash2(lists:sort(VC)), - [{B, K, H}|Acc] + [{B, K, H} | Acc] end, SW = os:timestamp(), - {async, HashTreeF3} = leveled_bookie:book_returnfolder(Bookie2, - {foldobjects_allkeys, - ?RIAK_TAG, - FoldObjectsFun, - false}), + {async, HashTreeF3} = leveled_bookie:book_returnfolder( + Bookie2, + {foldobjects_allkeys, ?RIAK_TAG, FoldObjectsFun, false} + ), KeyHashList3 = HashTreeF3(), - + true = length(KeyHashList3) > 19000, true = length(KeyHashList3) < HeadCount, Delta = length(lists:subtract(KeyHashList1, KeyHashList3)), true = Delta < 1001, - io:format("Fetch of hashtree using fold objects took ~w microseconds" ++ - " and found a Delta of ~w and an objects count of ~w~n", - [timer:now_diff(os:timestamp(), SW), - Delta, - length(KeyHashList3)]), - + io:format( + "Fetch of hashtree using fold objects took ~w microseconds" ++ + " and found a Delta of ~w and an objects count of ~w~n", + [ + timer:now_diff(os:timestamp(), SW), + Delta, + length(KeyHashList3) + ] + ), + ok = leveled_bookie:book_close(Bookie2), {ok, BytesCopied} = testutil:restore_file(RootPath, HeadF), io:format("File restored is of size ~w~n", [BytesCopied]), {ok, Bookie3} = leveled_bookie:book_start(StartOpts), - + SW4 = os:timestamp(), - {async, HashTreeF4} = leveled_bookie:book_returnfolder(Bookie3, - {foldobjects_allkeys, - ?RIAK_TAG, - FoldObjectsFun, - false}), + {async, HashTreeF4} = leveled_bookie:book_returnfolder( + Bookie3, + {foldobjects_allkeys, ?RIAK_TAG, FoldObjectsFun, false} + ), KeyHashList4 = HashTreeF4(), - + true = length(KeyHashList4) == 20001, - io:format("Fetch of hashtree using fold objects took ~w microseconds" ++ - " and found an object count of ~w~n", - [timer:now_diff(os:timestamp(), SW4), length(KeyHashList4)]), - + io:format( + "Fetch of hashtree using fold objects took ~w microseconds" ++ + " and found an object count of ~w~n", + [timer:now_diff(os:timestamp(), SW4), length(KeyHashList4)] + ), + ok = leveled_bookie:book_close(Bookie3), testutil:corrupt_journal(RootPath, HeadF, 500, BytesCopied - 8000, 14), - + {ok, Bookie4} = leveled_bookie:book_start(StartOpts), - + SW5 = os:timestamp(), - {async, HashTreeF5} = leveled_bookie:book_returnfolder(Bookie4, - {foldobjects_allkeys, - ?RIAK_TAG, - FoldObjectsFun, - false}), + {async, HashTreeF5} = leveled_bookie:book_returnfolder( + Bookie4, + {foldobjects_allkeys, ?RIAK_TAG, FoldObjectsFun, false} + ), KeyHashList5 = HashTreeF5(), - + true = length(KeyHashList5) > 19000, true = length(KeyHashList5) < HeadCount, Delta5 = length(lists:subtract(KeyHashList1, KeyHashList5)), true = Delta5 < 1001, - io:format("Fetch of hashtree using fold objects took ~w microseconds" ++ - " and found a Delta of ~w and an objects count of ~w~n", - [timer:now_diff(os:timestamp(), SW5), - Delta5, - length(KeyHashList5)]), - - {async, HashTreeF6} = leveled_bookie:book_returnfolder(Bookie4, - {hashlist_query, - ?RIAK_TAG, - true}), + io:format( + "Fetch of hashtree using fold objects took ~w microseconds" ++ + " and found a Delta of ~w and an objects count of ~w~n", + [ + timer:now_diff(os:timestamp(), SW5), + Delta5, + length(KeyHashList5) + ] + ), + + {async, HashTreeF6} = leveled_bookie:book_returnfolder( + Bookie4, + {hashlist_query, ?RIAK_TAG, true} + ), KeyHashList6 = HashTreeF6(), true = length(KeyHashList6) > 19000, true = length(KeyHashList6) < HeadCount, - + ok = leveled_bookie:book_close(Bookie4), - + testutil:restore_topending(RootPath, HeadF), 
- + {ok, Bookie5} = leveled_bookie:book_start(StartOpts), - + SW6 = os:timestamp(), - {async, HashTreeF7} = leveled_bookie:book_returnfolder(Bookie5, - {foldobjects_allkeys, - ?RIAK_TAG, - FoldObjectsFun, - false}), + {async, HashTreeF7} = leveled_bookie:book_returnfolder( + Bookie5, + {foldobjects_allkeys, ?RIAK_TAG, FoldObjectsFun, false} + ), KeyHashList7 = HashTreeF7(), - + true = length(KeyHashList7) == 20001, - io:format("Fetch of hashtree using fold objects took ~w microseconds" ++ - " and found an object count of ~w~n", - [timer:now_diff(os:timestamp(), SW6), length(KeyHashList7)]), - + io:format( + "Fetch of hashtree using fold objects took ~w microseconds" ++ + " and found an object count of ~w~n", + [timer:now_diff(os:timestamp(), SW6), length(KeyHashList7)] + ), + ok = leveled_bookie:book_close(Bookie5), testutil:reset_filestructure(). - journal_compaction_bustedjournal(_Config) -> % Different circumstances will be created in different runs busted_journal_test(10000000, native, on_receipt, true), busted_journal_test(7777777, lz4, on_compact, true), busted_journal_test(8888888, lz4, on_receipt, true), busted_journal_test(7777777, lz4, on_compact, false). - busted_journal_test(MaxJournalSize, PressMethod, PressPoint, Bust) -> % Simply confirms that none of this causes a crash RootPath = testutil:reset_filestructure(), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, MaxJournalSize}, - {max_run_length, 10}, - {sync_strategy, testutil:sync_strategy()}, - {compression_method, PressMethod}, - {compression_point, PressPoint}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, MaxJournalSize}, + {max_run_length, 10}, + {sync_strategy, testutil:sync_strategy()}, + {compression_method, PressMethod}, + {compression_point, PressPoint} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {TestObject, TestSpec} = testutil:generate_testobject(), ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), testutil:check_forobject(Bookie1, TestObject), ObjList1 = testutil:generate_objects(50000, 2), - lists:foreach(fun({_RN, Obj, Spc}) -> - testutil:book_riakput(Bookie1, Obj, Spc) end, - ObjList1), + lists:foreach( + fun({_RN, Obj, Spc}) -> + testutil:book_riakput(Bookie1, Obj, Spc) + end, + ObjList1 + ), %% Now replace all the objects ObjList2 = testutil:generate_objects(50000, 2), - lists:foreach(fun({_RN, Obj, Spc}) -> - testutil:book_riakput(Bookie1, Obj, Spc) end, - ObjList2), + lists:foreach( + fun({_RN, Obj, Spc}) -> + testutil:book_riakput(Bookie1, Obj, Spc) + end, + ObjList2 + ), ok = leveled_bookie:book_close(Bookie1), - + case Bust of true -> CDBFiles = testutil:find_journals(RootPath), - lists:foreach(fun(FN) -> - testutil:corrupt_journal(RootPath, - FN, - 100, 2048, 1000) - end, - CDBFiles); + lists:foreach( + fun(FN) -> + testutil:corrupt_journal( + RootPath, + FN, + 100, + 2048, + 1000 + ) + end, + CDBFiles + ); false -> - ok + ok end, - + {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), - + ok = leveled_bookie:book_compactjournal(Bookie2, 30000), F = fun leveled_bookie:book_islastcompactionpending/1, - lists:foldl(fun(X, Pending) -> - case Pending of - false -> - false; - true -> - io:format("Loop ~w waiting for journal " - ++ "compaction to complete~n", [X]), - timer:sleep(20000), - F(Bookie2) - end end, - true, - lists:seq(1, 15)), - + lists:foldl( + fun(X, Pending) -> + case Pending of + false -> + false; + true -> + io:format( + "Loop ~w waiting for journal " ++ + "compaction to complete~n", + [X] + ), + timer:sleep(20000), + F(Bookie2) + 
end + end, + true, + lists:seq(1, 15) + ), + ok = leveled_bookie:book_close(Bookie2), testutil:reset_filestructure(10000). recompact_keydeltas(_Config) -> RootPath = testutil:reset_filestructure(), B = <<"test_bucket">>, - StartOptsFun = + StartOptsFun = fun(JOC) -> - [{root_path, RootPath}, + [ + {root_path, RootPath}, {max_journalobjectcount, JOC}, {max_run_length, 4}, {singlefile_compactionpercentage, 70.0}, {maxrunlength_compactionpercentage, 85.0}, - {sync_strategy, testutil:sync_strategy()}] + {sync_strategy, testutil:sync_strategy()} + ] end, {ok, Bookie1} = leveled_bookie:book_start(StartOptsFun(45000)), {KSpcL1, _V1} = testutil:put_indexed_objects(Bookie1, B, 24000), {KSpcL2, _V2} = testutil:put_altered_indexed_objects(Bookie1, B, KSpcL1, false), ok = leveled_bookie:book_close(Bookie1), - {ok, Bookie2} = leveled_bookie:book_start(StartOptsFun(45000)), + {ok, Bookie2} = leveled_bookie:book_start(StartOptsFun(45000)), compact_and_wait(Bookie2, 0), {KSpcL3, V3} = testutil:put_altered_indexed_objects(Bookie2, B, KSpcL2, false), compact_and_wait(Bookie2, 0), ok = testutil:check_indexed_objects( - Bookie2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3, V3), + Bookie2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3, V3 + ), ok = leveled_bookie:book_close(Bookie2), testutil:reset_filestructure(10000). @@ -1148,33 +1281,38 @@ rotating_object_check(BookOpts, B, NumberOfObjects) -> ok = testutil:check_indexed_objects(Book1, B, KSpcL1, V1), {KSpcL2, V2} = testutil:put_altered_indexed_objects(Book1, B, KSpcL1, false), - ok = + ok = testutil:check_indexed_objects( - Book1, B, KSpcL1 ++ KSpcL2, V2), + Book1, B, KSpcL1 ++ KSpcL2, V2 + ), {KSpcL3, V3} = testutil:put_altered_indexed_objects(Book1, B, KSpcL2, false), - ok = + ok = testutil:check_indexed_objects( - Book1, B, KSpcL1 ++ KSpcL2 ++ KSpcL3, V3), + Book1, B, KSpcL1 ++ KSpcL2 ++ KSpcL3, V3 + ), ok = leveled_bookie:book_close(Book1), {ok, Book2} = leveled_bookie:book_start(BookOpts), - ok = + ok = testutil:check_indexed_objects( - Book2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3, V3), + Book2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3, V3 + ), {KSpcL4, V4} = testutil:put_altered_indexed_objects(Book2, B, KSpcL3, false), io:format("Bucket complete - checking index before compaction~n"), - ok = + ok = testutil:check_indexed_objects( - Book2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3 ++ KSpcL4, V4), - + Book2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3 ++ KSpcL4, V4 + ), + compact_and_wait(Book2), - + io:format("Checking index following compaction~n"), ok = testutil:check_indexed_objects( - Book2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3 ++ KSpcL4, V4), - + Book2, B, KSpcL1 ++ KSpcL2 ++ KSpcL3 ++ KSpcL4, V4 + ), + ok = leveled_bookie:book_close(Book2), {ok, KSpcL1 ++ KSpcL2 ++ KSpcL3 ++ KSpcL4, V4}. @@ -1184,29 +1322,39 @@ compact_and_wait(Book) -> compact_and_wait(Book, WaitForDelete) -> ok = leveled_bookie:book_compactjournal(Book, 30000), F = fun leveled_bookie:book_islastcompactionpending/1, - lists:foldl(fun(X, Pending) -> - case Pending of - false -> - false; - true -> - io:format("Loop ~w waiting for journal " - ++ "compaction to complete~n", [X]), - timer:sleep(20000), - F(Book) - end end, - true, - lists:seq(1, 15)), + lists:foldl( + fun(X, Pending) -> + case Pending of + false -> + false; + true -> + io:format( + "Loop ~w waiting for journal " ++ + "compaction to complete~n", + [X] + ), + timer:sleep(20000), + F(Book) + end + end, + true, + lists:seq(1, 15) + ), io:format("Waiting for journal deletes~n"), timer:sleep(WaitForDelete). 
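%% restart_from_blankledger/2 (below) wipes the ledger directory, restarts
%% the bookie from the journal alone, and re-checks the indexed objects for
%% each supplied bucket to confirm the ledger can be rebuilt on startup.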
restart_from_blankledger(BookOpts, B_SpcL) -> - leveled_penciller:clean_testdir(proplists:get_value(root_path, BookOpts) ++ - "/ledger"), + leveled_penciller:clean_testdir( + proplists:get_value(root_path, BookOpts) ++ + "/ledger" + ), {ok, Book1} = leveled_bookie:book_start(BookOpts), io:format("Checking index following restart~n"), - lists:foreach(fun({B, SpcL, V}) -> - ok = testutil:check_indexed_objects(Book1, B, SpcL, V) - end, - B_SpcL), + lists:foreach( + fun({B, SpcL, V}) -> + ok = testutil:check_indexed_objects(Book1, B, SpcL, V) + end, + B_SpcL + ), ok = leveled_bookie:book_close(Book1), ok. diff --git a/test/end_to_end/riak_SUITE.erl b/test/end_to_end/riak_SUITE.erl index 6b19536e..4b283221 100644 --- a/test/end_to_end/riak_SUITE.erl +++ b/test/end_to_end/riak_SUITE.erl @@ -4,47 +4,48 @@ -export([all/0, init_per_suite/1, end_per_suite/1, suite/0]). -export([ - test_large_lsm_merge/1, - basic_riak/1, - block_version_change/1, - fetchclocks_modifiedbetween/1, - crossbucket_aae/1, - handoff/1, - handoff_close/1, - handoff_withcompaction/1, - dollar_bucket_index/1, - dollar_key_index/1, - bigobject_memorycheck/1, - summarisable_sstindex/1 - ]). + test_large_lsm_merge/1, + basic_riak/1, + block_version_change/1, + fetchclocks_modifiedbetween/1, + crossbucket_aae/1, + handoff/1, + handoff_close/1, + handoff_withcompaction/1, + dollar_bucket_index/1, + dollar_key_index/1, + bigobject_memorycheck/1, + summarisable_sstindex/1 +]). suite() -> [{timetrap, {hours, 2}}]. -all() -> [ - basic_riak, - block_version_change, - fetchclocks_modifiedbetween, - crossbucket_aae, - handoff, - handoff_close, - handoff_withcompaction, - dollar_bucket_index, - dollar_key_index, - bigobject_memorycheck, - summarisable_sstindex, - test_large_lsm_merge - ]. - --define(MAGIC, 53). % riak_kv -> riak_object +all() -> + [ + basic_riak, + block_version_change, + fetchclocks_modifiedbetween, + crossbucket_aae, + handoff, + handoff_close, + handoff_withcompaction, + dollar_bucket_index, + dollar_key_index, + bigobject_memorycheck, + summarisable_sstindex, + test_large_lsm_merge + ]. + +% riak_kv -> riak_object +-define(MAGIC, 53). init_per_suite(Config) -> - testutil:init_per_suite([{suite, "riak"}|Config]), + testutil:init_per_suite([{suite, "riak"} | Config]), Config. end_per_suite(Config) -> testutil:end_per_suite(Config). - test_large_lsm_merge(_Config) -> lsm_merge_tester(24). 
@@ -57,7 +58,7 @@ lsm_merge_tester(LoopsPerBucket) -> {root_path, RootPath}, {max_pencillercachesize, 16000}, {max_sstslots, 96}, - % Make SST files smaller, to accelerate merges + % Make SST files smaller, to accelerate merges {max_mergebelow, 24}, {sync_strategy, testutil:sync_strategy()}, {log_level, warn}, @@ -65,9 +66,18 @@ lsm_merge_tester(LoopsPerBucket) -> { forced_logs, [ - b0015, b0016, b0017, b0018, p0032, sst12, - pc008, pc010, pc011, pc026, - p0018, p0024 + b0015, + b0016, + b0017, + b0018, + p0032, + sst12, + pc008, + pc010, + pc011, + pc026, + p0018, + p0024 ] } ], @@ -88,7 +98,7 @@ lsm_merge_tester(LoopsPerBucket) -> end, lists:seq(1, Loops) ), - V + V end, V1 = LoadBucketFun(Bookie1, <<"B1">>, LoopsPerBucket), @@ -105,20 +115,20 @@ lsm_merge_tester(LoopsPerBucket) -> maps:update_with(B, fun(C) -> C + 1 end, 1, CountAcc), case rand:uniform(SampleOneIn) of R when R == 1 -> - {[{B, K}|SampleKeys], UpdCntAcc}; + {[{B, K} | SampleKeys], UpdCntAcc}; _ -> {SampleKeys, UpdCntAcc} end end, {async, HeadFolder} = leveled_bookie:book_headfold( - Book, + Book, ?RIAK_TAG, {BookHeadFoldFun, {[], maps:new()}}, true, false, false - ), + ), {Time, R} = timer:tc(HeadFolder), io:format( "CheckBucketFold returned counts ~w in ~w ms~n", @@ -154,7 +164,7 @@ lsm_merge_tester(LoopsPerBucket) -> ok = leveled_bookie:book_close(Bookie1), {ok, Bookie2} = leveled_bookie:book_start( - lists:ukeysort(1, [{max_sstslots, 64}|StartOpts1]) + lists:ukeysort(1, [{max_sstslots, 64} | StartOpts1]) ), {SampleKeysF2, CountMapF2} = CheckBucketFun(Bookie2), @@ -184,7 +194,9 @@ lsm_merge_tester(LoopsPerBucket) -> {GT3, ok} = timer:tc( fun() -> - lists:foreach(TestSampleKeyFun(Bookie2, UpdValueMap), SampleKeysF3) + lists:foreach( + TestSampleKeyFun(Bookie2, UpdValueMap), SampleKeysF3 + ) end ), io:format( @@ -219,12 +231,12 @@ block_version_change(_Config) -> ID = integer_to_list(ListID), [ { - add, + add, list_to_binary("integer" ++ ID ++ "_int"), RandInt }, { - add, + add, list_to_binary("binary" ++ ID ++ "_bin"), <> } @@ -232,10 +244,11 @@ block_version_change(_Config) -> end end, - ObjList1 = + ObjList1 = testutil:generate_objects( - KeyCount, - {fixed_binary, 1}, [], + KeyCount, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(1), Bucket @@ -245,13 +258,13 @@ block_version_change(_Config) -> SubList1 = lists:sublist(lists:ukeysort(1, ObjList1), 1000), ok = testutil:check_forlist(Bookie1, SubList1), - FoldKeysFun = fun(_B, K, Acc) -> [K|Acc] end, + FoldKeysFun = fun(_B, K, Acc) -> [K | Acc] end, IntIndexFold = fun(Idx, Book) -> fun(IC, CountAcc) -> ID = integer_to_list(Idx), Index = list_to_binary("integer" ++ ID ++ "_int"), - {async, R} = + {async, R} = leveled_bookie:book_indexfold( Book, {Bucket, <<>>}, @@ -268,7 +281,7 @@ block_version_change(_Config) -> fun(IC, CountAcc) -> ID = integer_to_list(Idx), Index = list_to_binary("binary" ++ ID ++ "_bin"), - {async, R} = + {async, R} = leveled_bookie:book_indexfold( Book, {Bucket, <<>>}, @@ -293,7 +306,7 @@ block_version_change(_Config) -> io:format( "~w queries returned count=~w in ~w ms~n", [ - IndexCount, + IndexCount, TotalIntIndexEntries, timer:now_diff(os:timestamp(), SWA) div 1000 ] @@ -309,25 +322,26 @@ block_version_change(_Config) -> io:format( "~w queries returned count=~w in ~w ms~n", [ - IndexCount, + IndexCount, TotalBinIndexEntries, timer:now_diff(os:timestamp(), SWB) div 1000 ] ), true = TotalBinIndexEntries == length(ObjList) end, - + CheckIndices(Bookie1, ObjList1, 1), - + ok = leveled_bookie:book_close(Bookie1), - StartOpts2 = 
lists:ukeysort(1, [{block_version, 1}|StartOpts1]), + StartOpts2 = lists:ukeysort(1, [{block_version, 1} | StartOpts1]), {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), - ObjList2 = + ObjList2 = testutil:generate_objects( - KeyCount, - {fixed_binary, KeyCount + 1}, [], + KeyCount, + {fixed_binary, KeyCount + 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(2), Bucket @@ -342,13 +356,14 @@ block_version_change(_Config) -> CheckIndices(Bookie2, ObjList2, 2), ok = leveled_bookie:book_close(Bookie2), - + {ok, Bookie3} = leveled_bookie:book_start(StartOpts1), - ObjList3 = + ObjList3 = testutil:generate_objects( - KeyCount, - {fixed_binary, KeyCount + KeyCount + 1}, [], + KeyCount, + {fixed_binary, KeyCount + KeyCount + 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(3), Bucket @@ -363,7 +378,7 @@ block_version_change(_Config) -> CheckIndices(Bookie3, ObjList1, 1), CheckIndices(Bookie3, ObjList2, 2), CheckIndices(Bookie3, ObjList3, 3), - + ok = leveled_bookie:book_destroy(Bookie3). basic_riak(_Config) -> @@ -379,13 +394,15 @@ basic_riak_tester(Bucket, KeyCount) -> IndexCount = 20, RootPath = testutil:reset_filestructure("basicRiak"), - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 500000000}, - {max_pencillercachesize, 24000}, - {sync_strategy, testutil:sync_strategy()}, - {database_id, 32}, - {stats_logfrequency, 5}, - {stats_probability, 80}], + StartOpts1 = [ + {root_path, RootPath}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 24000}, + {sync_strategy, testutil:sync_strategy()}, + {database_id, 32}, + {stats_logfrequency, 5}, + {stats_probability, 80} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), IndexGenFun = @@ -393,70 +410,75 @@ basic_riak_tester(Bucket, KeyCount) -> fun() -> RandInt = rand:uniform(IndexCount), ID = integer_to_list(ListID), - [{add, - list_to_binary("integer" ++ ID ++ "_int"), - RandInt}, - {add, - list_to_binary("binary" ++ ID ++ "_bin"), - <>}] + [ + {add, list_to_binary("integer" ++ ID ++ "_int"), RandInt}, + {add, list_to_binary("binary" ++ ID ++ "_bin"), << + RandInt:32/integer + >>} + ] end end, CountPerList = KeyCount div 5, - ObjList1 = + ObjList1 = testutil:generate_objects( - CountPerList, - {fixed_binary, 1}, [], + CountPerList, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(1), Bucket ), ObjList2 = testutil:generate_objects( - CountPerList, - {fixed_binary, CountPerList + 1}, [], + CountPerList, + {fixed_binary, CountPerList + 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(2), Bucket ), - + ObjList3 = testutil:generate_objects( - CountPerList, - {fixed_binary, 2 * CountPerList + 1}, [], + CountPerList, + {fixed_binary, 2 * CountPerList + 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(3), Bucket ), - + ObjList4 = testutil:generate_objects( - CountPerList, - {fixed_binary, 3 * CountPerList + 1}, [], + CountPerList, + {fixed_binary, 3 * CountPerList + 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(4), Bucket ), - + ObjList5 = testutil:generate_objects( - CountPerList, - {fixed_binary, 4 * CountPerList + 1}, [], + CountPerList, + {fixed_binary, 4 * CountPerList + 1}, + [], crypto:strong_rand_bytes(512), IndexGenFun(5), Bucket ), - + % Mix with the ordering on the load, just in case ordering hides issues testutil:riakload(Bookie1, ObjList4), testutil:riakload(Bookie1, ObjList1), testutil:riakload(Bookie1, ObjList3), testutil:riakload(Bookie1, ObjList5), - testutil:riakload(Bookie1, ObjList2), - % This needs to stay last, - % as the last key of this needs to be the last 
key added - % so that headfold check, checks something in memory + testutil:riakload(Bookie1, ObjList2), + % This needs to stay last, + % as the last key of this needs to be the last key added + % so that headfold check, checks something in memory % Take a subset, and do some HEAD/GET requests SubList1 = lists:sublist(lists:ukeysort(1, ObjList1), 1000), @@ -467,13 +489,13 @@ basic_riak_tester(Bucket, KeyCount) -> ok = testutil:checkhead_forlist(Bookie1, SubList1), ok = testutil:checkhead_forlist(Bookie1, SubList5), - FoldKeysFun = fun(_B, K, Acc) -> [K|Acc] end, + FoldKeysFun = fun(_B, K, Acc) -> [K | Acc] end, IntIndexFold = fun(Idx, Book) -> fun(IC, CountAcc) -> ID = integer_to_list(Idx), Index = list_to_binary("integer" ++ ID ++ "_int"), - {async, R} = + {async, R} = leveled_bookie:book_indexfold( Book, {Bucket, <<>>}, @@ -490,7 +512,7 @@ basic_riak_tester(Bucket, KeyCount) -> fun(IC, CountAcc) -> ID = integer_to_list(Idx), Index = list_to_binary("binary" ++ ID ++ "_bin"), - {async, R} = + {async, R} = leveled_bookie:book_indexfold( Book, {Bucket, <<>>}, @@ -506,36 +528,50 @@ basic_riak_tester(Bucket, KeyCount) -> SWA = os:timestamp(), TotalIndexEntries2 = lists:foldl(IntIndexFold(2, Bookie1), 0, lists:seq(1, IndexCount)), - io:format("~w queries returned count=~w in ~w ms~n", - [IndexCount, - TotalIndexEntries2, - timer:now_diff(os:timestamp(), SWA)/1000]), + io:format( + "~w queries returned count=~w in ~w ms~n", + [ + IndexCount, + TotalIndexEntries2, + timer:now_diff(os:timestamp(), SWA) / 1000 + ] + ), true = TotalIndexEntries2 == length(ObjList2), SWB = os:timestamp(), TotalIndexEntries4 = lists:foldl(IntIndexFold(4, Bookie1), 0, lists:seq(1, IndexCount)), - io:format("~w queries returned count=~w in ~w ms~n", - [IndexCount, - TotalIndexEntries4, - timer:now_diff(os:timestamp(), SWB)/1000]), + io:format( + "~w queries returned count=~w in ~w ms~n", + [ + IndexCount, + TotalIndexEntries4, + timer:now_diff(os:timestamp(), SWB) / 1000 + ] + ), true = TotalIndexEntries4 == length(ObjList4), - + SWC = os:timestamp(), TotalIndexEntries3 = lists:foldl(BinIndexFold(3, Bookie1), 0, lists:seq(1, IndexCount)), - io:format("~w queries returned count=~w in ~w ms~n", - [IndexCount, - TotalIndexEntries3, - timer:now_diff(os:timestamp(), SWC)/1000]), + io:format( + "~w queries returned count=~w in ~w ms~n", + [ + IndexCount, + TotalIndexEntries3, + timer:now_diff(os:timestamp(), SWC) / 1000 + ] + ), true = TotalIndexEntries3 == length(ObjList3), - + ok = leveled_bookie:book_close(Bookie1), - StartOpts2 = [{root_path, RootPath}, - {max_journalsize, 200000000}, - {max_pencillercachesize, 12000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts2 = [ + {root_path, RootPath}, + {max_journalsize, 200000000}, + {max_pencillercachesize, 12000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), ok = testutil:check_forlist(Bookie2, SubList5), @@ -547,54 +583,71 @@ basic_riak_tester(Bucket, KeyCount) -> lists:foldl(BinIndexFold(3, Bookie2), 0, lists:seq(1, IndexCount)), true = TotalIndexEntries3B == length(ObjList3), - HeadFoldFun = fun(B, K, _Hd, Acc) -> [{B, K}|Acc] end, - [{_I1, Obj1, _Spc1}|_Rest1] = ObjList1, - [{_I2, Obj2, _Spc2}|_Rest2] = ObjList2, - [{_I3, Obj3, _Spc3}|_Rest3] = ObjList3, - [{_I4, Obj4, _Spc4}|_Rest4] = ObjList4, - [{_I5, Obj5, _Spc5}|_Rest5] = ObjList5, + HeadFoldFun = fun(B, K, _Hd, Acc) -> [{B, K} | Acc] end, + [{_I1, Obj1, _Spc1} | _Rest1] = ObjList1, + [{_I2, Obj2, _Spc2} | _Rest2] = ObjList2, + [{_I3, Obj3, _Spc3} 
| _Rest3] = ObjList3, + [{_I4, Obj4, _Spc4} | _Rest4] = ObjList4, + [{_I5, Obj5, _Spc5} | _Rest5] = ObjList5, {_I2L, Obj2L, _Spc2L} = lists:last(ObjList2), SegList = - lists:map(fun(Obj) -> testutil:get_aae_segment(Obj) end, - [Obj1, Obj2, Obj3, Obj4, Obj5, Obj2L]), - BKList = - lists:map(fun(Obj) -> - {testutil:get_bucket(Obj), testutil:get_key(Obj)} - end, - [Obj1, Obj2, Obj3, Obj4, Obj5, Obj2L]), - + lists:map( + fun(Obj) -> testutil:get_aae_segment(Obj) end, + [Obj1, Obj2, Obj3, Obj4, Obj5, Obj2L] + ), + BKList = + lists:map( + fun(Obj) -> + {testutil:get_bucket(Obj), testutil:get_key(Obj)} + end, + [Obj1, Obj2, Obj3, Obj4, Obj5, Obj2L] + ), + {async, HeadR} = leveled_bookie:book_headfold( - Bookie2, + Bookie2, ?RIAK_TAG, {HeadFoldFun, []}, - true, false, + true, + false, SegList ), SW_SL0 = os:timestamp(), KLBySeg = HeadR(), - io:format("SegList Headfold returned ~w heads in ~w ms~n", - [length(KLBySeg), - timer:now_diff(os:timestamp(), SW_SL0)/1000]), - true = length(KLBySeg) < KeyCount div 1000, % not too many false answers + io:format( + "SegList Headfold returned ~w heads in ~w ms~n", + [ + length(KLBySeg), + timer:now_diff(os:timestamp(), SW_SL0) / 1000 + ] + ), + % not too many false answers + true = length(KLBySeg) < KeyCount div 1000, KLBySegRem = lists:subtract(KLBySeg, BKList), true = length(KLBySeg) - length(KLBySegRem) == length(BKList), {async, HeadRFalsePositive} = leveled_bookie:book_headfold( - Bookie2, + Bookie2, ?RIAK_TAG, {HeadFoldFun, []}, - true, false, + true, + false, SegList ++ lists:seq(1, 256) - ), % Make it a large seg list + % Make it a large seg list + ), SW_SL1 = os:timestamp(), KLByXcessSeg = HeadRFalsePositive(), - io:format("SegList Headfold with xcess segments returned ~w heads in ~w ms~n", - [length(KLByXcessSeg), - timer:now_diff(os:timestamp(), SW_SL1)/1000]), - true = length(KLByXcessSeg) < KeyCount div 10, % Still not too many false answers + io:format( + "SegList Headfold with xcess segments returned ~w heads in ~w ms~n", + [ + length(KLByXcessSeg), + timer:now_diff(os:timestamp(), SW_SL1) / 1000 + ] + ), + % Still not too many false answers + true = length(KLByXcessSeg) < KeyCount div 10, KLByXcessSegRem = lists:subtract(KLByXcessSeg, BKList), true = length(KLByXcessSeg) - length(KLByXcessSegRem) == length(BKList), @@ -608,24 +661,31 @@ summarisable_sstindex(_Config) -> KeyGen = fun(I) -> list_to_binary(io_lib:format("~10..0w", [I])) end, ObjListToSort = lists:map( - fun(I) -> - {rand:uniform(KeyCount * 10), - testutil:set_object( - Bucket, KeyGen(I), integer_to_binary(I), IndexGen, [])} - end, - lists:seq(1, KeyCount)), + fun(I) -> + { + rand:uniform(KeyCount * 10), + testutil:set_object( + Bucket, KeyGen(I), integer_to_binary(I), IndexGen, [] + ) + } + end, + lists:seq(1, KeyCount) + ), UnsortedList = lists:map( fun({I, {O, S}}) -> {I, O, S} end, - lists:keysort(1, ObjListToSort)), + lists:keysort(1, ObjListToSort) + ), true = KeyCount == length(UnsortedList), - StartOpts1 = [{root_path, RootPathA}, + StartOpts1 = [ + {root_path, RootPathA}, {max_journalsize, 500000000}, {max_pencillercachesize, 8000}, - {sync_strategy, testutil:sync_strategy()}], + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), testutil:riakload(Bookie1, UnsortedList), - FoldAccT = {fun(_B, K, Acc) -> [K|Acc] end, []}, + FoldAccT = {fun(_B, K, Acc) -> [K | Acc] end, []}, KeyRangeCheckFun = fun(SK, EK) -> {async, FoldFun} = @@ -635,15 +695,17 @@ summarisable_sstindex(_Config) -> Bucket, {SK, EK}, FoldAccT, - undefined), + 
undefined + ), QueryList = FoldFun(), io:format( "QueryCount ~w against total ~w for range ~p ~p~n", - [length(QueryList), KeyCount, SK, EK]), + [length(QueryList), KeyCount, SK, EK] + ), QueryList end, - - true = KeyCount == length(KeyRangeCheckFun(<<"00">>, <<"02">>)), + + true = KeyCount == length(KeyRangeCheckFun(<<"00">>, <<"02">>)), true = KeyCount == length(KeyRangeCheckFun(<<"000">>, <<"002">>)), true = KeyCount == length(KeyRangeCheckFun(<<"0000">>, <<"0002">>)), true = @@ -678,55 +740,69 @@ summarisable_sstindex(_Config) -> end, lists:map( fun(_I) -> rand:uniform(KeyCount - 200) end, - lists:seq(1, 100))), + lists:seq(1, 100) + ) + ), IdxObjKeyCount = 50000, TermGen = fun(I, C) -> list_to_binary( lists:flatten( - io_lib:format("~10..0w", [I]) ++ integer_to_list(C))) + io_lib:format("~10..0w", [I]) ++ integer_to_list(C) + ) + ) end, SequentialIndexGen = fun(I) -> fun() -> lists:map( fun(C) -> - {add, <<"indexf_bin">>,TermGen(I, C)} + {add, <<"indexf_bin">>, TermGen(I, C)} end, - lists:seq(1, 8)) + lists:seq(1, 8) + ) end end, IdxObjListToSort = lists:map( - fun(I) -> - {rand:uniform(KeyCount * 10), + fun(I) -> + { + rand:uniform(KeyCount * 10), testutil:set_object( Bucket, KeyGen(I), integer_to_binary(I - KeyCount), SequentialIndexGen(I - KeyCount), - [])} - end, - lists:seq(KeyCount + 1, KeyCount + IdxObjKeyCount)), + [] + ) + } + end, + lists:seq(KeyCount + 1, KeyCount + IdxObjKeyCount) + ), UnsortedIdxObjList = lists:map( fun({I, {O, S}}) -> {I, O, S} end, - lists:keysort(1, IdxObjListToSort)), + lists:keysort(1, IdxObjListToSort) + ), testutil:riakload(Bookie1, UnsortedIdxObjList), IdxCount = IdxObjKeyCount * 8, IdxQueryFun = fun(StartTerm, EndTerm) -> - {async, FoldFun} = + {async, FoldFun} = leveled_bookie:book_indexfold( - Bookie1, {Bucket, <<>>}, FoldAccT, + Bookie1, + {Bucket, <<>>}, + FoldAccT, {<<"indexf_bin">>, StartTerm, EndTerm}, - {true, undefined}), + {true, undefined} + ), IdxQueryList = FoldFun(), io:format( "IdxQueryCount ~w for range ~p ~p~n", - [length(IdxQueryList), StartTerm, EndTerm]), + [length(IdxQueryList), StartTerm, EndTerm] + ), IdxQueryList end, true = IdxCount == length(IdxQueryFun(<<"00">>, <<"05">>)), @@ -754,7 +830,9 @@ summarisable_sstindex(_Config) -> fun(_I) -> rand:uniform(IdxObjKeyCount - 20) end, - lists:seq(1, 100))), + lists:seq(1, 100) + ) + ), lists:foreach( fun(I) -> StartTerm = TermGen(I, 0), @@ -765,7 +843,9 @@ summarisable_sstindex(_Config) -> fun(_I) -> rand:uniform(IdxObjKeyCount - 10) end, - lists:seq(1, 100))), + lists:seq(1, 100) + ) + ), io:format("Redo object count checks:~n"), NewKeyCount = KeyCount + IdxObjKeyCount, @@ -785,39 +865,46 @@ summarisable_sstindex(_Config) -> end, lists:map( fun(_I) -> rand:uniform(KeyCount - 200) end, - lists:seq(1, 100))), - - ok = leveled_bookie:book_destroy(Bookie1). + lists:seq(1, 100) + ) + ), + ok = leveled_bookie:book_destroy(Bookie1). 
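A minimal sketch of the restart pattern exercised by block_version_change/1 above, assuming a store written under block_version 0 is later reopened with block_version 1; the path, object count and bucket here are illustrative only, the expectation being that SST files written under the earlier block version remain readable through the new block handling:

    block_version_roundtrip_sketch() ->
        RootPath = testutil:reset_filestructure("blockVersionSketch"),
        Opts0 =
            [
                {root_path, RootPath},
                {block_version, 0},
                {sync_strategy, testutil:sync_strategy()}
            ],
        {ok, Book0} = leveled_bookie:book_start(Opts0),
        ObjList =
            testutil:generate_objects(
                1000,
                {fixed_binary, 1},
                [],
                crypto:strong_rand_bytes(512),
                fun() -> [] end,
                <<"B">>
            ),
        testutil:riakload(Book0, ObjList),
        ok = leveled_bookie:book_close(Book0),
        % Reopen with the later block version; ukeysort keeps the new
        % {block_version, 1} tuple in preference to the one in Opts0
        Opts1 = lists:ukeysort(1, [{block_version, 1} | Opts0]),
        {ok, Book1} = leveled_bookie:book_start(Opts1),
        SubList = lists:sublist(lists:ukeysort(1, ObjList), 100),
        ok = testutil:check_forlist(Book1, SubList),
        ok = leveled_bookie:book_destroy(Book1).
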
fetchclocks_modifiedbetween(_Config) -> RootPathA = testutil:reset_filestructure("fetchClockA"), RootPathB = testutil:reset_filestructure("fetchClockB"), - StartOpts1A = [{root_path, RootPathA}, - {max_journalsize, 500000000}, - {max_pencillercachesize, 8000}, - {sync_strategy, testutil:sync_strategy()}], - StartOpts1B = [{root_path, RootPathB}, - {max_journalsize, 500000000}, - {max_pencillercachesize, 12000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts1A = [ + {root_path, RootPathA}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 8000}, + {sync_strategy, testutil:sync_strategy()} + ], + StartOpts1B = [ + {root_path, RootPathB}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 12000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie1A} = leveled_bookie:book_start(StartOpts1A), {ok, Bookie1B} = leveled_bookie:book_start(StartOpts1B), - ObjList0 = + ObjList0 = testutil:generate_objects( - 100000, - {fixed_binary, 1}, [], + 100000, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(32), fun() -> [] end, <<"BaselineB">> ), ObjL1StartTS = testutil:convert_to_seconds(os:timestamp()), - ObjList1 = + ObjList1 = testutil:generate_objects( - 20000, - {fixed_binary, 1}, [], + 20000, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, <<"B0">> @@ -827,10 +914,11 @@ fetchclocks_modifiedbetween(_Config) -> timer:sleep(1000), _ObjL2StartTS = testutil:convert_to_seconds(os:timestamp()), - ObjList2 = + ObjList2 = testutil:generate_objects( - 15000, - {fixed_binary, 20001}, [], + 15000, + {fixed_binary, 20001}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, <<"B0">> @@ -840,10 +928,11 @@ fetchclocks_modifiedbetween(_Config) -> timer:sleep(1000), ObjL3StartTS = testutil:convert_to_seconds(os:timestamp()), - ObjList3 = + ObjList3 = testutil:generate_objects( - 35000, - {fixed_binary, 35001}, [], + 35000, + {fixed_binary, 35001}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, <<"B0">> @@ -853,10 +942,11 @@ fetchclocks_modifiedbetween(_Config) -> timer:sleep(1000), ObjL4StartTS = testutil:convert_to_seconds(os:timestamp()), - ObjList4 = + ObjList4 = testutil:generate_objects( - 30000, - {fixed_binary, 70001}, [], + 30000, + {fixed_binary, 70001}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, <<"B0">> @@ -866,10 +956,11 @@ fetchclocks_modifiedbetween(_Config) -> timer:sleep(1000), ObjL5StartTS = testutil:convert_to_seconds(os:timestamp()), - ObjList5 = + ObjList5 = testutil:generate_objects( - 8000, - {fixed_binary, 1}, [], + 8000, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, <<"B1">> @@ -879,10 +970,11 @@ fetchclocks_modifiedbetween(_Config) -> timer:sleep(1000), ObjL6StartTS = testutil:convert_to_seconds(os:timestamp()), - ObjList6 = + ObjList6 = testutil:generate_objects( - 7000, - {fixed_binary, 1}, [], + 7000, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, <<"B2">> @@ -904,30 +996,30 @@ fetchclocks_modifiedbetween(_Config) -> testutil:riakload(Bookie1B, ObjList1), testutil:riakload(Bookie1B, ObjList6), testutil:riakload(Bookie1B, ObjList3), - - RevertFixedBinKey = + + RevertFixedBinKey = fun(FBK) -> <<$K, $e, $y, KeyNumber:64/integer>> = FBK, KeyNumber end, - StoreFoldFun = + StoreFoldFun = fun(_B, K, _V, {_LK, AccC}) -> {RevertFixedBinKey(K), AccC + 1} end, - KeyRangeFun = + KeyRangeFun = fun(StartNumber, EndNumber) -> - {range, - <<"B0">>, - {testutil:fixed_bin_key(StartNumber), - testutil:fixed_bin_key(EndNumber)}} + {range, <<"B0">>, { + 
testutil:fixed_bin_key(StartNumber), + testutil:fixed_bin_key(EndNumber) + }} end, - + % Count with max object count FoldRangesFun = fun(FoldTarget, ModRange, EndNumber, MaxCount) -> fun(_I, {LKN, KC}) -> - {async, Runner} = + {async, Runner} = leveled_bookie:book_headfold( FoldTarget, ?RIAK_TAG, @@ -944,39 +1036,62 @@ fetchclocks_modifiedbetween(_Config) -> end end, - R1A = lists:foldl(FoldRangesFun(Bookie1A, false, 50000, 13000), - {0, 0}, lists:seq(1, 4)), + R1A = lists:foldl( + FoldRangesFun(Bookie1A, false, 50000, 13000), + {0, 0}, + lists:seq(1, 4) + ), io:format("R1A ~w~n", [R1A]), true = {50000, 50000} == R1A, - - R1B = lists:foldl(FoldRangesFun(Bookie1B, false, 50000, 13000), - {0, 0}, lists:seq(1, 3)), + + R1B = lists:foldl( + FoldRangesFun(Bookie1B, false, 50000, 13000), + {0, 0}, + lists:seq(1, 3) + ), io:format("R1B ~w~n", [R1B]), true = {50000, 35000} == R1B, - R2A = lists:foldl(FoldRangesFun(Bookie1A, - {ObjL3StartTS, ObjL3EndTS}, - 60000, - 13000), - {10000, 0}, lists:seq(1, 2)), + R2A = lists:foldl( + FoldRangesFun( + Bookie1A, + {ObjL3StartTS, ObjL3EndTS}, + 60000, + 13000 + ), + {10000, 0}, + lists:seq(1, 2) + ), io:format("R2A ~w~n", [R2A]), true = {60000, 25000} == R2A, - R2A_SR = lists:foldl(FoldRangesFun(Bookie1A, - {ObjL3StartTS, ObjL3EndTS}, - 60000, - 13000), - {10000, 0}, lists:seq(1, 1)), % Only single rotation + R2A_SR = lists:foldl( + FoldRangesFun( + Bookie1A, + {ObjL3StartTS, ObjL3EndTS}, + 60000, + 13000 + ), + % Only single rotation + {10000, 0}, + lists:seq(1, 1) + ), io:format("R2A_SingleRotation ~w~n", [R2A_SR]), - true = {48000, 13000} == R2A_SR, % Hit at max results - R2B = lists:foldl(FoldRangesFun(Bookie1B, - {ObjL3StartTS, ObjL3EndTS}, - 60000, - 13000), - {10000, 0}, lists:seq(1, 2)), + % Hit at max results + true = {48000, 13000} == R2A_SR, + R2B = lists:foldl( + FoldRangesFun( + Bookie1B, + {ObjL3StartTS, ObjL3EndTS}, + 60000, + 13000 + ), + {10000, 0}, + lists:seq(1, 2) + ), io:format("R2B ~w~n", [R1B]), true = {60000, 25000} == R2B, - CrudeStoreFoldFun = + CrudeStoreFoldFun = fun(LowLMD, HighLMD) -> fun(_B, K, V, {LK, AccC}) -> % Value is proxy_object? 
Can we get the metadata and @@ -994,20 +1109,24 @@ fetchclocks_modifiedbetween(_Config) -> end end, - io:format("Comparing queries for Obj1 TS range ~w ~w~n", - [ObjL1StartTS, ObjL1EndTS]), + io:format( + "Comparing queries for Obj1 TS range ~w ~w~n", + [ObjL1StartTS, ObjL1EndTS] + ), PlusFilterTimes = lists:map( - fun(_I) -> + fun(_I) -> time_filtered_query( - FoldRangesFun, Bookie1A, ObjL1StartTS, ObjL1EndTS) + FoldRangesFun, Bookie1A, ObjL1StartTS, ObjL1EndTS + ) end, - lists:seq(1, 4)), + lists:seq(1, 4) + ), PlusFilterTime = lists:sum(PlusFilterTimes) div 4, - + NoFilterStart = os:timestamp(), - {async, R3A_NoFilterRunner} = + {async, R3A_NoFilterRunner} = leveled_bookie:book_headfold( Bookie1A, ?RIAK_TAG, @@ -1021,13 +1140,15 @@ fetchclocks_modifiedbetween(_Config) -> NoFilterTime = timer:now_diff(os:timestamp(), NoFilterStart) div 1000, io:format("R3A_NoFilter ~w~n", [R3A_NoFilter]), true = {20000, 20000} == R3A_NoFilter, - io:format("Filtered query ~w ms and unfiltered query ~w ms~n", - [PlusFilterTime, NoFilterTime]), + io:format( + "Filtered query ~w ms and unfiltered query ~w ms~n", + [PlusFilterTime, NoFilterTime] + ), true = NoFilterTime > PlusFilterTime, SimpleCountFun = fun(BucketList) -> - fun(B, _K, _V, AccC) -> + fun(B, _K, _V, AccC) -> case lists:member(B, BucketList) of true -> AccC + 1; false -> AccC @@ -1035,7 +1156,7 @@ fetchclocks_modifiedbetween(_Config) -> end end, - {async, R4A_MultiBucketRunner} = + {async, R4A_MultiBucketRunner} = leveled_bookie:book_headfold( Bookie1A, ?RIAK_TAG, @@ -1045,21 +1166,21 @@ fetchclocks_modifiedbetween(_Config) -> true, false, {ObjL4StartTS, ObjL6EndTS}, - % Range includes ObjjL5 LMDs, - % but these ar enot in bucket list + % Range includes ObjjL5 LMDs, + % but these ar enot in bucket list false ), R4A_MultiBucket = R4A_MultiBucketRunner(), io:format("R4A_MultiBucket ~w ~n", [R4A_MultiBucket]), true = R4A_MultiBucket == 37000, - {async, R5A_MultiBucketRunner} = + {async, R5A_MultiBucketRunner} = leveled_bookie:book_headfold( Bookie1A, ?RIAK_TAG, {bucket_list, [<<"B2">>, <<"B0">>]}, - % Reverse the buckets in the bucket - % list + % Reverse the buckets in the bucket + % list {SimpleCountFun([<<"B0">>, <<"B2">>]), 0}, false, true, @@ -1071,8 +1192,7 @@ fetchclocks_modifiedbetween(_Config) -> io:format("R5A_MultiBucket ~w ~n", [R5A_MultiBucket]), true = R5A_MultiBucket == 37000, - - {async, R5B_MultiBucketRunner} = + {async, R5B_MultiBucketRunner} = leveled_bookie:book_headfold( Bookie1B, ?RIAK_TAG, @@ -1095,13 +1215,14 @@ fetchclocks_modifiedbetween(_Config) -> Bookie1A, {ObjL1StartTS, ObjL1EndTS}, 100000, 100000 ), {0, 0}, - lists:seq(1, 1)), + lists:seq(1, 1) + ), io:format("R6A_PlusFilter ~w~n", [R6A_PlusFilter]), true = 19000 == element(2, R6A_PlusFilter), % Hit limit of max count before trying next bucket, with and without a % timestamp filter - {async, R7A_MultiBucketRunner} = + {async, R7A_MultiBucketRunner} = leveled_bookie:book_headfold( Bookie1A, ?RIAK_TAG, @@ -1117,7 +1238,7 @@ fetchclocks_modifiedbetween(_Config) -> io:format("R7A_MultiBucket ~w ~n", [R7A_MultiBucket]), true = R7A_MultiBucket == {0, 5000}, - {async, R8A_MultiBucketRunner} = + {async, R8A_MultiBucketRunner} = leveled_bookie:book_headfold( Bookie1A, ?RIAK_TAG, @@ -1137,44 +1258,68 @@ fetchclocks_modifiedbetween(_Config) -> io:format("Double query to generate index cache and use~n"), {ok, Bookie1BS} = leveled_bookie:book_start(StartOpts1B), - + TooLate = testutil:convert_to_seconds(os:timestamp()), - lmdrange_tester(Bookie1BS, SimpleCountFun, - ObjL4StartTS, 
ObjL6StartTS, ObjL6EndTS, TooLate), + lmdrange_tester( + Bookie1BS, + SimpleCountFun, + ObjL4StartTS, + ObjL6StartTS, + ObjL6EndTS, + TooLate + ), io:format("Push tested keys down levels with new objects~n"), - ObjList7 = + ObjList7 = testutil:generate_objects( - 200000, - {fixed_binary, 1}, [], + 200000, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(32), fun() -> [] end, <<"B1.9">> ), testutil:riakload(Bookie1BS, ObjList7), - lmdrange_tester(Bookie1BS, SimpleCountFun, - ObjL4StartTS, ObjL6StartTS, ObjL6EndTS, TooLate), + lmdrange_tester( + Bookie1BS, + SimpleCountFun, + ObjL4StartTS, + ObjL6StartTS, + ObjL6EndTS, + TooLate + ), ok = leveled_bookie:book_destroy(Bookie1A), ok = leveled_bookie:book_destroy(Bookie1BS). time_filtered_query(FoldRangesFun, Bookie, ObjL1StartTS, ObjL1EndTS) -> PlusFilterStart = os:timestamp(), - R3A_PlusFilter = lists:foldl(FoldRangesFun(Bookie, - {ObjL1StartTS, ObjL1EndTS}, - 100000, - 100000), - {0, 0}, lists:seq(1, 1)), + R3A_PlusFilter = lists:foldl( + FoldRangesFun( + Bookie, + {ObjL1StartTS, ObjL1EndTS}, + 100000, + 100000 + ), + {0, 0}, + lists:seq(1, 1) + ), PlusFilterTime = timer:now_diff(os:timestamp(), PlusFilterStart) div 1000, io:format("R3A_PlusFilter ~w in ~w~n", [R3A_PlusFilter, PlusFilterTime]), true = {20000, 20000} == R3A_PlusFilter, PlusFilterTime. -lmdrange_tester(Bookie1BS, SimpleCountFun, - ObjL4StartTS, ObjL6StartTS, ObjL6EndTS, TooLate) -> - {async, R5B_MultiBucketRunner0} = +lmdrange_tester( + Bookie1BS, + SimpleCountFun, + ObjL4StartTS, + ObjL6StartTS, + ObjL6EndTS, + TooLate +) -> + {async, R5B_MultiBucketRunner0} = leveled_bookie:book_headfold( Bookie1BS, ?RIAK_TAG, @@ -1189,7 +1334,7 @@ lmdrange_tester(Bookie1BS, SimpleCountFun, R5B_MultiBucket0 = R5B_MultiBucketRunner0(), io:format("R5B_MultiBucket0 ~w ~n", [R5B_MultiBucket0]), true = R5B_MultiBucket0 == 37000, - {async, R5B_MultiBucketRunner1} = + {async, R5B_MultiBucketRunner1} = leveled_bookie:book_headfold( Bookie1BS, ?RIAK_TAG, @@ -1204,16 +1349,16 @@ lmdrange_tester(Bookie1BS, SimpleCountFun, R5B_MultiBucket1 = R5B_MultiBucketRunner1(), io:format("R5B_MultiBucket1 ~w ~n", [R5B_MultiBucket1]), true = R5B_MultiBucket1 == 37000, - SimpleMinMaxFun = + SimpleMinMaxFun = fun(B, K, _V, Acc) -> case lists:keyfind(B, 1, Acc) of {B, MinK, MaxK} -> - lists:ukeysort(1, [{B, min(K, MinK), max(K, MaxK)}|Acc]); + lists:ukeysort(1, [{B, min(K, MinK), max(K, MaxK)} | Acc]); false -> - lists:ukeysort(1, [{B, K, K}|Acc]) + lists:ukeysort(1, [{B, K, K} | Acc]) end end, - {async, R5B_MultiBucketRunner2} = + {async, R5B_MultiBucketRunner2} = leveled_bookie:book_headfold( Bookie1BS, ?RIAK_TAG, @@ -1230,7 +1375,7 @@ lmdrange_tester(Bookie1BS, SimpleCountFun, io:format("Found Min and Max Keys~n"), io:format("B ~s MinK ~s MaxK ~s~n", [<<"B0">>, MinB0K, MaxB0K]), io:format("B ~s MinK ~s MaxK ~s~n", [<<"B2">>, MinB2K, MaxB2K]), - {async, R5B_MultiBucketRunner3a} = + {async, R5B_MultiBucketRunner3a} = leveled_bookie:book_headfold( Bookie1BS, ?RIAK_TAG, @@ -1242,7 +1387,7 @@ lmdrange_tester(Bookie1BS, SimpleCountFun, {ObjL4StartTS, ObjL6EndTS}, false ), - {async, R5B_MultiBucketRunner3b} = + {async, R5B_MultiBucketRunner3b} = leveled_bookie:book_headfold( Bookie1BS, ?RIAK_TAG, @@ -1261,7 +1406,7 @@ lmdrange_tester(Bookie1BS, SimpleCountFun, true = (R5B_MultiBucket3a + R5B_MultiBucket3b) == 37000, io:format("Query outside of time range~n"), - {async, R5B_MultiBucketRunner4} = + {async, R5B_MultiBucketRunner4} = leveled_bookie:book_headfold( Bookie1BS, ?RIAK_TAG, @@ -1278,7 +1423,7 @@ 
lmdrange_tester(Bookie1BS, SimpleCountFun, true = R5B_MultiBucket4 == 0, io:format("Query with one foot inside of time range~n"), - {async, R5B_MultiBucketRunner5} = + {async, R5B_MultiBucketRunner5} = leveled_bookie:book_headfold( Bookie1BS, ?RIAK_TAG, @@ -1288,12 +1433,12 @@ lmdrange_tester(Bookie1BS, SimpleCountFun, true, false, {ObjL6StartTS, TooLate}, - false), + false + ), R5B_MultiBucket5 = R5B_MultiBucketRunner5(), io:format("R5B_MultiBucket5 ~w ~n", [R5B_MultiBucket5]), true = R5B_MultiBucket5 == 7000. - crossbucket_aae(_Config) -> % Test requires multiple different databases, so want to mount them all % on individual file paths @@ -1301,9 +1446,11 @@ crossbucket_aae(_Config) -> RootPathB = testutil:reset_filestructure("testB"), % Start the first database, load a test object, close it, start it again - StartOpts1 = [{root_path, RootPathA}, - {max_pencillercachesize, 16000}, - {sync_strategy, riak_sync}], + StartOpts1 = [ + {root_path, RootPathA}, + {max_pencillercachesize, 16000}, + {sync_strategy, riak_sync} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {B1, K1, V1, S1, MD} = { @@ -1317,10 +1464,12 @@ crossbucket_aae(_Config) -> ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), testutil:check_forobject(Bookie1, TestObject), ok = leveled_bookie:book_close(Bookie1), - StartOpts2 = [{root_path, RootPathA}, - {max_journalsize, 500000000}, - {max_pencillercachesize, 32000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts2 = [ + {root_path, RootPathA}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 32000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), testutil:check_forobject(Bookie2, TestObject), @@ -1328,7 +1477,7 @@ crossbucket_aae(_Config) -> % the first store (outputting the generated objects as a list of lists) % to be used elsewhere - GenList = + GenList = [{binary, 2}, {binary, 40002}, {binary, 80002}, {binary, 120002}], CLs = testutil:load_objects( @@ -1350,10 +1499,12 @@ crossbucket_aae(_Config) -> % % This is now the comparison part of the test - StartOpts3 = [{root_path, RootPathB}, - {max_journalsize, 200000000}, - {max_pencillercachesize, 16000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts3 = [ + {root_path, RootPathB}, + {max_journalsize, 200000000}, + {max_pencillercachesize, 16000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie3} = leveled_bookie:book_start(StartOpts3), lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs), test_singledelta_stores(Bookie2, Bookie3, small, {B1, K1}), @@ -1373,13 +1524,12 @@ crossbucket_aae(_Config) -> ok = leveled_bookie:book_close(Bookie2A), ok = leveled_bookie:book_close(Bookie3). - test_segfilter_query(Bookie, CLs) -> % This part of the test tests an issue with accelerating folds by segment - % list, when there is more than one key with a matching segment in the + % list, when there is more than one key with a matching segment in the % slot. 
Previously this was not handled correctly - and this test part % of the test detects this, by finding slices of keys which are probably - % in the same slot + % in the same slot SW0 = os:timestamp(), SliceSize = 20, @@ -1388,13 +1538,13 @@ test_segfilter_query(Bookie, CLs) -> CL3 = lists:sublist(lists:nth(3, CLs), 100, SliceSize), CL4 = lists:sublist(lists:nth(4, CLs), 100, SliceSize), - SegMapFun = + SegMapFun = fun({_RN, RiakObject, _Spc}) -> B = testutil:get_bucket(RiakObject), K = testutil:get_key(RiakObject), leveled_tictac:keyto_segment32(<>) end, - BKMapFun = + BKMapFun = fun({_RN, RiakObject, _Spc}) -> B = testutil:get_bucket(RiakObject), K = testutil:get_key(RiakObject), @@ -1413,74 +1563,93 @@ test_segfilter_query(Bookie, CLs) -> HeadSegmentFolderGen = fun(SegL, BKL) -> - {foldheads_allkeys, - ?RIAK_TAG, - {fun(B, K, _PO, Acc) -> - case lists:member({B, K}, BKL) of + {foldheads_allkeys, ?RIAK_TAG, + { + fun(B, K, _PO, Acc) -> + case lists:member({B, K}, BKL) of true -> Acc + 1; false -> Acc end - end, 0}, + end, + 0 + }, false, true, SegL, false, false} end, {async, SL1Folder} = - leveled_bookie:book_returnfolder(Bookie, - HeadSegmentFolderGen(SL1, BK1)), + leveled_bookie:book_returnfolder( + Bookie, + HeadSegmentFolderGen(SL1, BK1) + ), {async, SL2Folder} = - leveled_bookie:book_returnfolder(Bookie, - HeadSegmentFolderGen(SL2, BK2)), + leveled_bookie:book_returnfolder( + Bookie, + HeadSegmentFolderGen(SL2, BK2) + ), {async, SL3Folder} = - leveled_bookie:book_returnfolder(Bookie, - HeadSegmentFolderGen(SL3, BK3)), + leveled_bookie:book_returnfolder( + Bookie, + HeadSegmentFolderGen(SL3, BK3) + ), {async, SL4Folder} = - leveled_bookie:book_returnfolder(Bookie, - HeadSegmentFolderGen(SL4, BK4)), + leveled_bookie:book_returnfolder( + Bookie, + HeadSegmentFolderGen(SL4, BK4) + ), Results = [SL1Folder(), SL2Folder(), SL3Folder(), SL4Folder()], - io:format("SegList folders returned results of ~w " ++ - "for SliceSize ~w in ~w ms~n", - [Results, SliceSize, - timer:now_diff(os:timestamp(), SW0)/1000]), + io:format( + "SegList folders returned results of ~w " ++ + "for SliceSize ~w in ~w ms~n", + [ + Results, + SliceSize, + timer:now_diff(os:timestamp(), SW0) / 1000 + ] + ), lists:foreach(fun(R) -> true = R == SliceSize end, Results). 
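The acceleration probed by test_segfilter_query/2 derives a 32-bit segment from the bucket and key alone, so unrelated keys can collide on both segment and slot. A hedged sketch of the pattern, using the book_headfold/6 form seen elsewhere in this suite; the fold fun and the bucket/key pairs are illustrative, and false positives (other keys sharing a segment) must be tolerated or filtered out by the fold fun:

    seglist_fold_sketch(Bookie, BKList) ->
        % One 32-bit segment per bucket/key pair of interest
        SegList =
            lists:map(
                fun({B, K}) ->
                    leveled_tictac:keyto_segment32(<<B/binary, K/binary>>)
                end,
                BKList
            ),
        {async, Folder} =
            leveled_bookie:book_headfold(
                Bookie,
                ?RIAK_TAG,
                {fun(B, K, _Hd, Acc) -> [{B, K} | Acc] end, []},
                true,
                false,
                SegList
            ),
        % The result may include heads for keys outside BKList that happen
        % to share a segment with one of the targets
        Folder().
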
- test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> io:format("Test for single delta with tree size ~w~n", [TreeSize]), % Now run a tictac query against both stores to see the extent to which % state between stores is consistent - TicTacFolder = - {foldheads_allkeys, - ?RIAK_TAG, - {fun head_tictac_foldfun/4, - {0, leveled_tictac:new_tree(test, TreeSize)}}, + TicTacFolder = + {foldheads_allkeys, ?RIAK_TAG, + { + fun head_tictac_foldfun/4, + {0, leveled_tictac:new_tree(test, TreeSize)} + }, false, true, false, false, false}, % tictac query by bucket (should be same result as all stores) - TicTacByBucketFolder = - {foldheads_bybucket, - ?RIAK_TAG, <<"Bucket">>, - all, - {fun head_tictac_foldfun/4, - {0, leveled_tictac:new_tree(test, TreeSize)}}, - false, false, false, false, false}, - - DLs = check_tictacfold(BookA, BookB, - TicTacFolder, - DeltaKey, - TreeSize), - DLs = check_tictacfold(BookA, BookB, - TicTacByBucketFolder, - DeltaKey, - TreeSize), - - HeadSegmentFolder = - {foldheads_allkeys, - ?RIAK_TAG, - {get_segment_folder(DLs, TreeSize), []}, + TicTacByBucketFolder = + {foldheads_bybucket, ?RIAK_TAG, <<"Bucket">>, all, + { + fun head_tictac_foldfun/4, + {0, leveled_tictac:new_tree(test, TreeSize)} + }, + false, false, false, false, false}, + + DLs = check_tictacfold( + BookA, + BookB, + TicTacFolder, + DeltaKey, + TreeSize + ), + DLs = check_tictacfold( + BookA, + BookB, + TicTacByBucketFolder, + DeltaKey, + TreeSize + ), + + HeadSegmentFolder = + {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, false, true, false, false, false}, - + SW_SL0 = os:timestamp(), {async, BookASegFolder} = leveled_bookie:book_returnfolder(BookA, HeadSegmentFolder), @@ -1488,23 +1657,25 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> leveled_bookie:book_returnfolder(BookB, HeadSegmentFolder), BookASegList = BookASegFolder(), BookBSegList = BookBSegFolder(), - Time_SL0 = timer:now_diff(os:timestamp(), SW_SL0)/1000, - io:format("Two unfiltered segment list folds took ~w milliseconds ~n", - [Time_SL0]), - io:format("Segment lists found of lengths ~w ~w~n", - [length(BookASegList), length(BookBSegList)]), + Time_SL0 = timer:now_diff(os:timestamp(), SW_SL0) / 1000, + io:format( + "Two unfiltered segment list folds took ~w milliseconds ~n", + [Time_SL0] + ), + io:format( + "Segment lists found of lengths ~w ~w~n", + [length(BookASegList), length(BookBSegList)] + ), Delta = lists:subtract(BookASegList, BookBSegList), true = length(Delta) == 1, SegFilterList = leveled_tictac:generate_segmentfilter_list(DLs, TreeSize), - - SuperHeadSegmentFolder = - {foldheads_allkeys, - ?RIAK_TAG, - {get_segment_folder(DLs, TreeSize), []}, + + SuperHeadSegmentFolder = + {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, false, true, SegFilterList, false, false}, - + SW_SL1 = os:timestamp(), {async, BookASegFolder1} = leveled_bookie:book_returnfolder(BookA, SuperHeadSegmentFolder), @@ -1512,18 +1683,20 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> leveled_bookie:book_returnfolder(BookB, SuperHeadSegmentFolder), BookASegList1 = BookASegFolder1(), BookBSegList1 = BookBSegFolder1(), - Time_SL1 = timer:now_diff(os:timestamp(), SW_SL1)/1000, - io:format("Two filtered segment list folds took ~w milliseconds ~n", - [Time_SL1]), - io:format("Segment lists found of lengths ~w ~w~n", - [length(BookASegList1), length(BookBSegList1)]), - - SuperHeadSegmentFolderCP = - {foldheads_allkeys, - ?RIAK_TAG, - {get_segment_folder(DLs, TreeSize), []}, + Time_SL1 = 
timer:now_diff(os:timestamp(), SW_SL1) / 1000, + io:format( + "Two filtered segment list folds took ~w milliseconds ~n", + [Time_SL1] + ), + io:format( + "Segment lists found of lengths ~w ~w~n", + [length(BookASegList1), length(BookBSegList1)] + ), + + SuperHeadSegmentFolderCP = + {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, true, true, SegFilterList, false, false}, - + SW_SL1CP = os:timestamp(), {async, BookASegFolder1CP} = leveled_bookie:book_returnfolder(BookA, SuperHeadSegmentFolderCP), @@ -1531,23 +1704,24 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> leveled_bookie:book_returnfolder(BookB, SuperHeadSegmentFolderCP), BookASegList1CP = BookASegFolder1CP(), BookBSegList1CP = BookBSegFolder1CP(), - Time_SL1CP = timer:now_diff(os:timestamp(), SW_SL1CP)/1000, - io:format("Two filtered segment list folds " ++ - "with presence check took ~w milliseconds ~n", - [Time_SL1CP]), - io:format("Segment lists found of lengths ~w ~w~n", - [length(BookASegList1CP), length(BookBSegList1CP)]), - + Time_SL1CP = timer:now_diff(os:timestamp(), SW_SL1CP) / 1000, + io:format( + "Two filtered segment list folds " ++ + "with presence check took ~w milliseconds ~n", + [Time_SL1CP] + ), + io:format( + "Segment lists found of lengths ~w ~w~n", + [length(BookASegList1CP), length(BookBSegList1CP)] + ), FalseMatchFilter = DLs ++ [1, 100, 101, 1000, 1001], - SegFilterListF = + SegFilterListF = leveled_tictac:generate_segmentfilter_list(FalseMatchFilter, TreeSize), - SuperHeadSegmentFolderF = - {foldheads_allkeys, - ?RIAK_TAG, - {get_segment_folder(DLs, TreeSize), []}, + SuperHeadSegmentFolderF = + {foldheads_allkeys, ?RIAK_TAG, {get_segment_folder(DLs, TreeSize), []}, false, true, SegFilterListF, false, false}, - + SW_SL1F = os:timestamp(), {async, BookASegFolder1F} = leveled_bookie:book_returnfolder(BookA, SuperHeadSegmentFolderF), @@ -1555,18 +1729,21 @@ test_singledelta_stores(BookA, BookB, TreeSize, DeltaKey) -> leveled_bookie:book_returnfolder(BookB, SuperHeadSegmentFolderF), BookASegList1F = BookASegFolder1F(), BookBSegList1F = BookBSegFolder1F(), - Time_SL1F = timer:now_diff(os:timestamp(), SW_SL1F)/1000, - io:format("Two filtered segment list folds " ++ - " with false positives took ~w milliseconds ~n", - [Time_SL1F]), - io:format("Segment lists found of lengths ~w ~w~n", - [length(BookASegList1F), length(BookBSegList1F)]), + Time_SL1F = timer:now_diff(os:timestamp(), SW_SL1F) / 1000, + io:format( + "Two filtered segment list folds " ++ + " with false positives took ~w milliseconds ~n", + [Time_SL1F] + ), + io:format( + "Segment lists found of lengths ~w ~w~n", + [length(BookASegList1F), length(BookBSegList1F)] + ), Delta1F = lists:subtract(BookASegList1F, BookBSegList1F), io:format("Delta found of ~w~n", [Delta1F]), true = length(Delta1F) == 1. - get_segment_folder(SegmentList, TreeSize) -> fun(B, K, PO, KeysAndClocksAcc) -> SegmentH = leveled_tictac:keyto_segment32(<>), @@ -1574,21 +1751,19 @@ get_segment_folder(SegmentList, TreeSize) -> case lists:member(Segment, SegmentList) of true -> {VC, _Sz, _SC} = summary_from_binary(PO), - [{B, K, VC}|KeysAndClocksAcc]; + [{B, K, VC} | KeysAndClocksAcc]; false -> KeysAndClocksAcc - end + end end. head_tictac_foldfun(B, K, PO, {Count, TreeAcc}) -> - ExtractFun = + ExtractFun = fun({BBin, KBin}, Obj) -> {VC, _Sz, _SC} = summary_from_binary(Obj), {<>, lists:sort(VC)} end, - {Count + 1, - leveled_tictac:add_kv(TreeAcc, {B, K}, PO, ExtractFun)}. - + {Count + 1, leveled_tictac:add_kv(TreeAcc, {B, K}, PO, ExtractFun)}. 
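For context on the comparison step that follows, a hedged sketch of how the two trees built by head_tictac_foldfun/4 can be diffed and turned into a segment filter; TreeSize must match the size used when the trees were created, and the function name is illustrative:

    compare_trees_sketch(TreeA, TreeB, TreeSize) ->
        % Each dirty leaf marks a segment where the two stores disagree
        DirtyLeaves = leveled_tictac:find_dirtyleaves(TreeA, TreeB),
        % The dirty leaves can then seed a narrower, segment-filtered re-fold
        % to pull back only the keys and clocks that may explain the delta
        leveled_tictac:generate_segmentfilter_list(DirtyLeaves, TreeSize).
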
check_tictacfold(BookA, BookB, HeadTicTacFolder, DeltaKey, TreeSize) -> SW_TT0 = os:timestamp(), @@ -1598,15 +1773,19 @@ check_tictacfold(BookA, BookB, HeadTicTacFolder, DeltaKey, TreeSize) -> leveled_bookie:book_returnfolder(BookB, HeadTicTacFolder), {CountA, BookATree} = BookATreeFolder(), {CountB, BookBTree} = BookBTreeFolder(), - Time_TT0 = timer:now_diff(os:timestamp(), SW_TT0)/1000, + Time_TT0 = timer:now_diff(os:timestamp(), SW_TT0) / 1000, io:format("Two tree folds took ~w milliseconds ~n", [Time_TT0]), - io:format("Fold over keys revealed counts of ~w and ~w~n", - [CountA, CountB]), + io:format( + "Fold over keys revealed counts of ~w and ~w~n", + [CountA, CountB] + ), DLs = leveled_tictac:find_dirtyleaves(BookATree, BookBTree), - io:format("Found dirty leaves with Riak fold_heads of ~w~n", - [length(DLs)]), + io:format( + "Found dirty leaves with Riak fold_heads of ~w~n", + [length(DLs)] + ), case DeltaKey of {B1, K1} -> % There should be a single delta between the stores @@ -1622,23 +1801,18 @@ check_tictacfold(BookA, BookB, HeadTicTacFolder, DeltaKey, TreeSize) -> end, DLs. - -summary_from_binary(<<131, _Rest/binary>>=ObjBin) -> +summary_from_binary(<<131, _Rest/binary>> = ObjBin) -> {proxy_object, HeadBin, ObjSize, _Fetcher} = binary_to_term(ObjBin), summary_from_binary(HeadBin, ObjSize); summary_from_binary(ObjBin) when is_binary(ObjBin) -> summary_from_binary(ObjBin, byte_size(ObjBin)). summary_from_binary(ObjBin, ObjSize) -> - <> = ObjBin, {lists:usort(binary_to_term(VclockBin)), ObjSize, SibCount}. - - handoff(_Config) -> % Test requires multiple different databases, so want to mount them all % on individual file paths @@ -1648,9 +1822,11 @@ handoff(_Config) -> RootPathD = testutil:reset_filestructure("testD"), % Start the first database, load a test object, close it, start it again - StartOpts1 = [{root_path, RootPathA}, - {max_pencillercachesize, 16000}, - {sync_strategy, sync}], + StartOpts1 = [ + {root_path, RootPathA}, + {max_pencillercachesize, 16000}, + {sync_strategy, sync} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), % Add some none Riak objects in - which should be ignored in folds. 
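summary_from_binary/2 above relies on the riak_object v1 wire layout that testutil writes via new_v1/2; a hedged reconstruction of that parse, assuming the conventional v1 header, where the literals 53 and 1 correspond to testutil's ?MAGIC and ?V1_VERS defines:

    parse_v1_summary_sketch(ObjBin, ObjSize) ->
        % Magic byte, version byte, length-prefixed vclock, sibling count,
        % then the sibling contents (ignored here)
        <<53:8/integer, 1:8/integer, VclockLen:32/integer,
            VclockBin:VclockLen/binary, SibCount:32/integer,
            _SibsBin/binary>> = ObjBin,
        {lists:usort(binary_to_term(VclockBin)), ObjSize, SibCount}.
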
@@ -1659,9 +1835,9 @@ handoff(_Config) -> % the first store (outputting the generated objects as a list of lists) % to be used elsewhere - GenList = + GenList = [binary_uuid, binary_uuid, binary_uuid, binary_uuid], - [CL0, CL1, CL2, CL3] = + [CL0, CL1, CL2, CL3] = testutil:load_objects( 40000, GenList, @@ -1670,7 +1846,7 @@ handoff(_Config) -> fun testutil:generate_smallobjects/2, 40000 ), - + % Update an delete some objects testutil:update_some_objects(Bookie1, CL0, 1000), testutil:update_some_objects(Bookie1, CL1, 20000), @@ -1682,32 +1858,38 @@ handoff(_Config) -> testutil:wait_for_compaction(Bookie1), % Start two new empty stores - StartOpts2 = [{root_path, RootPathB}, - {max_pencillercachesize, 24000}, - {sync_strategy, none}], + StartOpts2 = [ + {root_path, RootPathB}, + {max_pencillercachesize, 24000}, + {sync_strategy, none} + ], {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), - StartOpts3 = [{root_path, RootPathC}, - {max_pencillercachesize, 30000}, - {sync_strategy, none}], + StartOpts3 = [ + {root_path, RootPathC}, + {max_pencillercachesize, 30000}, + {sync_strategy, none} + ], {ok, Bookie3} = leveled_bookie:book_start(StartOpts3), - StartOpts4 = [{root_path, RootPathD}, - {max_pencillercachesize, 30000}, - {sync_strategy, none}], + StartOpts4 = [ + {root_path, RootPathD}, + {max_pencillercachesize, 30000}, + {sync_strategy, none} + ], {ok, Bookie4} = leveled_bookie:book_start(StartOpts4), - FoldStObjectsFun = + FoldStObjectsFun = fun(B, K, V, Acc) -> - [{B, K, erlang:phash2(V)}|Acc] + [{B, K, erlang:phash2(V)} | Acc] end, - FoldObjectsFun = + FoldObjectsFun = fun(Book) -> fun(B, K, Obj, ok) -> leveled_bookie:book_put(Book, B, K, Obj, [], ?RIAK_TAG), ok end end, - + % Handoff the data from the first store to the other three stores {async, Handoff2} = leveled_bookie:book_objectfold( @@ -1719,9 +1901,11 @@ handoff(_Config) -> ), SW2 = os:timestamp(), ok = Handoff2(), - Time_HO2 = timer:now_diff(os:timestamp(), SW2)/1000, - io:format("Handoff to Book2 in key_order took ~w milliseconds ~n", - [Time_HO2]), + Time_HO2 = timer:now_diff(os:timestamp(), SW2) / 1000, + io:format( + "Handoff to Book2 in key_order took ~w milliseconds ~n", + [Time_HO2] + ), SW3 = os:timestamp(), {async, Handoff3} = leveled_bookie:book_objectfold( @@ -1732,9 +1916,11 @@ handoff(_Config) -> sqn_order ), ok = Handoff3(), - Time_HO3 = timer:now_diff(os:timestamp(), SW3)/1000, - io:format("Handoff to Book3 in sqn_order took ~w milliseconds ~n", - [Time_HO3]), + Time_HO3 = timer:now_diff(os:timestamp(), SW3) / 1000, + io:format( + "Handoff to Book3 in sqn_order took ~w milliseconds ~n", + [Time_HO3] + ), SW4 = os:timestamp(), {async, Handoff4} = leveled_bookie:book_objectfold( @@ -1746,30 +1932,29 @@ handoff(_Config) -> ), ok = Handoff4(), - Time_HO4 = timer:now_diff(os:timestamp(), SW4)/1000, - io:format("Handoff to Book4 in sqn_order took ~w milliseconds ~n", - [Time_HO4]), + Time_HO4 = timer:now_diff(os:timestamp(), SW4) / 1000, + io:format( + "Handoff to Book4 in sqn_order took ~w milliseconds ~n", + [Time_HO4] + ), % Run tictac folds to confirm all stores consistent after handoff TreeSize = xxsmall, - TicTacFolder = - {foldheads_allkeys, - ?RIAK_TAG, - {fun head_tictac_foldfun/4, - {0, leveled_tictac:new_tree(test, TreeSize)}}, + TicTacFolder = + {foldheads_allkeys, ?RIAK_TAG, + { + fun head_tictac_foldfun/4, + {0, leveled_tictac:new_tree(test, TreeSize)} + }, false, true, false, false, false}, check_tictacfold(Bookie1, Bookie2, TicTacFolder, none, TreeSize), check_tictacfold(Bookie2, Bookie3, 
TicTacFolder, none, TreeSize), check_tictacfold(Bookie3, Bookie4, TicTacFolder, none, TreeSize), - StdFolder = - {foldobjects_allkeys, - ?STD_TAG, - FoldStObjectsFun, - true, - sqn_order}, - + StdFolder = + {foldobjects_allkeys, ?STD_TAG, FoldStObjectsFun, true, sqn_order}, + {async, StdFold1} = leveled_bookie:book_returnfolder(Bookie1, StdFolder), {async, StdFold2} = leveled_bookie:book_returnfolder(Bookie2, StdFolder), {async, StdFold3} = leveled_bookie:book_returnfolder(Bookie3, StdFolder), @@ -1795,20 +1980,22 @@ dollar_key_index(_Config) -> RootPath = testutil:reset_filestructure(), {ok, Bookie1} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = fun() -> [] end, ObjL1 = testutil:generate_objects( - 1300, {fixed_binary, 1}, [], ObjectGen, IndexGen, <<"Bucket1">>), + 1300, {fixed_binary, 1}, [], ObjectGen, IndexGen, <<"Bucket1">> + ), testutil:riakload(Bookie1, ObjL1), - FoldKeysFun = fun(_B, K, Acc) -> [ K |Acc] end, + FoldKeysFun = fun(_B, K, Acc) -> [K | Acc] end, StartKey = testutil:fixed_bin_key(123), EndKey = testutil:fixed_bin_key(779), - {async, Folder} = + {async, Folder} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -1822,8 +2009,8 @@ dollar_key_index(_Config) -> {ok, REMatch} = leveled_util:regex_compile("K.y"), {ok, REMiss} = leveled_util:regex_compile("key"), - - {async, FolderREMatch} = + + {async, FolderREMatch} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -1832,7 +2019,7 @@ dollar_key_index(_Config) -> {FoldKeysFun, []}, REMatch ), - {async, FolderREMiss} = + {async, FolderREMiss} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -1841,19 +2028,19 @@ dollar_key_index(_Config) -> {FoldKeysFun, []}, REMiss ), - + true = 657 == length(FolderREMatch()), true = 0 == length(FolderREMiss()), - % Delete an object - and check that it does not show in + % Delete an object - and check that it does not show in % $key index query DeleteFun = fun(KeyID) -> ok = leveled_bookie:book_put( - Bookie1, - <<"Bucket1">>, - testutil:fixed_bin_key(KeyID), + Bookie1, + <<"Bucket1">>, + testutil:fixed_bin_key(KeyID), delete, [], ?RIAK_TAG @@ -1861,8 +2048,8 @@ dollar_key_index(_Config) -> end, DelList = [200, 400, 600, 800, 1200], lists:foreach(DeleteFun, DelList), - - {async, DeleteFolder0} = + + {async, DeleteFolder0} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -1874,7 +2061,7 @@ dollar_key_index(_Config) -> io:format("Length of Result of folder ~w~n", [ResultsDeleteFolder0]), true = 657 - 3 == ResultsDeleteFolder0, - {async, DeleteFolder1} = + {async, DeleteFolder1} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -1884,7 +2071,7 @@ dollar_key_index(_Config) -> ), ResultsDeleteFolder1 = length(DeleteFolder1()), io:format("Length of Result of folder ~w~n", [ResultsDeleteFolder1]), - true = 100 -1 == ResultsDeleteFolder1, + true = 100 - 1 == ResultsDeleteFolder1, ok = leveled_bookie:book_close(Bookie1), testutil:reset_filestructure(). 
@@ -1901,18 +2088,20 @@ handoff_close(_Config) -> {sync_strategy, testutil:sync_strategy()} ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), - ObjList1 = + ObjList1 = testutil:generate_objects( - KeyCount div 10, - {fixed_binary, 1}, [], + KeyCount div 10, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, Bucket ), - ObjList2 = + ObjList2 = testutil:generate_objects( - KeyCount - (KeyCount div 10), - {fixed_binary, KeyCount div 10 + 1}, [], + KeyCount - (KeyCount div 10), + {fixed_binary, KeyCount div 10 + 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, Bucket @@ -1920,7 +2109,7 @@ handoff_close(_Config) -> testutil:riakload(Bookie1, ObjList1), FoldObjectsFun = fun(_, _, _, Acc) -> - [os:timestamp()|Acc] + [os:timestamp() | Acc] end, {async, Runner} = leveled_bookie:book_objectfold( @@ -1945,7 +2134,6 @@ handoff_close(_Config) -> leveled_bookie:book_destroy(Bookie1), testutil:reset_filestructure(). - handoff_withcompaction(_Config) -> RootPath = testutil:reset_filestructure(), KeyCount = 100000, @@ -1959,37 +2147,41 @@ handoff_withcompaction(_Config) -> {max_run_length, 4} ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), - ObjList1 = + ObjList1 = testutil:generate_objects( - KeyCount div 4, - {fixed_binary, 1}, [], + KeyCount div 4, + {fixed_binary, 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, Bucket ), testutil:riakload(Bookie1, ObjList1), - ObjList2 = + ObjList2 = testutil:generate_objects( - KeyCount div 4, - {fixed_binary, (KeyCount div 4) + 1}, [], + KeyCount div 4, + {fixed_binary, (KeyCount div 4) + 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, Bucket ), testutil:riakload(Bookie1, ObjList2), - ObjList3 = + ObjList3 = testutil:generate_objects( - KeyCount div 4, - {fixed_binary, (KeyCount div 4) * 2 + 1}, [], + KeyCount div 4, + {fixed_binary, (KeyCount div 4) * 2 + 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, Bucket ), testutil:riakload(Bookie1, ObjList3), - ObjList4 = + ObjList4 = testutil:generate_objects( - KeyCount div 4, - {fixed_binary, (KeyCount div 4) * 3 + 1}, [], + KeyCount div 4, + {fixed_binary, (KeyCount div 4) * 3 + 1}, + [], crypto:strong_rand_bytes(512), fun() -> [] end, Bucket @@ -2004,7 +2196,7 @@ handoff_withcompaction(_Config) -> % Setup a handoff-style fold to snapshot journal FoldObjectsFun = fun(_B, _K, Obj, Acc) -> - [Obj|Acc] + [Obj | Acc] end, {async, Runner} = leveled_bookie:book_objectfold( @@ -2014,7 +2206,7 @@ handoff_withcompaction(_Config) -> true, sqn_order ), - + % Now compact the journal, twice to be sure ok = leveled_bookie:book_compactjournal(Bookie1, 30000), testutil:wait_for_compaction(Bookie1), @@ -2031,14 +2223,14 @@ handoff_withcompaction(_Config) -> FoldAndFetchFun = fun(_B, _K, PO, Acc) -> - { + { proxy_object, _HeadBin, _Size, {FetchFun, Clone, Ref} } = binary_to_term(PO), Obj = FetchFun(Clone, Ref), - [Obj|Acc] + [Obj | Acc] end, {async, HeadFolder} = leveled_bookie:book_headfold( @@ -2080,56 +2272,60 @@ handoff_withcompaction(_Config) -> false ), {TC3, HeadWithDeferFetchResults} = timer:tc(HeadFolderDefer), - io:format( - "Found ~w objects (no check_presence) in ~w ms~n", - [length(HeadWithDeferFetchResults), TC3 div 1000] - ), - + io:format( + "Found ~w objects (no check_presence) in ~w ms~n", + [length(HeadWithDeferFetchResults), TC3 div 1000] + ), + true = HeadWithFetchResults == HeadWithDeferFetchResults, - + leveled_bookie:book_destroy(Bookie1), testutil:reset_filestructure(). 
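A minimal sketch of the deferred-fetch path compared in handoff_withcompaction/1 above: the head fold yields proxy objects, and the full object body can be pulled later by applying the fetch closure carried inside the proxy. This mirrors the FoldAndFetchFun above; the function name is illustrative:

    fetch_from_proxy_sketch(ProxyBin) ->
        % The proxy carries the object head plus a fetch closure and the
        % arguments it needs, so reading the body can be deferred
        {proxy_object, _HeadBin, _Size, {FetchFun, Clone, Ref}} =
            binary_to_term(ProxyBin),
        FetchFun(Clone, Ref).
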
- %% @doc test that the riak specific $bucket indexes can be iterated %% using leveled's existing folders dollar_bucket_index(_Config) -> RootPath = testutil:reset_filestructure(), {ok, Bookie1} = leveled_bookie:book_start( - RootPath, 2000, 50000000, testutil:sync_strategy()), + RootPath, 2000, 50000000, testutil:sync_strategy() + ), ObjectGen = testutil:get_compressiblevalue_andinteger(), IndexGen = fun() -> [] end, ObjL1 = testutil:generate_objects( - 1300, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket1">>), + 1300, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket1">> + ), testutil:riakload(Bookie1, ObjL1), ObjL2 = testutil:generate_objects( - 1700, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket2">>), + 1700, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket2">> + ), testutil:riakload(Bookie1, ObjL2), ObjL3 = testutil:generate_objects( - 7000, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket3">>), + 7000, binary_uuid, [], ObjectGen, IndexGen, <<"Bucket3">> + ), testutil:riakload(Bookie1, ObjL3), - FoldKeysFun = fun(B, K, Acc) -> [{B, K}|Acc] end, + FoldKeysFun = fun(B, K, Acc) -> [{B, K} | Acc] end, FoldAccT = {FoldKeysFun, []}, - {async, Folder} = + {async, Folder} = leveled_bookie:book_keylist( - Bookie1, ?RIAK_TAG, <<"Bucket2">>, FoldAccT), + Bookie1, ?RIAK_TAG, <<"Bucket2">>, FoldAccT + ), Results = Folder(), true = 1700 == length(Results), - + {<<"Bucket2">>, SampleKey} = lists:nth(100, Results), UUID = "[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}", {ok, RESingleMatch} = leveled_util:regex_compile(SampleKey), {ok, REAllMatch} = leveled_util:regex_compile(UUID), {ok, REMiss} = leveled_util:regex_compile("no_key"), - {async, FolderREMiss} = + {async, FolderREMiss} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -2138,7 +2334,7 @@ dollar_bucket_index(_Config) -> {FoldKeysFun, []}, REMiss ), - {async, FolderRESingleMatch} = + {async, FolderRESingleMatch} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -2147,7 +2343,7 @@ dollar_bucket_index(_Config) -> {FoldKeysFun, []}, RESingleMatch ), - {async, FolderREAllMatch} = + {async, FolderREAllMatch} = leveled_bookie:book_keylist( Bookie1, ?RIAK_TAG, @@ -2156,7 +2352,7 @@ dollar_bucket_index(_Config) -> {FoldKeysFun, []}, REAllMatch ), - + true = 0 == length(FolderREMiss()), true = 1 == length(FolderRESingleMatch()), true = 1700 == length(FolderREAllMatch()), @@ -2164,7 +2360,6 @@ dollar_bucket_index(_Config) -> ok = leveled_bookie:book_close(Bookie1), testutil:reset_filestructure(). - bigobject_memorycheck(_Config) -> RootPath = testutil:reset_filestructure(), {ok, Bookie} = @@ -2176,11 +2371,11 @@ bigobject_memorycheck(_Config) -> ), Bucket = <<"B">>, IndexGen = fun() -> [] end, - ObjPutFun = + ObjPutFun = fun(I) -> Key = base64:encode(<>), Value = crypto:strong_rand_bytes(1024 * 1024), - % a big object each time! + % a big object each time! {Obj, Spc} = testutil:set_object(Bucket, Key, Value, IndexGen, []), testutil:book_riakput(Bookie, Obj, Spc) end, @@ -2204,19 +2399,18 @@ bigobject_memorycheck(_Config) -> ), {RS_TotalCDBBinMem, _RS_TotalCDBProcesses} = cdb_memory_check(), true = RS_TotalCDBBinMem < 1024 * 1024, - % No binary object references exist after startup + % No binary object references exist after startup ok = leveled_bookie:book_close(BookieR), - testutil:reset_filestructure(). - + testutil:reset_filestructure(). 
cdb_memory_check() -> TotalCDBProcesses = lists:filter( fun(P) -> - {dictionary, PD} = + {dictionary, PD} = process_info(P, dictionary), case lists:keyfind('$initial_call', 1, PD) of - {'$initial_call',{leveled_cdb,init,1}} -> + {'$initial_call', {leveled_cdb, init, 1}} -> true; _ -> false @@ -2234,8 +2428,10 @@ cdb_memory_check() -> 0, TotalCDBProcesses ), - io:format("Total binary memory ~w in ~w CDB processes~n", - [TotalCDBBinMem, length(TotalCDBProcesses)]), + io:format( + "Total binary memory ~w in ~w CDB processes~n", + [TotalCDBBinMem, length(TotalCDBProcesses)] + ), {TotalCDBBinMem, TotalCDBProcesses}. calc_total_binary_memory(Pid) -> @@ -2249,4 +2445,4 @@ calc_total_binary_memory(Pid) -> false -> ok end, - TBM. \ No newline at end of file + TBM. diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 24923778..eea0f426 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -4,105 +4,108 @@ -export([init_per_suite/1, end_per_suite/1]). --export([book_riakput/3, - book_tempriakput/4, - book_riakdelete/4, - book_riakget/3, - book_riakhead/3, - riakload/2, - stdload/2, - stdload_expiring/3, - stdload_object/6, - stdload_object/9, - reset_filestructure/0, - reset_filestructure/1, - check_bucket_stats/2, - checkhead_forlist/2, - check_forlist/2, - check_forlist/3, - check_formissinglist/2, - check_forobject/2, - check_formissingobject/3, - generate_testobject/0, - generate_testobject/5, - generate_compressibleobjects/2, - generate_smallobjects/2, - generate_objects/2, - generate_objects/5, - generate_objects/6, - set_object/5, - get_bucket/1, - get_key/1, - get_value/1, - get_vclock/1, - get_lastmodified/1, - get_compressiblevalue/0, - get_compressiblevalue_andinteger/0, - get_randomindexes_generator/1, - get_aae_segment/1, - get_aae_segment/2, - name_list/0, - load_objects/5, - load_objects/6, - update_some_objects/3, - delete_some_objects/3, - put_indexed_objects/3, - put_indexed_objects/4, - put_altered_indexed_objects/3, - put_altered_indexed_objects/4, - put_altered_indexed_objects/5, - check_indexed_objects/4, - rotating_object_check/3, - rotation_withnocheck/6, - corrupt_journal/5, - restore_file/2, - restore_topending/2, - find_journals/1, - wait_for_compaction/1, - foldkeysfun/3, - foldkeysfun_returnbucket/3, - sync_strategy/0, - riak_object/4, - get_value_from_objectlistitem/1, - numbered_key/1, - fixed_bin_key/1, - convert_to_seconds/1, - compact_and_wait/1]). 
+-export([ + book_riakput/3, + book_tempriakput/4, + book_riakdelete/4, + book_riakget/3, + book_riakhead/3, + riakload/2, + stdload/2, + stdload_expiring/3, + stdload_object/6, + stdload_object/9, + reset_filestructure/0, + reset_filestructure/1, + check_bucket_stats/2, + checkhead_forlist/2, + check_forlist/2, + check_forlist/3, + check_formissinglist/2, + check_forobject/2, + check_formissingobject/3, + generate_testobject/0, + generate_testobject/5, + generate_compressibleobjects/2, + generate_smallobjects/2, + generate_objects/2, + generate_objects/5, + generate_objects/6, + set_object/5, + get_bucket/1, + get_key/1, + get_value/1, + get_vclock/1, + get_lastmodified/1, + get_compressiblevalue/0, + get_compressiblevalue_andinteger/0, + get_randomindexes_generator/1, + get_aae_segment/1, + get_aae_segment/2, + name_list/0, + load_objects/5, + load_objects/6, + update_some_objects/3, + delete_some_objects/3, + put_indexed_objects/3, + put_indexed_objects/4, + put_altered_indexed_objects/3, + put_altered_indexed_objects/4, + put_altered_indexed_objects/5, + check_indexed_objects/4, + rotating_object_check/3, + rotation_withnocheck/6, + corrupt_journal/5, + restore_file/2, + restore_topending/2, + find_journals/1, + wait_for_compaction/1, + foldkeysfun/3, + foldkeysfun_returnbucket/3, + sync_strategy/0, + riak_object/4, + get_value_from_objectlistitem/1, + numbered_key/1, + fixed_bin_key/1, + convert_to_seconds/1, + compact_and_wait/1 +]). -define(RETURN_TERMS, {true, undefined}). -define(SLOWOFFER_DELAY, 40). -define(V1_VERS, 1). --define(MAGIC, 53). % riak_kv -> riak_object --define(MD_VTAG, <<"X-Riak-VTag">>). --define(MD_LASTMOD, <<"X-Riak-Last-Modified">>). --define(MD_DELETED, <<"X-Riak-Deleted">>). +% riak_kv -> riak_object +-define(MAGIC, 53). +-define(MD_VTAG, <<"X-Riak-VTag">>). +-define(MD_LASTMOD, <<"X-Riak-Last-Modified">>). +-define(MD_DELETED, <<"X-Riak-Deleted">>). -define(MD_INDEX, <<"index">>). -define(EMPTY_VTAG_BIN, <<"e">>). -define(ROOT_PATH, "test"). -record(r_content, { - metadata, - value :: term() - }). + metadata, + value :: term() +}). -record(r_object, { - bucket, - key, - contents :: [#r_content{}], - vclock, - updatemetadata=dict:store(clean, true, dict:new()), - updatevalue :: term()}). - + bucket, + key, + contents :: [#r_content{}], + vclock, + updatemetadata = dict:store(clean, true, dict:new()), + updatevalue :: term() +}). init_per_suite(Config) -> LogTemplate = [time, " log_level=", level, " ", msg, "\n"], LogFormatter = { logger_formatter, - #{ - time_designator => $\s, - template => LogTemplate - } + #{ + time_designator => $\s, + template => LogTemplate + } }, {suite, SUITEName} = lists:keyfind(suite, 1, Config), FileName = "leveled_" ++ SUITEName ++ "_ct.log", @@ -114,7 +117,7 @@ init_per_suite(Config) -> max_no_files => 5 } }, - + LogFilter = fun(LogEvent, LogType) -> Meta = maps:get(meta, LogEvent), @@ -148,17 +151,19 @@ end_per_suite(_Config) -> ok. riak_object(Bucket, Key, Value, MetaData) -> - Content = #r_content{metadata=dict:from_list(MetaData), value=Value}, - Obj = #r_object{bucket=Bucket, - key=Key, - contents=[Content], - vclock=generate_vclock()}, + Content = #r_content{metadata = dict:from_list(MetaData), value = Value}, + Obj = #r_object{ + bucket = Bucket, + key = Key, + contents = [Content], + vclock = generate_vclock() + }, to_binary(v1, Obj). 
%% ================================================= %% From riak_object -to_binary(v1, #r_object{contents=Contents, vclock=VClock}) -> +to_binary(v1, #r_object{contents = Contents, vclock = VClock}) -> new_v1(VClock, Contents). new_v1(Vclock, Siblings) -> @@ -167,58 +172,63 @@ new_v1(Vclock, Siblings) -> SibCount = length(Siblings), SibsBin = bin_contents(Siblings), <>. + VclockBin/binary, SibCount:32/integer, SibsBin/binary>>. -bin_content(#r_content{metadata=Meta, value=Val}) -> +bin_content(#r_content{metadata = Meta, value = Val}) -> ValBin = encode_maybe_binary(Val), ValLen = byte_size(ValBin), MetaBin = meta_bin(Meta), MetaLen = byte_size(MetaBin), - <>. + <>. bin_contents(Contents) -> F = fun(Content, Acc) -> - <> - end, + <> + end, lists:foldl(F, <<>>, Contents). meta_bin(MD) -> {{VTagVal, Deleted, LastModVal}, RestBin} = - dict:fold(fun fold_meta_to_bin/3, - {{undefined, <<0>>, undefined}, <<>>}, - MD), - VTagBin = case VTagVal of - undefined -> ?EMPTY_VTAG_BIN; - _ -> list_to_binary(VTagVal) - end, + dict:fold( + fun fold_meta_to_bin/3, + {{undefined, <<0>>, undefined}, <<>>}, + MD + ), + VTagBin = + case VTagVal of + undefined -> ?EMPTY_VTAG_BIN; + _ -> list_to_binary(VTagVal) + end, VTagLen = byte_size(VTagBin), - LastModBin = case LastModVal of - undefined -> - <<0:32/integer, 0:32/integer, 0:32/integer>>; - {Mega,Secs,Micro} -> - <> - end, + LastModBin = + case LastModVal of + undefined -> + <<0:32/integer, 0:32/integer, 0:32/integer>>; + {Mega, Secs, Micro} -> + <> + end, <>. + Deleted:1/binary-unit:8, RestBin/binary>>. -fold_meta_to_bin(?MD_VTAG, Value, {{_Vt,Del,Lm},RestBin}) -> +fold_meta_to_bin(?MD_VTAG, Value, {{_Vt, Del, Lm}, RestBin}) -> {{Value, Del, Lm}, RestBin}; -fold_meta_to_bin(?MD_LASTMOD, Value, {{Vt,Del,_Lm},RestBin}) -> - {{Vt, Del, Value}, RestBin}; -fold_meta_to_bin(?MD_DELETED, true, {{Vt,_Del,Lm},RestBin})-> - {{Vt, <<1>>, Lm}, RestBin}; +fold_meta_to_bin(?MD_LASTMOD, Value, {{Vt, Del, _Lm}, RestBin}) -> + {{Vt, Del, Value}, RestBin}; +fold_meta_to_bin(?MD_DELETED, true, {{Vt, _Del, Lm}, RestBin}) -> + {{Vt, <<1>>, Lm}, RestBin}; fold_meta_to_bin(?MD_DELETED, "true", Acc) -> fold_meta_to_bin(?MD_DELETED, true, Acc); -fold_meta_to_bin(?MD_DELETED, _, {{Vt,_Del,Lm},RestBin}) -> +fold_meta_to_bin(?MD_DELETED, _, {{Vt, _Del, Lm}, RestBin}) -> {{Vt, <<0>>, Lm}, RestBin}; -fold_meta_to_bin(Key, Value, {{_Vt,_Del,_Lm}=Elems,RestBin}) -> +fold_meta_to_bin(Key, Value, {{_Vt, _Del, _Lm} = Elems, RestBin}) -> ValueBin = encode_maybe_binary(Value), ValueLen = byte_size(ValueBin), KeyBin = encode_maybe_binary(Key), KeyLen = byte_size(KeyBin), - MetaBin = <>, + MetaBin = + <>, {Elems, <>}. encode_maybe_binary(Bin) when is_binary(Bin) -> @@ -261,18 +271,19 @@ book_riakget(Pid, Bucket, Key) -> book_riakhead(Pid, Bucket, Key) -> leveled_bookie:book_head(Pid, Bucket, Key, ?RIAK_TAG). - riakload(Bookie, ObjectList) -> - lists:foreach(fun({_RN, Obj, Spc}) -> - R = book_riakput(Bookie, Obj, Spc), - case R of - ok -> ok; - pause -> timer:sleep(?SLOWOFFER_DELAY) - end - end, - ObjectList). - -stdload(Bookie, Count) -> + lists:foreach( + fun({_RN, Obj, Spc}) -> + R = book_riakput(Bookie, Obj, Spc), + case R of + ok -> ok; + pause -> timer:sleep(?SLOWOFFER_DELAY) + end + end, + ObjectList + ). + +stdload(Bookie, Count) -> stdload(Bookie, Count, []). stdload(_Bookie, 0, Acc) -> @@ -286,7 +297,7 @@ stdload(Bookie, Count, Acc) -> ok -> ok; pause -> timer:sleep(?SLOWOFFER_DELAY) end, - stdload(Bookie, Count - 1, [{B, K, erlang:phash2(V)}|Acc]). 
+ stdload(Bookie, Count - 1, [{B, K, erlang:phash2(V)} | Acc]). stdload_expiring(Book, KeyCount, When) -> % Adds KeyCount object that will expire When seconds in the future. @@ -302,14 +313,14 @@ stdload_expiring(Book, KeyCount, TTL, V, Acc) -> K = list_to_binary(leveled_util:generate_uuid()), I = KeyCount rem 1000, stdload_object(Book, B, K, I, V, TTL), - stdload_expiring(Book, KeyCount - 1, TTL, V, [{I, B, K}|Acc]). + stdload_expiring(Book, KeyCount - 1, TTL, V, [{I, B, K} | Acc]). stdload_object(Book, B, K, I, V, TTL) -> stdload_object(Book, B, K, I, V, TTL, ?STD_TAG, true, false). stdload_object(Book, B, K, I, V, TTL, Tag, RemovePrev2i, MustFind) -> Obj = [{index, [I]}, {value, V}], - {IdxSpecs, Obj0} = + {IdxSpecs, Obj0} = case {leveled_bookie:book_get(Book, B, K, Tag), MustFind} of {{ok, PrevObj}, _} -> {index, PrevIs} = lists:keyfind(index, 1, PrevObj), @@ -317,11 +328,17 @@ stdload_object(Book, B, K, I, V, TTL, Tag, RemovePrev2i, MustFind) -> true -> MapFun = fun(OldI) -> {remove, <<"temp_int">>, OldI} end, - {[{add, <<"temp_int">>, I}|lists:map(MapFun, PrevIs)], - Obj}; + { + [ + {add, <<"temp_int">>, I} + | lists:map(MapFun, PrevIs) + ], + Obj + }; false -> - {[{add, <<"temp_int">>, I}], - [{index, [I|PrevIs]}, {value, V}]} + {[{add, <<"temp_int">>, I}], [ + {index, [I | PrevIs]}, {value, V} + ]} end; {not_found, false} -> {[{add, <<"temp_int">>, I}], Obj} @@ -331,23 +348,27 @@ stdload_object(Book, B, K, I, V, TTL, Tag, RemovePrev2i, MustFind) -> infinity -> leveled_bookie:book_put(Book, B, K, Obj0, IdxSpecs, Tag); TTL when is_integer(TTL) -> - leveled_bookie:book_tempput(Book, B, K, Obj0, - IdxSpecs, Tag, TTL) + leveled_bookie:book_tempput( + Book, + B, + K, + Obj0, + IdxSpecs, + Tag, + TTL + ) end, case R of - ok -> + ok -> ok; - pause -> + pause -> io:format("Slow offer needed~n"), timer:sleep(?SLOWOFFER_DELAY) end. - - - reset_filestructure() -> reset_filestructure(0, ?ROOT_PATH). - + reset_filestructure(Wait) when is_integer(Wait) -> reset_filestructure(Wait, ?ROOT_PATH); reset_filestructure(RootPath) when is_list(RootPath) -> @@ -368,21 +389,24 @@ reset_filestructure(Wait, RootPath) -> wait_for_compaction(Bookie) -> F = fun leveled_bookie:book_islastcompactionpending/1, - lists:foldl(fun(X, Pending) -> - case Pending of - false -> - false; - true -> - io:format( - "Loop ~w waiting for journal " - "compaction to complete~n", - [X] - ), - timer:sleep(5000), - F(Bookie) - end end, - true, - lists:seq(1, 15)). + lists:foldl( + fun(X, Pending) -> + case Pending of + false -> + false; + true -> + io:format( + "Loop ~w waiting for journal " + "compaction to complete~n", + [X] + ), + timer:sleep(5000), + F(Bookie) + end + end, + true, + lists:seq(1, 15) + ). check_bucket_stats(Bookie, Bucket) -> FoldSW1 = os:timestamp(), @@ -390,13 +414,16 @@ check_bucket_stats(Bookie, Bucket) -> {async, Folder1} = leveled_bookie:book_returnfolder(Bookie, {riakbucket_stats, Bucket}), {B1Size, B1Count} = Folder1(), - io:format("Bucket fold completed in ~w microseconds~n", - [timer:now_diff(os:timestamp(), FoldSW1)]), - io:format("Bucket ~s has size ~w and count ~w~n", - [Bucket, B1Size, B1Count]), + io:format( + "Bucket fold completed in ~w microseconds~n", + [timer:now_diff(os:timestamp(), FoldSW1)] + ), + io:format( + "Bucket ~s has size ~w and count ~w~n", + [Bucket, B1Size, B1Count] + ), {B1Size, B1Count}. - check_forlist(Bookie, ChkList) -> check_forlist(Bookie, ChkList, false). 
@@ -410,20 +437,25 @@ check_forlist(Bookie, ChkList, Log) -> true -> ok end, - R = book_riakget(Bookie, - Obj#r_object.bucket, - Obj#r_object.key), + R = book_riakget( + Bookie, + Obj#r_object.bucket, + Obj#r_object.key + ), true = case R of {ok, Val} -> to_binary(v1, Obj) == Val; not_found -> - io:format("Object not found for key ~s~n", - [Obj#r_object.key]), + io:format( + "Object not found for key ~s~n", + [Obj#r_object.key] + ), error end - end, - ChkList), + end, + ChkList + ), io:format( "Fetch check took ~w microseconds checking list of length ~w~n", [timer:now_diff(os:timestamp(), SW), length(ChkList)] @@ -431,51 +463,70 @@ check_forlist(Bookie, ChkList, Log) -> checkhead_forlist(Bookie, ChkList) -> SW = os:timestamp(), - lists:foreach(fun({_RN, Obj, _Spc}) -> - R = book_riakhead(Bookie, - Obj#r_object.bucket, - Obj#r_object.key), - true = case R of - {ok, _Head} -> - true; - not_found -> - io:format("Object not found for key ~s~n", - [Obj#r_object.key]), - error - end - end, - ChkList), - io:format("Head check took ~w microseconds checking list of length ~w~n", - [timer:now_diff(os:timestamp(), SW), length(ChkList)]). + lists:foreach( + fun({_RN, Obj, _Spc}) -> + R = book_riakhead( + Bookie, + Obj#r_object.bucket, + Obj#r_object.key + ), + true = + case R of + {ok, _Head} -> + true; + not_found -> + io:format( + "Object not found for key ~s~n", + [Obj#r_object.key] + ), + error + end + end, + ChkList + ), + io:format( + "Head check took ~w microseconds checking list of length ~w~n", + [timer:now_diff(os:timestamp(), SW), length(ChkList)] + ). check_formissinglist(Bookie, ChkList) -> SW = os:timestamp(), - lists:foreach(fun({_RN, Obj, _Spc}) -> - R = book_riakget(Bookie, - Obj#r_object.bucket, - Obj#r_object.key), - R = not_found end, - ChkList), - io:format("Miss check took ~w microseconds checking list of length ~w~n", - [timer:now_diff(os:timestamp(), SW), length(ChkList)]). + lists:foreach( + fun({_RN, Obj, _Spc}) -> + R = book_riakget( + Bookie, + Obj#r_object.bucket, + Obj#r_object.key + ), + R = not_found + end, + ChkList + ), + io:format( + "Miss check took ~w microseconds checking list of length ~w~n", + [timer:now_diff(os:timestamp(), SW), length(ChkList)] + ). check_forobject(Bookie, TestObject) -> TestBinary = to_binary(v1, TestObject), - {ok, TestBinary} = book_riakget(Bookie, - TestObject#r_object.bucket, - TestObject#r_object.key), - {ok, HeadBinary} = book_riakhead(Bookie, - TestObject#r_object.bucket, - TestObject#r_object.key), - {{_SibMetaBin, Vclock, _Hash, size}, _LMS} - = leveled_head:riak_extract_metadata(HeadBinary, size), + {ok, TestBinary} = book_riakget( + Bookie, + TestObject#r_object.bucket, + TestObject#r_object.key + ), + {ok, HeadBinary} = book_riakhead( + Bookie, + TestObject#r_object.bucket, + TestObject#r_object.key + ), + {{_SibMetaBin, Vclock, _Hash, size}, _LMS} = + leveled_head:riak_extract_metadata(HeadBinary, size), true = binary_to_term(Vclock) == TestObject#r_object.vclock. check_formissingobject(Bookie, Bucket, Key) -> not_found = book_riakget(Bookie, Bucket, Key), not_found = book_riakhead(Bookie, Bucket, Key). - generate_testobject() -> {B1, K1, V1, Spec1, MD} = { @@ -488,20 +539,22 @@ generate_testobject() -> generate_testobject(B1, K1, V1, Spec1, MD). generate_testobject(B, K, V, Spec, MD) -> - MD0 = [{?MD_LASTMOD, os:timestamp()}|MD], - Content = #r_content{metadata=dict:from_list(MD0), value=V}, - {#r_object{bucket=B, - key=K, - contents=[Content], - vclock=generate_vclock()}, - Spec}. 
- + MD0 = [{?MD_LASTMOD, os:timestamp()} | MD], + Content = #r_content{metadata = dict:from_list(MD0), value = V}, + { + #r_object{ + bucket = B, + key = K, + contents = [Content], + vclock = generate_vclock() + }, + Spec + }. generate_compressibleobjects(Count, KeyNumber) -> V = get_compressiblevalue(), generate_objects(Count, KeyNumber, [], V). - get_compressiblevalue_andinteger() -> {rand:uniform(1000), get_compressiblevalue()}. @@ -514,14 +567,23 @@ get_compressiblevalue() -> S6 = "GGGGGGGGGGGGGGG", S7 = "===============", S8 = "...............", - Selector = [{1, S1}, {2, S2}, {3, S3}, {4, S4}, - {5, S5}, {6, S6}, {7, S7}, {8, S8}], + Selector = [ + {1, S1}, + {2, S2}, + {3, S3}, + {4, S4}, + {5, S5}, + {6, S6}, + {7, S7}, + {8, S8} + ], L = lists:seq(1, 1024), iolist_to_binary( lists:foldl( fun(_X, Acc) -> {_, Str} = lists:keyfind(rand:uniform(8), 1, Selector), - [Str|Acc] end, + [Str | Acc] + end, [""], L ) @@ -533,7 +595,6 @@ generate_smallobjects(Count, KeyNumber) -> generate_objects(Count, KeyNumber) -> generate_objects(Count, KeyNumber, [], crypto:strong_rand_bytes(4096)). - generate_objects(Count, KeyNumber, ObjL, Value) -> generate_objects(Count, KeyNumber, ObjL, Value, fun() -> [] end). @@ -543,14 +604,18 @@ generate_objects(Count, KeyNumber, ObjL, Value, IndexGen) -> generate_objects(0, _KeyNumber, ObjL, _Value, _IndexGen, _Bucket) -> lists:reverse(ObjL); generate_objects( - Count, binary_uuid, ObjL, Value, IndexGen, Bucket) - when is_list(Bucket) -> + Count, binary_uuid, ObjL, Value, IndexGen, Bucket +) when + is_list(Bucket) +-> generate_objects( Count, binary_uuid, ObjL, Value, IndexGen, list_to_binary(Bucket) ); generate_objects( - Count, binary_uuid, ObjL, Value, IndexGen, Bucket) - when is_binary(Bucket); is_tuple(Bucket) -> + Count, binary_uuid, ObjL, Value, IndexGen, Bucket +) when + is_binary(Bucket); is_tuple(Bucket) +-> {Obj1, Spec1} = set_object( Bucket, @@ -561,56 +626,77 @@ generate_objects( generate_objects( Count - 1, binary_uuid, - [{rand:uniform(), Obj1, Spec1}|ObjL], + [{rand:uniform(), Obj1, Spec1} | ObjL], Value, IndexGen, Bucket ); generate_objects( - Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket) - when is_list(Bucket) -> + Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket +) when + is_list(Bucket) +-> generate_objects( - Count, {binary, KeyNumber}, ObjL, Value, IndexGen, list_to_binary(Bucket) + Count, + {binary, KeyNumber}, + ObjL, + Value, + IndexGen, + list_to_binary(Bucket) ); generate_objects( - Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket) - when is_binary(Bucket) -> - {Obj1, Spec1} = + Count, {binary, KeyNumber}, ObjL, Value, IndexGen, Bucket +) when + is_binary(Bucket) +-> + {Obj1, Spec1} = set_object( Bucket, list_to_binary(numbered_key(KeyNumber)), Value, IndexGen ), - generate_objects(Count - 1, - {binary, KeyNumber + 1}, - [{rand:uniform(), Obj1, Spec1}|ObjL], - Value, - IndexGen, - Bucket); -generate_objects(Count, {fixed_binary, KeyNumber}, ObjL, Value, IndexGen, Bucket) -> + generate_objects( + Count - 1, + {binary, KeyNumber + 1}, + [{rand:uniform(), Obj1, Spec1} | ObjL], + Value, + IndexGen, + Bucket + ); +generate_objects( + Count, {fixed_binary, KeyNumber}, ObjL, Value, IndexGen, Bucket +) -> {Obj1, Spec1} = - set_object(Bucket, - fixed_bin_key(KeyNumber), - Value, - IndexGen), - generate_objects(Count - 1, - {fixed_binary, KeyNumber + 1}, - [{rand:uniform(), Obj1, Spec1}|ObjL], - Value, - IndexGen, - Bucket); + set_object( + Bucket, + fixed_bin_key(KeyNumber), + Value, + IndexGen + ), + 
generate_objects( + Count - 1, + {fixed_binary, KeyNumber + 1}, + [{rand:uniform(), Obj1, Spec1} | ObjL], + Value, + IndexGen, + Bucket + ); generate_objects(Count, KeyNumber, ObjL, Value, IndexGen, Bucket) -> - {Obj1, Spec1} = set_object(Bucket, - numbered_key(KeyNumber), - Value, - IndexGen), - generate_objects(Count - 1, - KeyNumber + 1, - [{rand:uniform(), Obj1, Spec1}|ObjL], - Value, - IndexGen, - Bucket). + {Obj1, Spec1} = set_object( + Bucket, + numbered_key(KeyNumber), + Value, + IndexGen + ), + generate_objects( + Count - 1, + KeyNumber + 1, + [{rand:uniform(), Obj1, Spec1} | ObjL], + Value, + IndexGen, + Bucket + ). %% @doc generates a key, exported so tests can use it without copying %% code @@ -638,9 +724,8 @@ set_object(Bucket, Key, Value, IndexGen, Indexes2Remove, IndexesNotToRemove) -> fun({add, IdxF, IdxV}) -> {IdxF, IdxV} end, lists:flatten([IndexesNotToRemove, IdxSpecs]) ), - Obj = {Bucket, - Key, - Value, + Obj = + {Bucket, Key, Value, lists:flatten( IdxSpecs, lists:map( @@ -648,17 +733,23 @@ set_object(Bucket, Key, Value, IndexGen, Indexes2Remove, IndexesNotToRemove) -> Indexes2Remove ) ), - [{<<"MDK">>, iolist_to_binary([<<"MDV">>, Key])}, + [ + {<<"MDK">>, iolist_to_binary([<<"MDV">>, Key])}, {<<"MDK2">>, iolist_to_binary([<<"MDV">>, Key])}, {?MD_LASTMOD, os:timestamp()}, - {?MD_INDEX, Indexes}]}, + {?MD_INDEX, Indexes} + ]}, {B1, K1, V1, DeltaSpecs, MD} = Obj, - Content = #r_content{metadata=dict:from_list(MD), value=V1}, - {#r_object{bucket=B1, - key=K1, - contents=[Content], - vclock=generate_vclock()}, - DeltaSpecs}. + Content = #r_content{metadata = dict:from_list(MD), value = V1}, + { + #r_object{ + bucket = B1, + key = K1, + contents = [Content], + vclock = generate_vclock() + }, + DeltaSpecs + }. get_value_from_objectlistitem({_Int, Obj, _Spc}) -> [Content] = Obj#r_object.contents, @@ -674,16 +765,20 @@ update_some_objects(Bookie, ObjList, SampleSize) -> [C] = Obj#r_object.contents, MD = C#r_content.metadata, MD0 = dict:store(?MD_LASTMOD, os:timestamp(), MD), - C0 = C#r_content{value = crypto:strong_rand_bytes(512), - metadata = MD0}, + C0 = C#r_content{ + value = crypto:strong_rand_bytes(512), + metadata = MD0 + }, UpdObj = Obj#r_object{vclock = VC0, contents = [C0]}, {R, UpdObj, Spec} end, UpdatedObjList = lists:map(UpdateFun, ToUpdateList), riakload(Bookie, UpdatedObjList), Time = timer:now_diff(os:timestamp(), StartWatchA), - io:format("~w objects updates in ~w seconds~n", - [SampleSize, Time/1000000]). + io:format( + "~w objects updates in ~w seconds~n", + [SampleSize, Time / 1000000] + ). delete_some_objects(Bookie, ObjList, SampleSize) -> StartWatchA = os:timestamp(), @@ -696,24 +791,41 @@ delete_some_objects(Bookie, ObjList, SampleSize) -> end, lists:foreach(DeleteFun, ToDeleteList), Time = timer:now_diff(os:timestamp(), StartWatchA), - io:format("~w objects deleted in ~w seconds~n", - [SampleSize, Time/1000000]). + io:format( + "~w objects deleted in ~w seconds~n", + [SampleSize, Time / 1000000] + ). generate_vclock() -> - lists:map(fun(X) -> - {_, Actor} = lists:keyfind(rand:uniform(10), - 1, - actor_list()), - {Actor, X} end, - lists:seq(1, rand:uniform(8))). + lists:map( + fun(X) -> + {_, Actor} = lists:keyfind( + rand:uniform(10), + 1, + actor_list() + ), + {Actor, X} + end, + lists:seq(1, rand:uniform(8)) + ). update_vclock(VC) -> - [{Actor, X}|Rest] = VC, - [{Actor, X + 1}|Rest]. + [{Actor, X} | Rest] = VC, + [{Actor, X + 1} | Rest]. 
actor_list() -> - [{1, albert}, {2, bertie}, {3, clara}, {4, dave}, {5, elton}, - {6, fred}, {7, george}, {8, harry}, {9, isaac}, {10, leila}]. + [ + {1, albert}, + {2, bertie}, + {3, clara}, + {4, dave}, + {5, elton}, + {6, fred}, + {7, george}, + {8, harry}, + {9, isaac}, + {10, leila} + ]. get_bucket(Object) -> Object#r_object.bucket. @@ -722,9 +834,10 @@ get_key(Object) -> Object#r_object.key. get_value(ObjectBin) -> - <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, - Rest1/binary>> = ObjectBin, - <<_VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>> = Rest1, + <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, Rest1/binary>> = + ObjectBin, + <<_VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>> = + Rest1, case SibCount of 1 -> <> = SibsBin, @@ -741,69 +854,83 @@ get_value(ObjectBin) -> end. get_lastmodified(ObjectBin) -> - <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, - Rest1/binary>> = ObjectBin, - <<_VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>> = Rest1, + <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, Rest1/binary>> = + ObjectBin, + <<_VclockBin:VclockLen/binary, SibCount:32/integer, SibsBin/binary>> = + Rest1, case SibCount of 1 -> <> = SibsBin, - <<_ContentBin:SibLength/binary, - MetaLength:32/integer, - MetaBin:MetaLength/binary, - _Rest3/binary>> = Rest2, - <> = Rest2, + <> = MetaBin, {MegaSec, Sec, MicroSec} end. get_vclock(ObjectBin) -> - <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, - Rest1/binary>> = ObjectBin, + <<_Magic:8/integer, _Vers:8/integer, VclockLen:32/integer, Rest1/binary>> = + ObjectBin, <> = Rest1, - binary_to_term(VclockBin). + binary_to_term(VclockBin). load_objects(ChunkSize, GenList, Bookie, TestObject, Generator) -> load_objects(ChunkSize, GenList, Bookie, TestObject, Generator, 1000). load_objects(ChunkSize, GenList, Bookie, TestObject, Generator, SubListL) -> - lists:map(fun(KN) -> - ObjListA = Generator(ChunkSize, KN), - StartWatchA = os:timestamp(), - riakload(Bookie, ObjListA), - Time = timer:now_diff(os:timestamp(), StartWatchA), - io:format("~w objects loaded in ~w seconds~n", - [ChunkSize, Time/1000000]), - if - TestObject == no_check -> - ok; - true -> - check_forobject(Bookie, TestObject) - end, - lists:sublist(ObjListA, SubListL) end, - GenList). - + lists:map( + fun(KN) -> + ObjListA = Generator(ChunkSize, KN), + StartWatchA = os:timestamp(), + riakload(Bookie, ObjListA), + Time = timer:now_diff(os:timestamp(), StartWatchA), + io:format( + "~w objects loaded in ~w seconds~n", + [ChunkSize, Time / 1000000] + ), + if + TestObject == no_check -> + ok; + true -> + check_forobject(Bookie, TestObject) + end, + lists:sublist(ObjListA, SubListL) + end, + GenList + ). get_randomindexes_generator(Count) -> Generator = fun() -> lists:map( fun(X) -> - {add, - iolist_to_binary(["idx", integer_to_list(X), "_bin"]), + {add, iolist_to_binary(["idx", integer_to_list(X), "_bin"]), iolist_to_binary([get_randomdate(), get_randomname()])} end, - lists:seq(1, Count)) + lists:seq(1, Count) + ) end, Generator. name_list() -> - [{1, "Sophia"}, {2, "Emma"}, {3, "Olivia"}, {4, "Ava"}, - {5, "Isabella"}, {6, "Mia"}, {7, "Zoe"}, {8, "Lily"}, - {9, "Emily"}, {10, "Madelyn"}, {11, "Madison"}, {12, "Chloe"}, - {13, "Charlotte"}, {14, "Aubrey"}, {15, "Avery"}, - {16, "Abigail"}]. 
+ [ + {1, "Sophia"}, + {2, "Emma"}, + {3, "Olivia"}, + {4, "Ava"}, + {5, "Isabella"}, + {6, "Mia"}, + {7, "Zoe"}, + {8, "Lily"}, + {9, "Emily"}, + {10, "Madelyn"}, + {11, "Madison"}, + {12, "Chloe"}, + {13, "Charlotte"}, + {14, "Aubrey"}, + {15, "Avery"}, + {16, "Abigail"} + ]. get_randomname() -> NameList = name_list(), @@ -817,16 +944,19 @@ get_randomdate() -> RandPoint = LowTime + rand:uniform(HighTime - LowTime), Date = calendar:gregorian_seconds_to_datetime(RandPoint), {{Year, Month, Day}, {Hour, Minute, Second}} = Date, - lists:flatten(io_lib:format("~4..0w~2..0w~2..0w~2..0w~2..0w~2..0w", - [Year, Month, Day, Hour, Minute, Second])). - + lists:flatten( + io_lib:format( + "~4..0w~2..0w~2..0w~2..0w~2..0w~2..0w", + [Year, Month, Day, Hour, Minute, Second] + ) + ). -foldkeysfun(_Bucket, Item, Acc) -> [Item|Acc]. +foldkeysfun(_Bucket, Item, Acc) -> [Item | Acc]. foldkeysfun_returnbucket(Bucket, {Term, Key}, Acc) -> - [{Term, {Bucket, Key}}|Acc]; + [{Term, {Bucket, Key}} | Acc]; foldkeysfun_returnbucket(Bucket, Key, Acc) -> - [{Bucket, Key}|Acc]. + [{Bucket, Key} | Acc]. check_indexed_objects(Book, B, KSpecL, V) -> % Check all objects match, return what should be the results of an all @@ -839,29 +969,31 @@ check_indexed_objects(Book, B, KSpecL, V) -> {add, <<"idx1_bin">>, IdxVal} = lists:keyfind(add, 1, Spc), {IdxVal, K} end, - KSpecL), + KSpecL + ), % Check the all index query matches expectations - R = + R = leveled_bookie:book_returnfolder( Book, - {index_query, - B, - {fun foldkeysfun/3, []}, - {<<"idx1_bin">>, <<"0">>, <<"|">>}, - ?RETURN_TERMS}), + {index_query, B, {fun foldkeysfun/3, []}, + {<<"idx1_bin">>, <<"0">>, <<"|">>}, ?RETURN_TERMS} + ), SW = os:timestamp(), {async, Fldr} = R, QR0 = Fldr(), io:format( "Query match found of length ~w in ~w microseconds " "expected ~w ~n", - [length(QR0), timer:now_diff(os:timestamp(), SW), length(IdxR)]), + [length(QR0), timer:now_diff(os:timestamp(), SW), length(IdxR)] + ), QR = lists:sort(QR0), ER = lists:sort(IdxR), - - ok = if ER == QR -> ok end, - ok. + ok = + if + ER == QR -> ok + end, + ok. put_indexed_objects(Book, Bucket, Count) -> V = get_compressiblevalue(), @@ -870,26 +1002,27 @@ put_indexed_objects(Book, Bucket, Count) -> put_indexed_objects(Book, Bucket, Count, V) -> IndexGen = get_randomindexes_generator(1), SW = os:timestamp(), - ObjL1 = + ObjL1 = generate_objects(Count, binary_uuid, [], V, IndexGen, Bucket), KSpecL = lists:map( fun({_RN, Obj, Spc}) -> - R = book_riakput(Book,Obj, Spc), + R = book_riakput(Book, Obj, Spc), case R of ok -> ok; pause -> timer:sleep(?SLOWOFFER_DELAY) end, {testutil:get_key(Obj), Spc} end, - ObjL1), + ObjL1 + ), io:format( "Put of ~w objects with ~w index entries " "each completed in ~w microseconds~n", - [Count, 1, timer:now_diff(os:timestamp(), SW)]), + [Count, 1, timer:now_diff(os:timestamp(), SW)] + ), {KSpecL, V}. - put_altered_indexed_objects(Book, Bucket, KSpecL) -> put_altered_indexed_objects(Book, Bucket, KSpecL, true). 
@@ -902,7 +1035,7 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) -> IndexGen = get_randomindexes_generator(1), ThisProcess = self(), FindAdditionFun = fun(SpcItem) -> element(1, SpcItem) == add end, - MapFun = + MapFun = fun({K, Spc}, Acc) -> OldSpecs = lists:filter(FindAdditionFun, Spc), {RemoveSpc, AddSpc} = @@ -916,7 +1049,8 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) -> fun() -> {O, DeltaSpecs} = set_object( - Bucket, K, V, IndexGen, RemoveSpc, AddSpc), + Bucket, K, V, IndexGen, RemoveSpc, AddSpc + ), % DeltaSpecs should be new indexes added, and any old % indexes which have been removed by this change where % RemoveOld2i is true. @@ -954,10 +1088,12 @@ put_altered_indexed_objects(Book, Bucket, KSpecL, RemoveOld2i, V) -> {RplKSpecL, V}. rotating_object_check(RootPath, B, NumberOfObjects) -> - BookOpts = [{root_path, RootPath}, - {cache_size, 1000}, - {max_journalsize, 5000000}, - {sync_strategy, sync_strategy()}], + BookOpts = [ + {root_path, RootPath}, + {cache_size, 1000}, + {max_journalsize, 5000000}, + {sync_strategy, sync_strategy()} + ], {ok, Book1} = leveled_bookie:book_start(BookOpts), {KSpcL1, V1} = put_indexed_objects(Book1, B, NumberOfObjects), ok = check_indexed_objects(Book1, B, KSpcL1, V1), @@ -974,7 +1110,7 @@ rotating_object_check(RootPath, B, NumberOfObjects) -> true = NumberOfObjects == length(BList()), ok = leveled_bookie:book_close(Book2), ok. - + rotation_withnocheck(Book1, B, NumberOfObjects, V1, V2, V3) -> {KSpcL1, _V1} = put_indexed_objects(Book1, B, NumberOfObjects, V1), {KSpcL2, _V2} = put_altered_indexed_objects(Book1, B, KSpcL1, true, V2), @@ -983,32 +1119,40 @@ rotation_withnocheck(Book1, B, NumberOfObjects, V1, V2, V3) -> corrupt_journal(RootPath, FileName, Corruptions, BasePosition, GapSize) -> OriginalPath = RootPath ++ "/journal/journal_files/" ++ FileName, - BackupPath = RootPath ++ "/journal/journal_files/" ++ - filename:basename(FileName, ".cdb") ++ ".bak", - io:format("Corruption attempt to be made to filename ~s ~w ~w~n", - [FileName, - filelib:is_file(OriginalPath), - filelib:is_file(BackupPath)]), + BackupPath = + RootPath ++ "/journal/journal_files/" ++ + filename:basename(FileName, ".cdb") ++ ".bak", + io:format( + "Corruption attempt to be made to filename ~s ~w ~w~n", + [ + FileName, + filelib:is_file(OriginalPath), + filelib:is_file(BackupPath) + ] + ), {ok, _BytesCopied} = file:copy(OriginalPath, BackupPath), {ok, Handle} = file:open(OriginalPath, [binary, raw, read, write]), - lists:foreach(fun(X) -> - Position = X * GapSize + BasePosition, - ok = file:pwrite(Handle, Position, <<0:8/integer>>) - end, - lists:seq(1, Corruptions)), + lists:foreach( + fun(X) -> + Position = X * GapSize + BasePosition, + ok = file:pwrite(Handle, Position, <<0:8/integer>>) + end, + lists:seq(1, Corruptions) + ), ok = file:close(Handle). - restore_file(RootPath, FileName) -> OriginalPath = RootPath ++ "/journal/journal_files/" ++ FileName, - BackupPath = RootPath ++ "/journal/journal_files/" ++ - filename:basename(FileName, ".cdb") ++ ".bak", + BackupPath = + RootPath ++ "/journal/journal_files/" ++ + filename:basename(FileName, ".cdb") ++ ".bak", file:copy(BackupPath, OriginalPath). 
restore_topending(RootPath, FileName) -> OriginalPath = RootPath ++ "/journal/journal_files/" ++ FileName, - PndPath = RootPath ++ "/journal/journal_files/" ++ - filename:basename(FileName, ".cdb") ++ ".pnd", + PndPath = + RootPath ++ "/journal/journal_files/" ++ + filename:basename(FileName, ".cdb") ++ ".pnd", ok = file:rename(OriginalPath, PndPath), false = filelib:is_file(OriginalPath). @@ -1022,8 +1166,6 @@ find_journals(RootPath) -> convert_to_seconds({MegaSec, Seconds, _MicroSec}) -> MegaSec * 1000000 + Seconds. - - get_aae_segment(Obj) -> get_aae_segment(testutil:get_bucket(Obj), testutil:get_key(Obj)). @@ -1054,6 +1196,7 @@ compact_and_wait(Book, WaitForDelete) -> end end, true, - lists:seq(1, 15)), + lists:seq(1, 15) + ), io:format("Waiting for journal deletes~n"), timer:sleep(WaitForDelete). diff --git a/test/end_to_end/tictac_SUITE.erl b/test/end_to_end/tictac_SUITE.erl index 4869184b..44141bc1 100644 --- a/test/end_to_end/tictac_SUITE.erl +++ b/test/end_to_end/tictac_SUITE.erl @@ -2,32 +2,33 @@ -include("leveled.hrl"). -export([all/0, init_per_suite/1, end_per_suite/1]). -export([ - multiput_subkeys/1, - many_put_compare/1, - index_compare/1, - basic_headonly/1, - tuplebuckets_headonly/1 - ]). - -all() -> [ - multiput_subkeys, - many_put_compare, - index_compare, - basic_headonly, - tuplebuckets_headonly - ]. + multiput_subkeys/1, + many_put_compare/1, + index_compare/1, + basic_headonly/1, + tuplebuckets_headonly/1 +]). + +all() -> + [ + multiput_subkeys, + many_put_compare, + index_compare, + basic_headonly, + tuplebuckets_headonly + ]. -define(V1_VERS, 1). --define(MAGIC, 53). % riak_kv -> riak_object +% riak_kv -> riak_object +-define(MAGIC, 53). init_per_suite(Config) -> - testutil:init_per_suite([{suite, "tictac"}|Config]), + testutil:init_per_suite([{suite, "tictac"} | Config]), Config. end_per_suite(Config) -> testutil:end_per_suite(Config). - multiput_subkeys(_Config) -> multiput_subkeys_byvalue({null, 0}), multiput_subkeys_byvalue(null), @@ -35,11 +36,13 @@ multiput_subkeys(_Config) -> multiput_subkeys_byvalue(V) -> RootPath = testutil:reset_filestructure("subkeyTest"), - StartOpts = [{root_path, RootPath}, - {max_journalsize, 10000000}, - {max_pencillercachesize, 12000}, - {head_only, no_lookup}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts = [ + {root_path, RootPath}, + {max_journalsize, 10000000}, + {max_pencillercachesize, 12000}, + {head_only, no_lookup}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie} = leveled_bookie:book_start(StartOpts), SubKeyCount = 200000, @@ -53,7 +56,7 @@ multiput_subkeys_byvalue(V) -> lists:seq(1, SubKeyCount) ) end, - + SpecL1 = ObjSpecLGen(<<1:32/integer>>), load_objectspecs(SpecL1, 32, Bookie), SpecL2 = ObjSpecLGen(<<2:32/integer>>), @@ -69,7 +72,7 @@ multiput_subkeys_byvalue(V) -> fun(Bucket, {Key, SubKey}, _Value, Acc) -> case Bucket of Bucket when Bucket == B -> - [{Key, SubKey}|Acc] + [{Key, SubKey} | Acc] end end, QueryFun = @@ -83,7 +86,7 @@ multiput_subkeys_byvalue(V) -> io:format("query result for range ~p is ~w~n", [Range, L]), L end, - + KR1 = {{<<1:32/integer>>, <<>>}, {<<2:32/integer>>, <<>>}}, KR2 = {{<<3:32/integer>>, <<>>}, {<<5:32/integer>>, <<>>}}, KR3 = @@ -96,7 +99,7 @@ multiput_subkeys_byvalue(V) -> true = (SubKeyCount + 10) == QueryFun(KR3), leveled_bookie:book_destroy(Bookie). 
-many_put_compare(_Config) -> +many_put_compare(_Config) -> TreeSize = small, SegmentCount = 256 * 256, % Test requires multiple different databases, so want to mount them all @@ -107,9 +110,11 @@ many_put_compare(_Config) -> RootPathD = testutil:reset_filestructure("testD"), % Start the first database, load a test object, close it, start it again - StartOpts1 = [{root_path, RootPathA}, - {max_pencillercachesize, 16000}, - {sync_strategy, riak_sync}], + StartOpts1 = [ + {root_path, RootPathA}, + {max_pencillercachesize, 16000}, + {sync_strategy, riak_sync} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {B1, K1, V1, S1, MD} = { @@ -123,10 +128,12 @@ many_put_compare(_Config) -> ok = testutil:book_riakput(Bookie1, TestObject, TestSpec), testutil:check_forobject(Bookie1, TestObject), ok = leveled_bookie:book_close(Bookie1), - StartOpts2 = [{root_path, RootPathA}, - {max_journalsize, 500000000}, - {max_pencillercachesize, 32000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts2 = [ + {root_path, RootPathA}, + {max_journalsize, 500000000}, + {max_pencillercachesize, 32000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie2} = leveled_bookie:book_start(StartOpts2), testutil:check_forobject(Bookie2, TestObject), @@ -134,8 +141,18 @@ many_put_compare(_Config) -> % the first store (outputting the generated objects as a list of lists) % to be used elsewhere - GenList = [2, 20002, 40002, 60002, 80002, - 100002, 120002, 140002, 160002, 180002], + GenList = [ + 2, + 20002, + 40002, + 60002, + 80002, + 100002, + 120002, + 140002, + 160002, + 180002 + ], CLs = testutil:load_objects( 20000, @@ -149,41 +166,57 @@ many_put_compare(_Config) -> % Start a new store, and load the same objects (except fot the original % test object) into this store - StartOpts3 = [{root_path, RootPathB}, - {max_journalsize, 200000000}, - {max_pencillercachesize, 16000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts3 = [ + {root_path, RootPathB}, + {max_journalsize, 200000000}, + {max_pencillercachesize, 16000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie3} = leveled_bookie:book_start(StartOpts3), lists:foreach(fun(ObjL) -> testutil:riakload(Bookie3, ObjL) end, CLs), % Now run a tictac query against both stores to see the extent to which % state between stores is consistent - TicTacQ = {tictactree_obj, - {o_rkv, <<"Bucket">>, null, null, true}, - TreeSize, - fun(_B, _K) -> accumulate end}, + TicTacQ = + {tictactree_obj, {o_rkv, <<"Bucket">>, null, null, true}, TreeSize, fun( + _B, _K + ) -> + accumulate + end}, {async, TreeAFolder} = leveled_bookie:book_returnfolder(Bookie2, TicTacQ), {async, TreeBFolder} = leveled_bookie:book_returnfolder(Bookie3, TicTacQ), SWA0 = os:timestamp(), TreeA = TreeAFolder(), - io:format("Build tictac tree with 200K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWA0)]), + io:format( + "Build tictac tree with 200K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWA0)] + ), SWB0 = os:timestamp(), TreeB = TreeBFolder(), - io:format("Build tictac tree with 200K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWB0)]), + io:format( + "Build tictac tree with 200K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWB0)] + ), SWC0 = os:timestamp(), SegList0 = leveled_tictac:find_dirtyleaves(TreeA, TreeB), - io:format("Compare tictac trees with 200K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWC0)]), - io:format("Tree comparison shows ~w different leaves~n", - [length(SegList0)]), + io:format( + "Compare tictac trees with 
200K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWC0)] + ), + io:format( + "Tree comparison shows ~w different leaves~n", + [length(SegList0)] + ), AltList = - leveled_tictac:find_dirtyleaves(TreeA, - leveled_tictac:new_tree(0, TreeSize)), - io:format("Tree comparison shows ~w altered leaves~n", - [length(AltList)]), + leveled_tictac:find_dirtyleaves( + TreeA, + leveled_tictac:new_tree(0, TreeSize) + ), + io:format( + "Tree comparison shows ~w altered leaves~n", + [length(AltList)] + ), true = length(SegList0) == 1, % only the test object should be different true = length(AltList) > 10000, @@ -196,12 +229,14 @@ many_put_compare(_Config) -> TreeSize, fun(_B, _K) -> pass end }, - {async, TreeAFolder_WP} = + {async, TreeAFolder_WP} = leveled_bookie:book_returnfolder(Bookie2, WrongPartitionTicTacQ), TreeAWP = TreeAFolder_WP(), DoubleEmpty = - leveled_tictac:find_dirtyleaves(TreeAWP, - leveled_tictac:new_tree(0, TreeSize)), + leveled_tictac:find_dirtyleaves( + TreeAWP, + leveled_tictac:new_tree(0, TreeSize) + ), true = length(DoubleEmpty) == 0, % Now run the same query by putting the tree-building responsibility onto @@ -212,42 +247,44 @@ many_put_compare(_Config) -> {proxy_object, HeadBin, _Size, _FetchFun} = binary_to_term(Value), <> = HeadBin, - case is_binary(Key) of - true -> - {Key, - lists:sort(binary_to_term(VclockBin))}; + case is_binary(Key) of + true -> + {Key, lists:sort(binary_to_term(VclockBin))}; false -> - {term_to_binary(Key), - lists:sort(binary_to_term(VclockBin))} + {term_to_binary(Key), lists:sort(binary_to_term(VclockBin))} end end, FoldObjectsFun = - fun(_Bucket, Key, Value, Acc) -> - leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun) - end, + fun(_Bucket, Key, Value, Acc) -> + leveled_tictac:add_kv(Acc, Key, Value, ExtractClockFun) + end, FoldAccT = {FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize)}, {async, TreeAObjFolder0} = - leveled_bookie:book_headfold(Bookie2, - o_rkv, - {range, <<"Bucket">>, all}, - FoldAccT, - false, - true, - false), - + leveled_bookie:book_headfold( + Bookie2, + o_rkv, + {range, <<"Bucket">>, all}, + FoldAccT, + false, + true, + false + ), + SWB0Obj = os:timestamp(), TreeAObj0 = TreeAObjFolder0(), - io:format("Build tictac tree via object fold with no "++ - "presence check and 200K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWB0Obj)]), + io:format( + "Build tictac tree via object fold with no " ++ + "presence check and 200K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWB0Obj)] + ), true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj0)) == 0, InitAccTree = leveled_tictac:new_tree(0, TreeSize, true), - + {async, TreeAObjFolder1} = leveled_bookie:book_headfold( - Bookie2, + Bookie2, ?RIAK_TAG, {range, <<"Bucket">>, all}, {FoldObjectsFun, InitAccTree}, @@ -265,7 +302,7 @@ many_put_compare(_Config) -> true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj1)) == 0, {async, TreeAObjFolder1Alt} = leveled_bookie:book_headfold( - Bookie2, + Bookie2, ?RIAK_TAG, {range, <<"Bucket">>, all}, {FoldObjectsFun, leveled_tictac:new_tree(0, TreeSize, false)}, @@ -282,7 +319,7 @@ many_put_compare(_Config) -> ), true = length(leveled_tictac:find_dirtyleaves(TreeA, TreeAObj1Alt)) == 0, - % For an exportable comparison, want hash to be based on something not + % For an exportable comparison, want hash to be based on something not % coupled to erlang language - so use exportable query AltExtractFun = fun(K, V) -> @@ -295,13 +332,13 @@ many_put_compare(_Config) -> fun(_Bucket, Key, Value, Acc) -> 
leveled_tictac:add_kv(Acc, Key, Value, AltExtractFun) end, - + {async, TreeAAltObjFolder0} = leveled_bookie:book_headfold( - Bookie2, + Bookie2, ?RIAK_TAG, {range, <<"Bucket">>, all}, - {AltFoldObjectsFun, InitAccTree}, + {AltFoldObjectsFun, InitAccTree}, false, true, false @@ -315,7 +352,7 @@ many_put_compare(_Config) -> ), {async, TreeBAltObjFolder0} = leveled_bookie:book_headfold( - Bookie3, + Bookie3, ?RIAK_TAG, {range, <<"Bucket">>, all}, {AltFoldObjectsFun, InitAccTree}, @@ -328,14 +365,16 @@ many_put_compare(_Config) -> io:format( "Build tictac tree via object fold with no " "presence check and 200K objects and alt hash in ~w~n", - [timer:now_diff(os:timestamp(), SWB3Obj)]), - DL_ExportFold = + [timer:now_diff(os:timestamp(), SWB3Obj)] + ), + DL_ExportFold = length(leveled_tictac:find_dirtyleaves(TreeBAltObj, TreeAAltObj)), - io:format("Found dirty leaves with exportable comparison of ~w~n", - [DL_ExportFold]), + io:format( + "Found dirty leaves with exportable comparison of ~w~n", + [DL_ExportFold] + ), true = DL_ExportFold == 1, - %% Finding differing keys FoldKeysFun = fun(SegListToFind) -> @@ -343,7 +382,7 @@ many_put_compare(_Config) -> Seg = get_segment(K, SegmentCount), case lists:member(Seg, SegListToFind) of true -> - [K|Acc]; + [K | Acc]; false -> Acc end @@ -354,10 +393,14 @@ many_put_compare(_Config) -> leveled_bookie:book_returnfolder(Bookie2, SegQuery), SWSKL0 = os:timestamp(), SegKeyList = SegKeyFinder(), - io:format("Finding ~w keys in ~w dirty segments in ~w~n", - [length(SegKeyList), - length(SegList0), - timer:now_diff(os:timestamp(), SWSKL0)]), + io:format( + "Finding ~w keys in ~w dirty segments in ~w~n", + [ + length(SegKeyList), + length(SegList0), + timer:now_diff(os:timestamp(), SWSKL0) + ] + ), true = length(SegKeyList) >= 1, true = length(SegKeyList) < 10, @@ -370,12 +413,16 @@ many_put_compare(_Config) -> {async, TreeAFolder0} = leveled_bookie:book_returnfolder(Bookie2, TicTacQ), SWA1 = os:timestamp(), TreeA0 = TreeAFolder0(), - io:format("Build tictac tree with 200K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWA1)]), + io:format( + "Build tictac tree with 200K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWA1)] + ), SegList1 = leveled_tictac:find_dirtyleaves(TreeA0, TreeB), - io:format("Tree comparison following delete shows ~w different leaves~n", - [length(SegList1)]), + io:format( + "Tree comparison following delete shows ~w different leaves~n", + [length(SegList1)] + ), true = length(SegList1) == 0, % Removed test object so tictac trees should match @@ -383,8 +430,10 @@ many_put_compare(_Config) -> {async, TreeBFolder0} = leveled_bookie:book_returnfolder(Bookie3, TicTacQ), SWB1 = os:timestamp(), TreeB0 = TreeBFolder0(), - io:format("Build tictac tree with 200K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWB1)]), + io:format( + "Build tictac tree with 200K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWB1)] + ), SegList2 = leveled_tictac:find_dirtyleaves(TreeA0, TreeB0), true = SegList2 == SegList0, % There is an identical difference now the difference is on Bookie3 not @@ -395,15 +444,19 @@ many_put_compare(_Config) -> % Replace Bookie 3 with two stores Bookie 4 and Bookie 5 where the ojects % have been randomly split between the stores - StartOpts4 = [{root_path, RootPathC}, - {max_journalsize, 200000000}, - {max_pencillercachesize, 24000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts4 = [ + {root_path, RootPathC}, + {max_journalsize, 200000000}, + {max_pencillercachesize, 24000}, + {sync_strategy, 
testutil:sync_strategy()} + ], {ok, Bookie4} = leveled_bookie:book_start(StartOpts4), - StartOpts5 = [{root_path, RootPathD}, - {max_journalsize, 200000000}, - {max_pencillercachesize, 24000}, - {sync_strategy, testutil:sync_strategy()}], + StartOpts5 = [ + {root_path, RootPathD}, + {max_journalsize, 200000000}, + {max_pencillercachesize, 24000}, + {sync_strategy, testutil:sync_strategy()} + ], {ok, Bookie5} = leveled_bookie:book_start(StartOpts5), SplitFun = @@ -415,12 +468,14 @@ many_put_compare(_Config) -> false end end, - lists:foreach(fun(ObjL) -> - {ObjLA, ObjLB} = lists:partition(SplitFun, ObjL), - testutil:riakload(Bookie4, ObjLA), - testutil:riakload(Bookie5, ObjLB) - end, - CLs), + lists:foreach( + fun(ObjL) -> + {ObjLA, ObjLB} = lists:partition(SplitFun, ObjL), + testutil:riakload(Bookie4, ObjLA), + testutil:riakload(Bookie5, ObjLB) + end, + CLs + ), % query both the stores, then merge the trees - the result should be the % same as the result from the tree created aginst the store with both @@ -430,25 +485,29 @@ many_put_compare(_Config) -> {async, TreeC1Folder} = leveled_bookie:book_returnfolder(Bookie5, TicTacQ), SWD0 = os:timestamp(), TreeC0 = TreeC0Folder(), - io:format("Build tictac tree with 100K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWD0)]), + io:format( + "Build tictac tree with 100K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWD0)] + ), SWD1 = os:timestamp(), TreeC1 = TreeC1Folder(), - io:format("Build tictac tree with 100K objects in ~w~n", - [timer:now_diff(os:timestamp(), SWD1)]), + io:format( + "Build tictac tree with 100K objects in ~w~n", + [timer:now_diff(os:timestamp(), SWD1)] + ), TreeC2 = leveled_tictac:merge_trees(TreeC0, TreeC1), SegList3 = leveled_tictac:find_dirtyleaves(TreeC2, TreeB), - io:format("Tree comparison following delete shows ~w different leaves~n", - [length(SegList3)]), + io:format( + "Tree comparison following delete shows ~w different leaves~n", + [length(SegList3)] + ), true = length(SegList3) == 0, - ok = leveled_bookie:book_close(Bookie2), ok = leveled_bookie:book_close(Bookie4), ok = leveled_bookie:book_close(Bookie5). 
- index_compare(_Config) -> TreeSize = xxsmall, LS = 2000, @@ -481,34 +540,43 @@ index_compare(_Config) -> ObjLists = lists:map(GenMapFun, lists:seq(1, 9)), % Load all nine lists into Book1A - lists:foreach(fun(ObjL) -> testutil:riakload(Book1A, ObjL) end, - ObjLists), + lists:foreach( + fun(ObjL) -> testutil:riakload(Book1A, ObjL) end, + ObjLists + ), % Split nine lists across Book1B to Book1D, three object lists in each - lists:foreach(fun(ObjL) -> testutil:riakload(Book1B, ObjL) end, - lists:sublist(ObjLists, 1, 3)), - lists:foreach(fun(ObjL) -> testutil:riakload(Book1C, ObjL) end, - lists:sublist(ObjLists, 4, 3)), - lists:foreach(fun(ObjL) -> testutil:riakload(Book1D, ObjL) end, - lists:sublist(ObjLists, 7, 3)), + lists:foreach( + fun(ObjL) -> testutil:riakload(Book1B, ObjL) end, + lists:sublist(ObjLists, 1, 3) + ), + lists:foreach( + fun(ObjL) -> testutil:riakload(Book1C, ObjL) end, + lists:sublist(ObjLists, 4, 3) + ), + lists:foreach( + fun(ObjL) -> testutil:riakload(Book1D, ObjL) end, + lists:sublist(ObjLists, 7, 3) + ), GetTicTacTreeFun = fun(X, Bookie) -> SW = os:timestamp(), ST = <<"!">>, ET = <<"|">>, - Q = {tictactree_idx, + Q = + {tictactree_idx, {BucketBin, list_to_binary("idx" ++ integer_to_list(X) ++ "_bin"), - ST, - ET}, - TreeSize, - fun(_B, _K) -> accumulate end}, + ST, ET}, + TreeSize, fun(_B, _K) -> accumulate end}, {async, Folder} = leveled_bookie:book_returnfolder(Bookie, Q), R = Folder(), - io:format("TicTac Tree for index ~w took " ++ - "~w microseconds~n", - [X, timer:now_diff(os:timestamp(), SW)]), + io:format( + "TicTac Tree for index ~w took " ++ + "~w microseconds~n", + [X, timer:now_diff(os:timestamp(), SW)] + ), R end, @@ -519,13 +587,17 @@ index_compare(_Config) -> TicTacTree1_P3 = GetTicTacTreeFun(1, Book1D), % Merge the tree across the partitions - TicTacTree1_Joined = lists:foldl(fun leveled_tictac:merge_trees/2, - TicTacTree1_P1, - [TicTacTree1_P2, TicTacTree1_P3]), + TicTacTree1_Joined = lists:foldl( + fun leveled_tictac:merge_trees/2, + TicTacTree1_P1, + [TicTacTree1_P2, TicTacTree1_P3] + ), % Go compare! Also check we're not comparing empty trees - DL1_0 = leveled_tictac:find_dirtyleaves(TicTacTree1_Full, - TicTacTree1_Joined), + DL1_0 = leveled_tictac:find_dirtyleaves( + TicTacTree1_Full, + TicTacTree1_Joined + ), EmptyTree = leveled_tictac:new_tree(empty, TreeSize), DL1_1 = leveled_tictac:find_dirtyleaves(TicTacTree1_Full, EmptyTree), true = DL1_0 == [], @@ -550,13 +622,17 @@ index_compare(_Config) -> TicTacTree2_P3 = GetTicTacTreeFun(2, Book2D), % Merge the tree across the partitions - TicTacTree2_Joined = lists:foldl(fun leveled_tictac:merge_trees/2, - TicTacTree2_P1, - [TicTacTree2_P2, TicTacTree2_P3]), + TicTacTree2_Joined = lists:foldl( + fun leveled_tictac:merge_trees/2, + TicTacTree2_P1, + [TicTacTree2_P2, TicTacTree2_P3] + ), % Go compare! 
Also check we're not comparing empty trees - DL2_0 = leveled_tictac:find_dirtyleaves(TicTacTree2_Full, - TicTacTree2_Joined), + DL2_0 = leveled_tictac:find_dirtyleaves( + TicTacTree2_Full, + TicTacTree2_Joined + ), EmptyTree = leveled_tictac:new_tree(empty, TreeSize), DL2_1 = leveled_tictac:find_dirtyleaves(TicTacTree2_Full, EmptyTree), true = DL2_0 == [], @@ -569,7 +645,8 @@ index_compare(_Config) -> term_to_binary("K9.Z"), "Value1", [IdxSpc], - [{"MDK1", "MDV1"}]), + [{"MDK1", "MDV1"}] + ), ok = testutil:book_riakput(Book2C, TestObj, TestSpc), testutil:check_forobject(Book2C, TestObj), @@ -583,36 +660,42 @@ index_compare(_Config) -> lists:foldl( fun leveled_tictac:merge_trees/2, TicTacTree3_P1, - [TicTacTree3_P2, TicTacTree3_P3]), + [TicTacTree3_P2, TicTacTree3_P3] + ), % Find all keys index, and then just the last key - IdxQ1 = {index_query, - BucketBin, - {fun testutil:foldkeysfun/3, []}, - {<<"idx2_bin">>, <<"zz">>, <<"zz|">>}, - {true, undefined}}, + IdxQ1 = + {index_query, BucketBin, {fun testutil:foldkeysfun/3, []}, + {<<"idx2_bin">>, <<"zz">>, <<"zz|">>}, {true, undefined}}, {async, IdxFolder1} = leveled_bookie:book_returnfolder(Book2C, IdxQ1), true = IdxFolder1() >= 1, DL_3to2B = leveled_tictac:find_dirtyleaves( - TicTacTree2_P1, TicTacTree3_P1), + TicTacTree2_P1, TicTacTree3_P1 + ), DL_3to2C = leveled_tictac:find_dirtyleaves( - TicTacTree2_P2, TicTacTree3_P2), + TicTacTree2_P2, TicTacTree3_P2 + ), DL_3to2D = leveled_tictac:find_dirtyleaves( - TicTacTree2_P3, TicTacTree3_P3), - io:format("Individual tree comparison found dirty leaves of ~w ~w ~w~n", - [DL_3to2B, DL_3to2C, DL_3to2D]), + TicTacTree2_P3, TicTacTree3_P3 + ), + io:format( + "Individual tree comparison found dirty leaves of ~w ~w ~w~n", + [DL_3to2B, DL_3to2C, DL_3to2D] + ), true = length(DL_3to2B) == 0, true = length(DL_3to2C) == 1, true = length(DL_3to2D) == 0, % Go compare! 
Should find a difference in one leaf - DL3_0 = leveled_tictac:find_dirtyleaves(TicTacTree3_Full, - TicTacTree3_Joined), + DL3_0 = leveled_tictac:find_dirtyleaves( + TicTacTree3_Full, + TicTacTree3_Joined + ), io:format("Different leaves count ~w~n", [length(DL3_0)]), true = length(DL3_0) == 1, @@ -627,17 +710,15 @@ index_compare(_Config) -> Seg = get_segment(Key, SegmentCount), case lists:member(Seg, DL3_0) of true -> - [{Term, Key}|Acc]; + [{Term, Key} | Acc]; false -> Acc end end, - MismatchQ = {index_query, - BucketBin, - {FoldKeysIndexQFun, []}, - {<<"idx2_bin">>, <<"!">>, <<"|">>}, - {true, undefined}}, + MismatchQ = + {index_query, BucketBin, {FoldKeysIndexQFun, []}, + {<<"idx2_bin">>, <<"!">>, <<"|">>}, {true, undefined}}, {async, MMFldr_2A} = leveled_bookie:book_returnfolder(Book2A, MismatchQ), {async, MMFldr_2B} = leveled_bookie:book_returnfolder(Book2B, MismatchQ), {async, MMFldr_2C} = leveled_bookie:book_returnfolder(Book2C, MismatchQ), @@ -646,14 +727,17 @@ index_compare(_Config) -> SWSS = os:timestamp(), SL_Joined = MMFldr_2B() ++ MMFldr_2C() ++ MMFldr_2D(), SL_Full = MMFldr_2A(), - io:format("Segment search across both clusters took ~w~n", - [timer:now_diff(os:timestamp(), SWSS)]), + io:format( + "Segment search across both clusters took ~w~n", + [timer:now_diff(os:timestamp(), SWSS)] + ), io:format("Joined SegList ~w~n", [SL_Joined]), io:format("Full SegList ~w~n", [SL_Full]), - Diffs = lists:subtract(SL_Full, SL_Joined) - ++ lists:subtract(SL_Joined, SL_Full), + Diffs = + lists:subtract(SL_Full, SL_Joined) ++ + lists:subtract(SL_Joined, SL_Full), io:format("Differences between lists ~w~n", [Diffs]), @@ -662,58 +746,62 @@ index_compare(_Config) -> % Without discovering too many others true = length(Diffs) < 20, - ok = leveled_bookie:book_close(Book2A), ok = leveled_bookie:book_close(Book2B), ok = leveled_bookie:book_close(Book2C), ok = leveled_bookie:book_close(Book2D). 
- tuplebuckets_headonly(_Config) -> ObjectCount = 60000, RootPathHO = testutil:reset_filestructure("testTBHO"), - StartOpts1 = [{root_path, RootPathHO}, - {max_pencillercachesize, 16000}, - {sync_strategy, none}, - {head_only, with_lookup}, - {max_journalsize, 500000}], + StartOpts1 = [ + {root_path, RootPathHO}, + {max_pencillercachesize, 16000}, + {sync_strategy, none}, + {head_only, with_lookup}, + {max_journalsize, 500000} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), ObjectSpecFun = - fun(Op) -> + fun(Op) -> fun(N) -> Bucket = {<<"BucketType">>, <<"B", 0:4/integer, N:4/integer>>}, Key = <<"K", N:32/integer>>, <> = crypto:hash(md5, <>), {Op, Bucket, Key, null, Hash} - end + end end, - + ObjectSpecL = lists:map(ObjectSpecFun(add), lists:seq(1, ObjectCount)), SW0 = os:timestamp(), ok = load_objectspecs(ObjectSpecL, 32, Bookie1), - io:format("Loaded an object count of ~w in ~w ms~n", - [ObjectCount, timer:now_diff(os:timestamp(), SW0)/1000]), - - CheckHeadFun = + io:format( + "Loaded an object count of ~w in ~w ms~n", + [ObjectCount, timer:now_diff(os:timestamp(), SW0) / 1000] + ), + + CheckHeadFun = fun({add, B, K, null, H}) -> - {ok, H} = + {ok, H} = leveled_bookie:book_headonly(Bookie1, B, K, null) end, lists:foreach(CheckHeadFun, ObjectSpecL), - BucketList = - lists:map(fun(I) -> - {<<"BucketType">>, <<"B", 0:4/integer, I:4/integer>>} - end, - lists:seq(0, 15)), + BucketList = + lists:map( + fun(I) -> + {<<"BucketType">>, <<"B", 0:4/integer, I:4/integer>>} + end, + lists:seq(0, 15) + ), FoldHeadFun = fun(B, {K, null}, V, Acc) -> - [{add, B, K, null, V}|Acc] + [{add, B, K, null, V} | Acc] end, SW1 = os:timestamp(), @@ -723,26 +811,33 @@ tuplebuckets_headonly(_Config) -> ?HEAD_TAG, {bucket_list, BucketList}, {FoldHeadFun, []}, - false, false, + false, + false, false ), ReturnedObjSpecL1 = lists:reverse(HeadRunner1()), - [FirstItem|_Rest] = ReturnedObjSpecL1, + [FirstItem | _Rest] = ReturnedObjSpecL1, LastItem = lists:last(ReturnedObjSpecL1), io:format( "Returned ~w objects with first ~w and last ~w in ~w ms~n", - [length(ReturnedObjSpecL1), - FirstItem, LastItem, - timer:now_diff(os:timestamp(), SW1)/1000]), + [ + length(ReturnedObjSpecL1), + FirstItem, + LastItem, + timer:now_diff(os:timestamp(), SW1) / 1000 + ] + ), true = ReturnedObjSpecL1 == lists:sort(ObjectSpecL), {add, {TB, B1}, K1, null, _H1} = FirstItem, {add, {TB, BL}, KL, null, _HL} = LastItem, - SegList = [testutil:get_aae_segment({TB, B1}, K1), - testutil:get_aae_segment({TB, BL}, KL)], - + SegList = [ + testutil:get_aae_segment({TB, B1}, K1), + testutil:get_aae_segment({TB, BL}, KL) + ], + SW2 = os:timestamp(), {async, HeadRunner2} = leveled_bookie:book_headfold( @@ -750,41 +845,46 @@ tuplebuckets_headonly(_Config) -> ?HEAD_TAG, {bucket_list, BucketList}, {FoldHeadFun, []}, - false, false, + false, + false, SegList ), ReturnedObjSpecL2 = lists:reverse(HeadRunner2()), - io:format("Returned ~w objects using seglist in ~w ms~n", - [length(ReturnedObjSpecL2), - timer:now_diff(os:timestamp(), SW2)/1000]), - - true = length(ReturnedObjSpecL2) < (ObjectCount/1000 + 2), - % Not too many false positives + io:format( + "Returned ~w objects using seglist in ~w ms~n", + [ + length(ReturnedObjSpecL2), + timer:now_diff(os:timestamp(), SW2) / 1000 + ] + ), + + true = length(ReturnedObjSpecL2) < (ObjectCount / 1000 + 2), + % Not too many false positives true = lists:member(FirstItem, ReturnedObjSpecL2), true = lists:member(LastItem, ReturnedObjSpecL2), leveled_bookie:book_destroy(Bookie1). 
- basic_headonly(_Config) -> ObjectCount = 200000, RemoveCount = 100, basic_headonly_test(ObjectCount, RemoveCount, with_lookup), basic_headonly_test(ObjectCount, RemoveCount, no_lookup). - basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) -> % Load some AAE type objects into Leveled using the read_only mode. This - % should allow for the items to be added in batches. Confirm that the - % journal is garbage collected as expected, and that it is possible to - % perform a fold_heads style query + % should allow for the items to be added in batches. Confirm that the + % journal is garbage collected as expected, and that it is possible to + % perform a fold_heads style query RootPathHO = testutil:reset_filestructure("testHO"), - StartOpts1 = [{root_path, RootPathHO}, - {max_pencillercachesize, 16000}, - {sync_strategy, sync}, - {head_only, HeadOnly}, - {max_journalsize, 500000}], + StartOpts1 = [ + {root_path, RootPathHO}, + {max_pencillercachesize, 16000}, + {sync_strategy, sync}, + {head_only, HeadOnly}, + {max_journalsize, 500000} + ], {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), {B1, K1, V1, S1, MD} = { @@ -795,57 +895,61 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) -> [{<<"MDK1">>, <<"MDV1">>}] }, {TestObject, TestSpec} = testutil:generate_testobject(B1, K1, V1, S1, MD), - {unsupported_message, put} = + {unsupported_message, put} = testutil:book_riakput(Bookie1, TestObject, TestSpec), - + ObjectSpecFun = - fun(Op) -> + fun(Op) -> fun(N) -> Bucket = <<"B", N:8/integer>>, Key = <<"K", N:32/integer>>, - <> = + <> = crypto:hash(md5, term_to_binary({Bucket, Key})), <> = crypto:hash(md5, <>), {Op, <>, Bucket, Key, Hash} - end + end end, - + ObjectSpecL = lists:map(ObjectSpecFun(add), lists:seq(1, ObjectCount)), SW0 = os:timestamp(), ok = load_objectspecs(ObjectSpecL, 32, Bookie1), - io:format("Loaded an object count of ~w in ~w microseconds with ~w~n", - [ObjectCount, timer:now_diff(os:timestamp(), SW0), HeadOnly]), + io:format( + "Loaded an object count of ~w in ~w microseconds with ~w~n", + [ObjectCount, timer:now_diff(os:timestamp(), SW0), HeadOnly] + ), - FoldFun = + FoldFun = fun(_B, _K, V, {HashAcc, CountAcc}) -> {HashAcc bxor V, CountAcc + 1} end, InitAcc = {0, 0}, - RunnerDefinition = - {foldheads_allkeys, h, {FoldFun, InitAcc}, - false, false, false, false, false}, - {async, Runner1} = + RunnerDefinition = + {foldheads_allkeys, h, {FoldFun, InitAcc}, false, false, false, false, + false}, + {async, Runner1} = leveled_bookie:book_returnfolder(Bookie1, RunnerDefinition), SW1 = os:timestamp(), {AccH1, AccC1} = Runner1(), - io:format("AccH and AccC of ~w ~w in ~w microseconds~n", - [AccH1, AccC1, timer:now_diff(os:timestamp(), SW1)]), + io:format( + "AccH and AccC of ~w ~w in ~w microseconds~n", + [AccH1, AccC1, timer:now_diff(os:timestamp(), SW1)] + ), - true = AccC1 == ObjectCount, + true = AccC1 == ObjectCount, JFP = RootPathHO ++ "/journal/journal_files", {ok, FNs} = file:list_dir(JFP), - + ok = leveled_bookie:book_trimjournal(Bookie1), WaitForTrimFun = fun(N, _Acc) -> {ok, PollFNs} = file:list_dir(JFP), - case length(PollFNs) < length(FNs) of + case length(PollFNs) < length(FNs) of true -> true; false -> @@ -853,28 +957,30 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) -> false end end, - + true = lists:foldl(WaitForTrimFun, false, [1, 2, 3, 5, 8, 13]), - + {ok, FinalFNs} = file:list_dir(JFP), ok = leveled_bookie:book_trimjournal(Bookie1), % CCheck a second trim is still OK - [{add, SegmentID0, Bucket0, Key0, Hash0}|_Rest] = ObjectSpecL, - case 
HeadOnly of + [{add, SegmentID0, Bucket0, Key0, Hash0} | _Rest] = ObjectSpecL, + case HeadOnly of with_lookup -> - % If we allow HEAD_TAG to be suubject to a lookup, then test this + % If we allow HEAD_TAG to be suubject to a lookup, then test this % here - {ok, Hash0} = - leveled_bookie:book_headonly(Bookie1, - SegmentID0, - Bucket0, - Key0), - CheckHeadFun = + {ok, Hash0} = + leveled_bookie:book_headonly( + Bookie1, + SegmentID0, + Bucket0, + Key0 + ), + CheckHeadFun = fun(DB) -> fun({add, SegID, B, K, H}) -> - {ok, H} = + {ok, H} = leveled_bookie:book_headonly(DB, SegID, B, K) end end, @@ -892,7 +998,8 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) -> ), io:format( "Checking for ~w objects against Snapshot ~w~n", - [length(ObjectSpecL), Snapshot]), + [length(ObjectSpecL), Snapshot] + ), lists:foreach(CheckHeadFun(Snapshot), ObjectSpecL), io:format("Closing snapshot ~w~n", [Snapshot]), ok = leveled_bookie:book_close(Snapshot), @@ -912,50 +1019,54 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) -> timer:sleep(10), false = is_process_alive(AltSnapshot); no_lookup -> - {unsupported_message, head} = + {unsupported_message, head} = leveled_bookie:book_head( - Bookie1, SegmentID0, {Bucket0, Key0}, h), - {unsupported_message, head} = + Bookie1, SegmentID0, {Bucket0, Key0}, h + ), + {unsupported_message, head} = leveled_bookie:book_headonly( - Bookie1, SegmentID0, Bucket0, Key0), + Bookie1, SegmentID0, Bucket0, Key0 + ), io:format("Closing actual store ~w~n", [Bookie1]), ok = leveled_bookie:book_close(Bookie1) end, - + {ok, FinalJournals} = file:list_dir(JFP), io:format( "Trim has reduced journal count from " - "~w to ~w and ~w after restart~n", + "~w to ~w and ~w after restart~n", [length(FNs), length(FinalFNs), length(FinalJournals)] ), {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), - {async, Runner2} = + {async, Runner2} = leveled_bookie:book_returnfolder(Bookie2, RunnerDefinition), {AccH2, AccC2} = Runner2(), true = AccC2 == ObjectCount, - case HeadOnly of + case HeadOnly of with_lookup -> - % If we allow HEAD_TAG to be suubject to a lookup, then test this + % If we allow HEAD_TAG to be suubject to a lookup, then test this % here - {ok, Hash0} = + {ok, Hash0} = leveled_bookie:book_head( - Bookie2, SegmentID0, {Bucket0, Key0}, h); + Bookie2, SegmentID0, {Bucket0, Key0}, h + ); no_lookup -> - {unsupported_message, head} = + {unsupported_message, head} = leveled_bookie:book_head( - Bookie2, SegmentID0, {Bucket0, Key0}, h) + Bookie2, SegmentID0, {Bucket0, Key0}, h + ) end, RemoveSpecL0 = lists:sublist(ObjectSpecL, RemoveCount), - RemoveSpecL1 = + RemoveSpecL1 = lists:map(fun(Spec) -> setelement(1, Spec, remove) end, RemoveSpecL0), ok = load_objectspecs(RemoveSpecL1, 32, Bookie2), - {async, Runner3} = + {async, Runner3} = leveled_bookie:book_returnfolder(Bookie2, RunnerDefinition), {AccH3, AccC3} = Runner3(), @@ -964,11 +1075,11 @@ basic_headonly_test(ObjectCount, RemoveCount, HeadOnly) -> ok = leveled_bookie:book_close(Bookie2). - load_objectspecs([], _SliceSize, _Bookie) -> ok; -load_objectspecs(ObjectSpecL, SliceSize, Bookie) - when length(ObjectSpecL) < SliceSize -> +load_objectspecs(ObjectSpecL, SliceSize, Bookie) when + length(ObjectSpecL) < SliceSize +-> load_objectspecs(ObjectSpecL, length(ObjectSpecL), Bookie); load_objectspecs(ObjectSpecL, SliceSize, Bookie) -> {Head, Tail} = lists:split(SliceSize, ObjectSpecL), @@ -980,9 +1091,8 @@ load_objectspecs(ObjectSpecL, SliceSize, Bookie) -> load_objectspecs(Tail, SliceSize, Bookie) end. 
- get_segment(K, SegmentCount) -> - BinKey = + BinKey = case is_binary(K) of true -> K; From b6661886a503236a65c7474d1e22cf286e72849a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 2 Sep 2025 11:42:15 +0100 Subject: [PATCH 4/4] Use meck (with OTP 24 support) --- rebar.config | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 2f262a44..09384190 100644 --- a/rebar.config +++ b/rebar.config @@ -48,7 +48,12 @@ {profiles, [ {eqc, [ - {deps, [meck, fqc]}, + {deps, [ + {meck, + {git, "https://github.com/OpenRiak/meck.git", + {branch, "openriak-3.2"}}}, + fqc + ]}, {erl_opts, [debug_info, {d, 'EQC'}]}, {extra_src_dirs, ["test/property", "test/end_to_end"]}, {shell, [{apps, [lz4]}]},