Last active
January 4, 2018 16:09
-
-
Save kjnilsson/1ba3f3455fb23d517bd495ff232d4716 to your computer and use it in GitHub Desktop.
spreading sampler module
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(ra_thw). | |
-export([ | |
% new/1, % size, | |
new/2, % size, distance | |
append/3, | |
to_list/1, | |
get/2, % kv lookup | |
truncate/2 % state, first entry to keep | |
]). | |
-define(DEFAULT_SIZE, 32). | |
-define(DEFAULT_DISTANCE, 32). | |
-record(state, {size = ?DEFAULT_SIZE :: non_neg_integer(), | |
dist = ?DEFAULT_DISTANCE :: non_neg_integer(), | |
sorted_keys = [] :: [], | |
kv = #{} :: #{term() => term()}, | |
cur_size = 0, | |
dist_count = 0 :: non_neg_integer() | |
}). | |
-type state() :: #state{}. | |
-export_type([state/0]). | |
%% A fixed size, sorted-by-insertion map-like data structure that samples incoming | |
%% key-values every 'dist' (distance) inputs. When it's full it grows the 'dist' value | |
%% and "thins" the current key space by removing every other key in the key space. | |
%% The keyspace can also be reduced by truncating all keys before and including | |
%% some given key. | |
new(Size, Distance) -> | |
#state{size = Size, | |
dist = Distance, | |
% we want to capture the first sample | |
dist_count = Distance}. | |
append(#state{cur_size = Size, size = Size, | |
dist = Dist0, dist_count = Dist0} = State0, | |
_Key, _Value) -> | |
% time to thin out the samples and grow the distance | |
Dist = (Dist0 * 2) + 1, | |
State = thin_out(State0), | |
State#state{dist = Dist, | |
% we've received one sample | |
% Add one to current dist count | |
dist_count = Dist0 + 1, | |
cur_size = length(State#state.sorted_keys)}; | |
append(#state{dist = Dist, dist_count = Dist, sorted_keys = Keys, | |
kv = Kv, cur_size = CurSize} = State, | |
Key, Value) -> | |
State#state{sorted_keys = [Key | Keys], kv = Kv#{Key => Value}, | |
dist_count = 0, cur_size = CurSize + 1}; | |
append(#state{dist_count = C} = State, _Key, _Value) -> | |
% it's not time to sample yet | |
State#state{dist_count = C + 1}. | |
get(#state{kv = Kv}, Key) -> | |
maps:get(Key, Kv, undefined). | |
% mostly for testing | |
to_list(#state{sorted_keys = Keys, kv = Kv}) -> | |
to_list0(Keys, Kv, []). | |
to_list0([Key | T], Kv, Acc) -> | |
#{Key := Value} = Kv, | |
to_list0(T, Kv, [{Key, Value} | Acc]); | |
to_list0([], _Kv, Acc) -> | |
Acc. | |
truncate(#state{kv = _Kv0} = State, _Key) -> | |
% case Kv0 of | |
% #{Key := _} -> | |
% % key exists - we can truncate:vspl | |
State. | |
%% Internal | |
thin_out(#state{sorted_keys = Keys0, kv = Kv0} = State) -> | |
{Keys, Kv} = thin_out0(Keys0, Kv0, {[], #{}}), | |
State#state{sorted_keys = Keys, kv = Kv}. | |
thin_out0([KeepKey, _ | Tail], Kv, {KeyAcc, KvAcc}) -> | |
#{KeepKey := KeepVal} = Kv, | |
thin_out0(Tail, Kv, {[KeepKey | KeyAcc], KvAcc#{KeepKey => KeepVal}}); | |
thin_out0([KeepKey | []], Kv, {KeyAcc, KvAcc}) -> | |
#{KeepKey := KeepVal} = Kv, | |
{lists:reverse([KeepKey | KeyAcc]), KvAcc#{KeepKey => KeepVal}}; | |
thin_out0([], _Kv, {Keys, Kv}) -> | |
{lists:reverse(Keys), Kv}. | |
-ifdef(TEST). | |
-include_lib("eunit/include/eunit.hrl"). | |
append_test() -> | |
Thw0 = new(5, 1), | |
Thw1 = lists:foldl(fun (S, Acc) -> | |
append(Acc, S, S) | |
end, Thw0, [a,b,c,d,e]), | |
[a, c, e] = map_fst(to_list(Thw1)), | |
Thw2 = lists:foldl(fun (S, Acc) -> | |
append(Acc, S, S) | |
end, Thw1, [f, g, h, i, j, k, l]), | |
% after 'i' it will grow the distance from 1 to 3 | |
% and thin the current samples | |
% so no sample will be taken when 'k' nd 'l' are received | |
[a, e, i] = map_fst(to_list(Thw2)), | |
% when 'm' is received we need to take another sample and thus | |
% need to thin previous samples to make room | |
Thw3 = append(Thw2, m, m), | |
[a, e, i, m] = map_fst(to_list(Thw3)), | |
Thw4 = lists:foldl(fun (S, Acc) -> | |
append(Acc, S, S) | |
end, Thw3, [l, m, n]), | |
[a, e, i, m] = map_fst(to_list(Thw4)), | |
Thw = append(Thw4, o, o), | |
[a, e, i, m, o] = map_fst(to_list(Thw)), | |
a = get(Thw, a), | |
undefined = get(Thw, c), | |
ok. | |
% truncate_test() -> | |
% Thw0 = new(5, 1), | |
% Thw1 = lists:foldl(fun (S, Acc) -> | |
% append(Acc, S, S) | |
% end, Thw0, [a,b,c,d,e]), | |
% Thw = truncate(Thw1, c), | |
% [e] = map_fst(to_list(Thw)), | |
% ok. | |
map_fst(L) -> | |
[element(1, X) || X <- L]. | |
-endif. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment