1 /**
2 Copyright: Copyright (c) 2019, Joakim Brännström. All rights reserved.
3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 # Algorithm
7 1. Construct the layout consisting of a number of consecutive slots with a
8    duration between them.
9 2. Fill the slots with snapshots that exists using a "best fit" algorithm.
10 
11 # Best fit
12 The best fitting snapshot is the one with the lowest difference between the
13 buckets time and the snapshots actual time.
14 */
15 module dsnapshot.layout;
16 
17 import logger = std.experimental.logger;
18 import std.algorithm : joiner, map;
19 import std.array : appender;
20 import std.datetime : SysTime, Duration, dur, Clock;
21 import std.range : repeat, enumerate;
22 import std.typecons : Nullable;
23 
24 public import dsnapshot.types : Name;
25 
26 version (unittest) {
27     import unit_threaded.assertions;
28 }
29 
30 @safe:
31 
32 struct Snapshot {
33     /// The time the snapshot was taken.
34     SysTime time;
35     /// Name of the snapshot. This is used to locate it.
36     Name name;
37 }
38 
39 /// Represent an empty position in the snapshot layout.
40 struct Empty {
41 }
42 
43 // TODO: replace bucket with an alias to the internal sumtype.
44 struct Bucket {
45     import sumtype;
46 
47     SumType!(Empty, Snapshot) value;
48 }
49 
50 /// It is always positive. The closer to zero the better fit.
51 Duration fitness(const SysTime a, const SysTime b) {
52     auto diff = b - a;
53     if (diff.isNegative)
54         return diff * -1;
55     return diff;
56 }
57 
58 /// Returns: the index of the candidate that best fit the time.
59 Nullable!size_t bestFit(const SysTime time, const SysTime[] candidates) {
60     import std.typecons : tuple;
61 
62     typeof(return) rval;
63     auto curr = Duration.max;
64     foreach (a; candidates.enumerate.map!(a => tuple(a.index, fitness(time, a.value)))) {
65         if (a[1] < curr) {
66             rval = a[0];
67             curr = a[1];
68         }
69     }
70 
71     return rval;
72 }
73 
74 @("shall find the candidate that has the best fitness for the specific time")
75 unittest {
76     import std.array : array;
77     import std.range : iota;
78 
79     auto candidates = iota(0, 10).map!(a => Clock.currTime + a.dur!"hours").array;
80 
81     const bucket = Clock.currTime + 4.dur!"hours";
82     bestFit(bucket, candidates).get.should == 4;
83 }
84 
85 /**
86  * At construction it is configured with how the snapshots should be organized
87  * into buckets. How many and the space in time between them.
88  *
89  * It is then updated with the current layout on the storage medium.
90  *
91  * The only data that it relies on is the basename of the paths that are pushed
92  * to it.
93  *
94  * It operates on two passes.
95  * During the first pass snapshots are added to the buckets following a best
96  * fit algorithm.  Snapshots are never discarded at this stage. If a snapshot
97  * do not fit in a bucket or is replaced it is moved to a waiting list.
98  *
99  * During the second pass the waiting snapshots are mapped back to the buckets
100  * via the same best fit algorithm.  The difference here is that the buckets
101  * "time" is matched against all waiting snapshots. This is the reverse of the
102  * first pass.
103  * */
104 struct Layout {
105     import sumtype;
106 
107     Bucket[] buckets;
108     /// The time of the bucket which a snapshot should try to match.
109     const(SysTime)[] times;
110 
111     /// Based on the first pass of the algoritm.
112     bool isFirstBucketEmpty;
113 
114     /// Snapshots collected for pass two.
115     Snapshot[] waiting;
116 
117     /// Snapshots that has been discarded because they do are not the best fit for any bucket.
118     Snapshot[] discarded;
119 
120     this(const SysTime start, const LayoutConfig conf) {
121         // configure the buckets
122         auto app = appender!(SysTime[])();
123         SysTime curr = start;
124         foreach (a; conf.spans.map!(a => repeat(a.space, a.nr)).joiner) {
125             curr -= a;
126             app.put(curr);
127         }
128         times = app.data;
129         buckets.length = times.length;
130     }
131 
132     Nullable!(Snapshot) firstFullBucket() const {
133         typeof(return) rval;
134         foreach (a; buckets) {
135             bool done;
136             a.value.match!((Empty a) {}, (Snapshot a) { done = true; rval = a; });
137             if (done)
138                 break;
139         }
140         return rval;
141     }
142 
143     bool empty() const {
144         return buckets.length == 0;
145     }
146 
147     /// Returns: the time of the snapshot that is in the bucket
148     Nullable!SysTime snapshotTimeInBucket(size_t idx) {
149         typeof(return) rval;
150         if (idx >= buckets.length)
151             return rval;
152 
153         buckets[idx].value.match!((Empty a) {}, (Snapshot a) { rval = a.time; });
154         return rval;
155     }
156 
157     /// Returns: the bucket that best matched the time.
158     Nullable!Snapshot bestFitBucket(SysTime time) {
159         typeof(return) rval;
160 
161         const fitIdx = bestFit(time, times);
162         if (!fitIdx.isNull) {
163             buckets[fitIdx.get].value.match!((Empty a) {}, (Snapshot a) {
164                 rval = a;
165             });
166         }
167 
168         return rval;
169     }
170 
171     void put(const Snapshot s) {
172         if (buckets.length == 0) {
173             discarded ~= s;
174             return;
175         }
176 
177         const fitIdx = bestFit(s.time, times);
178         if (fitIdx.isNull) {
179             waiting ~= s;
180             return;
181         }
182 
183         const bucketTime = times[fitIdx];
184         buckets[fitIdx].value = buckets[fitIdx].value.match!((Empty a) => s, (Snapshot a) {
185             // Replace the snapshot in the bucket if the new one `s` is a better fit.
186             if (fitness(bucketTime, s.time) < fitness(bucketTime, a.time)) {
187                 waiting ~= a;
188                 return s;
189             }
190             waiting ~= s;
191             return a;
192         });
193     }
194 
195     /// Pass two. Moving waiting to either buckets or discarded.
196     void finalize() {
197         import std.algorithm : remove;
198         import std.array : array;
199 
200         if (buckets.length == 0) {
201             return;
202         }
203         if (waiting.length == 0) {
204             isFirstBucketEmpty = true;
205             return;
206         }
207 
208         scope (exit)
209             waiting = null;
210 
211         isFirstBucketEmpty = buckets[0].value.match!((Empty a) => true, (Snapshot a) => false);
212 
213         auto waitingTimes = waiting.map!(a => a.time).array;
214         size_t bucketIdx;
215 
216         while (bucketIdx < buckets.length) {
217             scope (exit)
218                 bucketIdx++;
219 
220             const fitIdx = bestFit(times[bucketIdx], waitingTimes);
221             if (fitIdx.isNull) {
222                 continue;
223             }
224 
225             buckets[bucketIdx].value = buckets[bucketIdx].value.match!((Empty a) {
226                 auto s = waiting[fitIdx];
227                 waiting = waiting.remove(fitIdx.get);
228                 waitingTimes = waitingTimes.remove(fitIdx.get);
229                 return s;
230             }, (Snapshot a) => a);
231         }
232 
233         discarded ~= waiting;
234     }
235 
236     import std.range : isOutputRange;
237 
238     string toString() @safe const {
239         import std.array : appender;
240 
241         auto buf = appender!string;
242         toString(buf);
243         return buf.data;
244     }
245 
246     void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) {
247         import std.format : formattedWrite;
248         import std.range : enumerate, put;
249 
250         put(w, "Layout(");
251 
252         put(w, "Bucket nr: Best Fit Time - Content\n");
253         foreach (a; buckets.enumerate)
254             formattedWrite(w, "%s: %s - %s\n", a.index, times[a.index], a.value);
255 
256         if (waiting.length != 0)
257             put(w, "waiting\n");
258         foreach (a; waiting.enumerate)
259             formattedWrite(w, "%s: %s\n", a.index, a.value);
260 
261         if (discarded.length != 0)
262             put(w, "Discarded\n");
263         foreach (a; discarded.enumerate)
264             formattedWrite(w, "%s: %s\n", a.index, a.value);
265 
266         put(w, ")");
267     }
268 }
269 
270 /// Configuration for a span of snapshots in a layout.
271 struct Span {
272     uint nr;
273     Duration space;
274 }
275 
276 /// Configuration of a layout consisting of a number of span configs.
277 struct LayoutConfig {
278     Span[] spans;
279 }
280 
281 @(
282         "shall be a layout of 15 snapshots with increasing time between them when configured with three spans")
283 unittest {
284     import std.conv : to;
285     import std.range : iota;
286 
287     const base = Clock.currTime;
288 
289     auto conf = LayoutConfig([
290             Span(5, 4.dur!"hours"), Span(5, 1.dur!"days"), Span(5, 7.dur!"days")
291             ]);
292     auto layout = Layout(base, conf);
293 
294     immutable addSnapshotsNr = 5 * 4 + 5 * 24 + 5 * 24 * 7;
295 
296     // completely fill up the layout
297     foreach (a; iota(0, addSnapshotsNr)) {
298         layout.put(Snapshot(base - a.dur!"hours", a.to!string.Name));
299     }
300 
301     layout.buckets.length.should == 15;
302     layout.waiting.length.shouldEqual(addSnapshotsNr - 15);
303 
304     layout.finalize;
305 
306     layout.waiting.length.shouldEqual(0);
307     layout.discarded.length.shouldEqual(addSnapshotsNr - 15);
308 
309     (base - layout.times[0]).total!"hours".shouldEqual(4);
310     (base - layout.times[4]).total!"hours".shouldEqual(4 * 5);
311     (base - layout.times[5]).total!"hours".shouldEqual(4 * 5 + 24);
312     (base - layout.times[9]).total!"hours".shouldEqual(4 * 5 + 24 * 5);
313     (base - layout.times[10]).total!"hours".shouldEqual(4 * 5 + 24 * 5 + 24 * 7);
314     (base - layout.times[14]).total!"hours".shouldEqual(4 * 5 + 24 * 5 + 24 * 7 * 5);
315 }