1 /** 2 Copyright: Copyright (c) 2019, Joakim Brännström. All rights reserved. 3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0) 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 # Algorithm 7 1. Construct the layout consisting of a number of consecutive slots with a 8 duration between them. 9 2. Fill the slots with snapshots that exists using a "best fit" algorithm. 10 11 # Best fit 12 The best fitting snapshot is the one with the lowest difference between the 13 buckets time and the snapshots actual time. 14 */ 15 module dsnapshot.layout; 16 17 import logger = std.experimental.logger; 18 import std.algorithm : joiner, map; 19 import std.array : appender; 20 import std.datetime : SysTime, Duration, dur, Clock; 21 import std.range : repeat, enumerate; 22 import std.typecons : Nullable; 23 24 public import dsnapshot.types : Name; 25 26 version (unittest) { 27 import unit_threaded.assertions; 28 } 29 30 @safe: 31 32 struct Snapshot { 33 /// The time the snapshot was taken. 34 SysTime time; 35 /// Name of the snapshot. This is used to locate it. 36 Name name; 37 } 38 39 /// Represent an empty position in the snapshot layout. 40 struct Empty { 41 } 42 43 // TODO: replace bucket with an alias to the internal sumtype. 44 struct Bucket { 45 import sumtype; 46 47 SumType!(Empty, Snapshot) value; 48 } 49 50 /// It is always positive. The closer to zero the better fit. 51 Duration fitness(const SysTime a, const SysTime b) { 52 auto diff = b - a; 53 if (diff.isNegative) 54 return diff * -1; 55 return diff; 56 } 57 58 /// Returns: the index of the candidate that best fit the time. 59 Nullable!size_t bestFit(const SysTime time, const SysTime[] candidates) { 60 import std.typecons : tuple; 61 62 typeof(return) rval; 63 auto curr = Duration.max; 64 foreach (a; candidates.enumerate.map!(a => tuple(a.index, fitness(time, a.value)))) { 65 if (a[1] < curr) { 66 rval = a[0]; 67 curr = a[1]; 68 } 69 } 70 71 return rval; 72 } 73 74 @("shall find the candidate that has the best fitness for the specific time") 75 unittest { 76 import std.array : array; 77 import std.range : iota; 78 79 auto candidates = iota(0, 10).map!(a => Clock.currTime + a.dur!"hours").array; 80 81 const bucket = Clock.currTime + 4.dur!"hours"; 82 bestFit(bucket, candidates).get.should == 4; 83 } 84 85 /** 86 * At construction it is configured with how the snapshots should be organized 87 * into buckets. How many and the space in time between them. 88 * 89 * It is then updated with the current layout on the storage medium. 90 * 91 * The only data that it relies on is the basename of the paths that are pushed 92 * to it. 93 * 94 * It operates on two passes. 95 * During the first pass snapshots are added to the buckets following a best 96 * fit algorithm. Snapshots are never discarded at this stage. If a snapshot 97 * do not fit in a bucket or is replaced it is moved to a waiting list. 98 * 99 * During the second pass the waiting snapshots are mapped back to the buckets 100 * via the same best fit algorithm. The difference here is that the buckets 101 * "time" is matched against all waiting snapshots. This is the reverse of the 102 * first pass. 103 * */ 104 struct Layout { 105 import sumtype; 106 107 Bucket[] buckets; 108 /// The time of the bucket which a snapshot should try to match. 109 const(SysTime)[] times; 110 111 /// Based on the first pass of the algoritm. 112 bool isFirstBucketEmpty; 113 114 /// Snapshots collected for pass two. 115 Snapshot[] waiting; 116 117 /// Snapshots that has been discarded because they do are not the best fit for any bucket. 118 Snapshot[] discarded; 119 120 this(const SysTime start, const LayoutConfig conf) { 121 // configure the buckets 122 auto app = appender!(SysTime[])(); 123 SysTime curr = start; 124 foreach (a; conf.spans.map!(a => repeat(a.space, a.nr)).joiner) { 125 curr -= a; 126 app.put(curr); 127 } 128 times = app.data; 129 buckets.length = times.length; 130 } 131 132 Nullable!(Snapshot) firstFullBucket() const { 133 typeof(return) rval; 134 foreach (a; buckets) { 135 bool done; 136 a.value.match!((Empty a) {}, (Snapshot a) { done = true; rval = a; }); 137 if (done) 138 break; 139 } 140 return rval; 141 } 142 143 bool empty() const { 144 return buckets.length == 0; 145 } 146 147 /// Returns: the time of the snapshot that is in the bucket 148 Nullable!SysTime snapshotTimeInBucket(size_t idx) { 149 typeof(return) rval; 150 if (idx >= buckets.length) 151 return rval; 152 153 buckets[idx].value.match!((Empty a) {}, (Snapshot a) { rval = a.time; }); 154 return rval; 155 } 156 157 /// Returns: the bucket that best matched the time. 158 Nullable!Snapshot bestFitBucket(SysTime time) { 159 typeof(return) rval; 160 161 const fitIdx = bestFit(time, times); 162 if (!fitIdx.isNull) { 163 buckets[fitIdx.get].value.match!((Empty a) {}, (Snapshot a) { 164 rval = a; 165 }); 166 } 167 168 return rval; 169 } 170 171 void put(const Snapshot s) { 172 if (buckets.length == 0) { 173 discarded ~= s; 174 return; 175 } 176 177 const fitIdx = bestFit(s.time, times); 178 if (fitIdx.isNull) { 179 waiting ~= s; 180 return; 181 } 182 183 const bucketTime = times[fitIdx]; 184 buckets[fitIdx].value = buckets[fitIdx].value.match!((Empty a) => s, (Snapshot a) { 185 // Replace the snapshot in the bucket if the new one `s` is a better fit. 186 if (fitness(bucketTime, s.time) < fitness(bucketTime, a.time)) { 187 waiting ~= a; 188 return s; 189 } 190 waiting ~= s; 191 return a; 192 }); 193 } 194 195 /// Pass two. Moving waiting to either buckets or discarded. 196 void finalize() { 197 import std.algorithm : remove; 198 import std.array : array; 199 200 if (buckets.length == 0) { 201 return; 202 } 203 if (waiting.length == 0) { 204 isFirstBucketEmpty = true; 205 return; 206 } 207 208 scope (exit) 209 waiting = null; 210 211 isFirstBucketEmpty = buckets[0].value.match!((Empty a) => true, (Snapshot a) => false); 212 213 auto waitingTimes = waiting.map!(a => a.time).array; 214 size_t bucketIdx; 215 216 while (bucketIdx < buckets.length) { 217 scope (exit) 218 bucketIdx++; 219 220 const fitIdx = bestFit(times[bucketIdx], waitingTimes); 221 if (fitIdx.isNull) { 222 continue; 223 } 224 225 buckets[bucketIdx].value = buckets[bucketIdx].value.match!((Empty a) { 226 auto s = waiting[fitIdx]; 227 waiting = waiting.remove(fitIdx.get); 228 waitingTimes = waitingTimes.remove(fitIdx.get); 229 return s; 230 }, (Snapshot a) => a); 231 } 232 233 discarded ~= waiting; 234 } 235 236 import std.range : isOutputRange; 237 238 string toString() @safe const { 239 import std.array : appender; 240 241 auto buf = appender!string; 242 toString(buf); 243 return buf.data; 244 } 245 246 void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) { 247 import std.format : formattedWrite; 248 import std.range : enumerate, put; 249 250 put(w, "Layout("); 251 252 put(w, "Bucket nr: Best Fit Time - Content\n"); 253 foreach (a; buckets.enumerate) 254 formattedWrite(w, "%s: %s - %s\n", a.index, times[a.index], a.value); 255 256 if (waiting.length != 0) 257 put(w, "waiting\n"); 258 foreach (a; waiting.enumerate) 259 formattedWrite(w, "%s: %s\n", a.index, a.value); 260 261 if (discarded.length != 0) 262 put(w, "Discarded\n"); 263 foreach (a; discarded.enumerate) 264 formattedWrite(w, "%s: %s\n", a.index, a.value); 265 266 put(w, ")"); 267 } 268 } 269 270 /// Configuration for a span of snapshots in a layout. 271 struct Span { 272 uint nr; 273 Duration space; 274 } 275 276 /// Configuration of a layout consisting of a number of span configs. 277 struct LayoutConfig { 278 Span[] spans; 279 } 280 281 @( 282 "shall be a layout of 15 snapshots with increasing time between them when configured with three spans") 283 unittest { 284 import std.conv : to; 285 import std.range : iota; 286 287 const base = Clock.currTime; 288 289 auto conf = LayoutConfig([ 290 Span(5, 4.dur!"hours"), Span(5, 1.dur!"days"), Span(5, 7.dur!"days") 291 ]); 292 auto layout = Layout(base, conf); 293 294 immutable addSnapshotsNr = 5 * 4 + 5 * 24 + 5 * 24 * 7; 295 296 // completely fill up the layout 297 foreach (a; iota(0, addSnapshotsNr)) { 298 layout.put(Snapshot(base - a.dur!"hours", a.to!string.Name)); 299 } 300 301 layout.buckets.length.should == 15; 302 layout.waiting.length.shouldEqual(addSnapshotsNr - 15); 303 304 layout.finalize; 305 306 layout.waiting.length.shouldEqual(0); 307 layout.discarded.length.shouldEqual(addSnapshotsNr - 15); 308 309 (base - layout.times[0]).total!"hours".shouldEqual(4); 310 (base - layout.times[4]).total!"hours".shouldEqual(4 * 5); 311 (base - layout.times[5]).total!"hours".shouldEqual(4 * 5 + 24); 312 (base - layout.times[9]).total!"hours".shouldEqual(4 * 5 + 24 * 5); 313 (base - layout.times[10]).total!"hours".shouldEqual(4 * 5 + 24 * 5 + 24 * 7); 314 (base - layout.times[14]).total!"hours".shouldEqual(4 * 5 + 24 * 5 + 24 * 7 * 5); 315 }