1 /** 2 Copyright: Copyright (c) 2019, Joakim Brännström. All rights reserved. 3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0) 4 Author: Joakim Brännström (joakim.brannstrom@gmx.com) 5 6 # Algorithm 7 1. Construct the layout consisting of a number of consecutive slots with a 8 duration between them. 9 2. Fill the slots with snapshots that exists using a "best fit" algorithm. 10 11 # Best fit 12 The best fitting snapshot is the one with the lowest difference between the 13 buckets time and the snapshots actual time. 14 */ 15 module dsnapshot.layout; 16 17 import logger = std.experimental.logger; 18 import std.algorithm : joiner, map, filter; 19 import std.array : appender, empty; 20 import std.datetime : SysTime, Duration, dur, Clock, Interval; 21 import std.range : repeat, enumerate; 22 import std.typecons : Nullable; 23 24 public import dsnapshot.types : Name; 25 26 version (unittest) { 27 import unit_threaded.assertions; 28 } 29 30 @safe: 31 32 struct Snapshot { 33 /// The time the snapshot was taken. 34 SysTime time; 35 /// Name of the snapshot. This is used to locate it. 36 Name name; 37 } 38 39 /// Represent an empty position in the snapshot layout. 40 struct Empty { 41 } 42 43 // TODO: replace bucket with an alias to the internal sumtype. 44 struct Bucket { 45 import sumtype; 46 47 SumType!(Empty, Snapshot) value; 48 49 import std.range : isOutputRange; 50 51 string toString() @safe const { 52 import std.array : appender; 53 54 auto buf = appender!string; 55 toString(buf); 56 return buf.data; 57 } 58 59 void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) { 60 import std.format : formattedWrite; 61 import std.range : enumerate, put; 62 63 formattedWrite(w, "%s", cast() value); 64 } 65 } 66 67 /// It is always positive. The closer to zero the better fit. 68 Duration fitness(const SysTime a, const SysTime b) { 69 auto diff = b - a; 70 if (diff.isNegative) 71 return diff * -1; 72 return diff; 73 } 74 75 /// Returns: the index of the interval that enclose `time`. 76 Nullable!size_t bestFitInterval(const SysTime time, const Interval!SysTime[] candidates) @safe pure nothrow { 77 typeof(return) rval; 78 // can't use contains because we want the intervals to be inverted, open 79 // beginning and closed end. this is to put times that are on the edge in 80 // the "closest to now" interval. 81 foreach (a; candidates.enumerate.filter!(a => (time > a.value.begin && time <= a.value.end))) { 82 rval = a.index; 83 break; 84 } 85 86 return rval; 87 } 88 89 @("shall find the interval that contains the time") 90 unittest { 91 import std.array : array; 92 import std.range : iota; 93 94 const base = Clock.currTime; 95 const offset = 5.dur!"minutes"; 96 97 auto candidates = iota(0, 10).map!(a => Interval!SysTime(base - (a + 1) 98 .dur!"hours", base - a.dur!"hours")).array; 99 100 // |---|---|---|---|---|---| 101 // 0 1 2 3 4 5 6 102 bestFitInterval(base - offset, candidates).get.should == 0; 103 bestFitInterval(base - 4.dur!"hours" - offset, candidates).get.should == 4; 104 bestFitInterval(Clock.currTime - 5.dur!"hours" - offset, candidates).get.should == 5; 105 106 // test edge case where the times are exactly on the borders 107 bestFitInterval(base, candidates).get.should == 0; 108 bestFitInterval(base - 1.dur!"hours", candidates).get.should == 1; 109 bestFitInterval(base - 4.dur!"hours", candidates).get.should == 4; 110 bestFitInterval(base - 5.dur!"hours", candidates).get.should == 5; 111 } 112 113 /// Returns: the index of the candidate that best fit the time. 114 Nullable!size_t bestFitTime(const SysTime time, const SysTime[] candidates) { 115 import std.typecons : tuple; 116 117 typeof(return) rval; 118 auto curr = Duration.max; 119 foreach (a; candidates.enumerate.map!(a => tuple(a.index, fitness(time, a.value)))) { 120 if (a[1] < curr) { 121 rval = a[0]; 122 curr = a[1]; 123 } 124 } 125 126 return rval; 127 } 128 129 @("shall find the candidate that has the best fitness for the specific time") 130 unittest { 131 import std.array : array; 132 import std.range : iota; 133 134 const base = Clock.currTime; 135 const offset = 5.dur!"minutes"; 136 137 auto candidates = iota(0, 10).map!(a => base - a.dur!"hours").array; 138 139 // |---|---|---|---|---|---| 140 // 0 1 2 3 4 5 6 141 bestFitTime(base - offset, candidates).get.should == 0; 142 bestFitTime(base - 4.dur!"hours" - offset, candidates).get.should == 4; 143 bestFitTime(Clock.currTime - 5.dur!"hours" - offset, candidates).get.should == 5; 144 } 145 146 /** 147 * At construction it is configured with how the snapshots should be organized 148 * into buckets. How many and the space in time between them. 149 * 150 * It is then updated with the current layout on the storage medium. 151 * 152 * The only data that it relies on is the basename of the paths that are pushed 153 * to it. 154 * 155 * It operates on two passes. 156 * The first pass is basically a histogram. It finds the bucket interval that 157 * *contain* the snapshot. It then checks to see if the candidate is newer than 158 * the one currently in the bucket. If so it replaces it. This mean that each 159 * bucket contains the latest snapshot that fit it. If a snapshot do not fit in 160 * a bucket or is replaced it is moved to the discarded list. 161 * 162 * The second pass goes through those that has been marked for discard and see 163 * if any of them fit *well enough* into the buckets that are empty. 164 */ 165 struct Layout { 166 import sumtype; 167 168 /// The config which can be used to regenerate the layout. 169 LayoutConfig conf; 170 171 Bucket[] buckets; 172 /// The time of the bucket which a snapshot should try to match. 173 const(Interval!SysTime)[] times; 174 175 /// Snapshots that has been discarded because they do are not the best fit for any bucket. 176 Snapshot[] discarded; 177 178 this(const SysTime start, const LayoutConfig conf) { 179 this.conf = LayoutConfig(conf.spans.dup); 180 auto begin = start.toUTC; 181 auto end = start.toUTC; 182 auto app = appender!(Interval!SysTime[])(); 183 foreach (const a; conf.spans.map!(a => repeat(a.space, a.nr)).joiner) { 184 try { 185 end = begin; 186 begin -= a; 187 app.put(Interval!SysTime(begin, end)); 188 } catch (Exception e) { 189 logger.warning(e.msg); 190 logger.infof("Tried to create a bucket with time span %s -> %s from span interval %s", 191 begin, end, a); 192 } 193 } 194 times = app.data; 195 buckets.length = times.length; 196 } 197 198 Layout dup() @safe pure nothrow const { 199 Layout rval; 200 rval.buckets = buckets.dup; 201 rval.times = times.dup; 202 rval.discarded = discarded.dup; 203 return rval; 204 } 205 206 bool isFirstBucketEmpty() @safe pure nothrow const @nogc { 207 if (buckets.length == 0) 208 return false; 209 return buckets[0].value.match!((Empty a) => true, (Snapshot a) => false); 210 } 211 212 Nullable!Snapshot firstFullBucket() const { 213 typeof(return) rval; 214 foreach (a; buckets) { 215 bool done; 216 a.value.match!((Empty a) {}, (Snapshot a) { done = true; rval = a; }); 217 if (done) 218 break; 219 } 220 221 return rval; 222 } 223 224 /// Returns: a snapshot that can be used to resume. 225 Nullable!Snapshot resume() @safe pure nothrow const @nogc { 226 import std.algorithm : endsWith; 227 import dsnapshot.types : snapshotInProgressSuffix; 228 229 typeof(return) rval; 230 231 foreach (s; discarded.filter!(a => a.name.value.endsWith(snapshotInProgressSuffix))) { 232 rval = s; 233 break; 234 } 235 236 return rval; 237 } 238 239 bool empty() const { 240 return buckets.length == 0; 241 } 242 243 /// Returns: the time of the snapshot that is in the bucket 244 Nullable!SysTime snapshotTimeInBucket(size_t idx) @safe pure nothrow const @nogc { 245 typeof(return) rval; 246 if (idx >= buckets.length) 247 return rval; 248 249 buckets[idx].value.match!((Empty a) {}, (Snapshot a) { rval = a.time; }); 250 return rval; 251 } 252 253 /// Returns: the bucket which interval enclose `time`. 254 Nullable!Snapshot bestFitBucket(const SysTime time) @safe const { 255 typeof(return) rval; 256 257 const fitIdx = bestFitInterval(time, times); 258 if (!fitIdx.isNull) { 259 buckets[fitIdx.get].value.match!((Empty a) {}, (Snapshot a) { 260 rval = a; 261 }); 262 } 263 264 return rval; 265 } 266 267 void put(const Snapshot s) { 268 if (buckets.length == 0) { 269 discarded ~= s; 270 return; 271 } 272 273 const fitIdx = bestFitInterval(s.time, times); 274 if (fitIdx.isNull) { 275 discarded ~= s; 276 return; 277 } 278 279 const bucketTime = times[fitIdx]; 280 auto tmp = cast() buckets[fitIdx.get].value.match!((Empty a) => s, (Snapshot a) { 281 // Replace the snapshot in the bucket if the new one `s` is a better fit. 282 // Using `.end` on the assumption that the latest snapshot for 283 // each bucket is the most interesting. This also mean that when a 284 // snapshot trickle over to a new bucket it will most probably 285 // replace the old one right away because the old one is closer to 286 // the `.begin` than `.end`. 287 if (fitness(bucketTime.end, s.time) < fitness(bucketTime.end, a.time)) { 288 discarded ~= a; 289 return s; 290 } 291 discarded ~= s; 292 return a; 293 }); 294 295 () @trusted { buckets[fitIdx.get].value = tmp; }(); 296 } 297 298 void finalize() { 299 if (buckets.empty || discarded.empty) 300 return; 301 302 Snapshot[] junk; 303 304 auto iterate(Snapshot[] candidates) { 305 Snapshot[] spare; 306 307 // find what bucket the candidate fit in (bucket n). 308 // Put it in the bucket after it (bucket n+1), if n+1 is empty or 309 // the new one fit better. 310 // Assume that this only need to handle the cases when a snapshot 311 // is about to expire from its current bucket. 312 while (!candidates.empty) { 313 auto s = candidates[0]; 314 candidates = candidates[1 .. $]; 315 316 const fitIdx = bestFitInterval(s.time, times); 317 if (fitIdx.isNull) { 318 junk ~= s; 319 continue; 320 } 321 322 const idx = fitIdx.get + 1; 323 if (idx >= buckets.length) { 324 junk ~= s; 325 continue; 326 } 327 328 const bucketTime = times[idx]; 329 auto tmp = buckets[idx].value.match!((Empty a) => s, (Snapshot a) { 330 if (fitness(bucketTime.end, s.time) < fitness(bucketTime.end, a.time)) { 331 spare ~= a; 332 return s; 333 } 334 spare ~= s; 335 return a; 336 }); 337 338 () @trusted { buckets[idx].value = tmp; }(); 339 } 340 341 return spare; 342 } 343 344 // Assume that a cascade effect may happen when a snapshot is replacing 345 // another snapshot in a bucket. 346 Snapshot[] prev; 347 do { 348 prev = discarded; 349 discarded = iterate(discarded); 350 } 351 while (prev != discarded); 352 353 discarded ~= junk; 354 } 355 356 import std.range : isOutputRange; 357 358 string toString() @safe const { 359 import std.array : appender; 360 361 auto buf = appender!string; 362 toString(buf); 363 return buf.data; 364 } 365 366 void toString(Writer)(ref Writer w) const if (isOutputRange!(Writer, char)) { 367 import std.format : formattedWrite; 368 import std.range : enumerate, put; 369 import std.ascii : newline; 370 371 put(w, "Bucket Nr: Interval\n"); 372 foreach (a; buckets.enumerate) { 373 formattedWrite(w, "%9s: %s - %s\n%11s", a.index, 374 times[a.index].begin, times[a.index].end, ""); 375 a.value.value.match!((Empty a) { put(w, "empty"); }, (Snapshot a) { 376 formattedWrite(w, "%s", a.time); 377 }); 378 put(w, newline); 379 } 380 381 if (discarded.length != 0) 382 put(w, "Discarded\n"); 383 foreach (a; discarded.enumerate) 384 formattedWrite(w, "%s: %s name:%s\n", a.index, a.value.time, a.value.name.value); 385 } 386 } 387 388 /// Configuration for a span of snapshots in a layout. 389 struct Span { 390 uint nr; 391 Duration space; 392 } 393 394 /// Configuration of a layout consisting of a number of span configs. 395 struct LayoutConfig { 396 Span[] spans; 397 } 398 399 @( 400 "shall be a layout of 15 snapshots with increasing time between them when configured with three spans") 401 unittest { 402 import std.conv : to; 403 import std.range : iota; 404 405 const base = Clock.currTime.toUTC; 406 407 auto conf = LayoutConfig([ 408 Span(5, 4.dur!"hours"), Span(5, 1.dur!"days"), Span(5, 7.dur!"days") 409 ]); 410 auto layout = Layout(base, conf); 411 412 immutable addSnapshotsNr = 5 * 4 + 5 * 24 + 5 * 24 * 7; 413 414 // completely fill up the layout 415 foreach (a; iota(0, addSnapshotsNr)) { 416 layout.put(Snapshot(base - a.dur!"hours", a.to!string.Name)); 417 } 418 419 layout.buckets.length.should == 15; 420 layout.discarded.length.shouldEqual(addSnapshotsNr - 15); 421 422 (base - layout.times[0].begin).total!"hours".shouldEqual(4); 423 (base - layout.times[4].begin).total!"hours".shouldEqual(4 * 5); 424 (base - layout.times[5].begin).total!"hours".shouldEqual(4 * 5 + 24); 425 (base - layout.times[9].begin).total!"hours".shouldEqual(4 * 5 + 24 * 5); 426 (base - layout.times[10].begin).total!"hours".shouldEqual(4 * 5 + 24 * 5 + 24 * 7); 427 (base - layout.times[14].begin).total!"hours".shouldEqual(4 * 5 + 24 * 5 + 24 * 7 * 5); 428 } 429 430 @("shall move the about to expire snapshot from its current bucket to bucket n+1") 431 unittest { 432 import std.conv : to; 433 import std.range : iota; 434 435 const base = Clock.currTime.toUTC; 436 437 auto conf = LayoutConfig([Span(5, 4.dur!"hours")]); 438 auto layout = Layout(base, conf); 439 440 // init bucket 0 441 layout.put(Snapshot(base - 3.dur!"hours", "bucket 1".Name)); 442 layout.put(Snapshot(base - 3.dur!"hours" - 10.dur!"minutes", "bucket 1".Name)); 443 layout.put(Snapshot(base - 7.dur!"hours", "bucket 2".Name)); 444 // replace bucket 0 with a better candidate 445 layout.put(Snapshot(base - 1.dur!"hours", "bucket 0".Name)); 446 447 layout.finalize; 448 449 (base - layout.snapshotTimeInBucket(0)).total!"hours".shouldEqual(1); 450 451 (base - layout.snapshotTimeInBucket(1)).total!"hours".shouldEqual(3); 452 // should be the one with -10 minutes. 453 (base - layout.snapshotTimeInBucket(1)).total!"minutes".shouldEqual(190); 454 455 (base - layout.snapshotTimeInBucket(2)).total!"hours".shouldEqual(7); 456 }