// Package hdrhistogram provides an implementation of Gil Tene's HDR Histogram // data structure. The HDR Histogram allows for fast and accurate analysis of // the extreme ranges of data with non-normal distributions, like latency. package hdrhistogram import ( "fmt" "math" ) // A Bracket is a part of a cumulative distribution. type Bracket struct { Quantile float64 Count, ValueAt int64 } // A Snapshot is an exported view of a Histogram, useful for serializing them. // A Histogram can be constructed from it by passing it to Import. type Snapshot struct { LowestTrackableValue int64 HighestTrackableValue int64 SignificantFigures int64 Counts []int64 } // A Histogram is a lossy data structure used to record the distribution of // non-normally distributed data (like latency) with a high degree of accuracy // and a bounded degree of precision. type Histogram struct { lowestTrackableValue int64 highestTrackableValue int64 unitMagnitude int64 significantFigures int64 subBucketHalfCountMagnitude int32 subBucketHalfCount int32 subBucketMask int64 subBucketCount int32 bucketCount int32 countsLen int32 totalCount int64 counts []int64 } // New returns a new Histogram instance capable of tracking values in the given // range and with the given amount of precision. func New(minValue, maxValue int64, sigfigs int) *Histogram { if sigfigs < 1 || 5 < sigfigs { panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs)) } largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs) subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution)))) subBucketHalfCountMagnitude := subBucketCountMagnitude if subBucketHalfCountMagnitude < 1 { subBucketHalfCountMagnitude = 1 } subBucketHalfCountMagnitude-- unitMagnitude := int32(math.Floor(math.Log2(float64(minValue)))) if unitMagnitude < 0 { unitMagnitude = 0 } subBucketCount := int32(math.Pow(2, float64(subBucketHalfCountMagnitude)+1)) subBucketHalfCount := subBucketCount / 2 subBucketMask := int64(subBucketCount-1) << uint(unitMagnitude) // determine exponent range needed to support the trackable value with no // overflow: smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude) bucketsNeeded := int32(1) for smallestUntrackableValue < maxValue { smallestUntrackableValue <<= 1 bucketsNeeded++ } bucketCount := bucketsNeeded countsLen := (bucketCount + 1) * (subBucketCount / 2) return &Histogram{ lowestTrackableValue: minValue, highestTrackableValue: maxValue, unitMagnitude: int64(unitMagnitude), significantFigures: int64(sigfigs), subBucketHalfCountMagnitude: subBucketHalfCountMagnitude, subBucketHalfCount: subBucketHalfCount, subBucketMask: subBucketMask, subBucketCount: subBucketCount, bucketCount: bucketCount, countsLen: countsLen, totalCount: 0, counts: make([]int64, countsLen), } } // ByteSize returns an estimate of the amount of memory allocated to the // histogram in bytes. // // N.B.: This does not take into account the overhead for slices, which are // small, constant, and specific to the compiler version. func (h *Histogram) ByteSize() int { return 6*8 + 5*4 + len(h.counts)*8 } // Merge merges the data stored in the given histogram with the receiver, // returning the number of recorded values which had to be dropped. func (h *Histogram) Merge(from *Histogram) (dropped int64) { i := from.rIterator() for i.next() { v := i.valueFromIdx c := i.countAtIdx if h.RecordValues(v, c) != nil { dropped += c } } return } // TotalCount returns total number of values recorded. func (h *Histogram) TotalCount() int64 { return h.totalCount } // Max returns the approximate maximum recorded value. func (h *Histogram) Max() int64 { var max int64 i := h.iterator() for i.next() { if i.countAtIdx != 0 { max = i.highestEquivalentValue } } return h.highestEquivalentValue(max) } // Min returns the approximate minimum recorded value. func (h *Histogram) Min() int64 { var min int64 i := h.iterator() for i.next() { if i.countAtIdx != 0 && min == 0 { min = i.highestEquivalentValue break } } return h.lowestEquivalentValue(min) } // Mean returns the approximate arithmetic mean of the recorded values. func (h *Histogram) Mean() float64 { if h.totalCount == 0 { return 0 } var total int64 i := h.iterator() for i.next() { if i.countAtIdx != 0 { total += i.countAtIdx * h.medianEquivalentValue(i.valueFromIdx) } } return float64(total) / float64(h.totalCount) } // StdDev returns the approximate standard deviation of the recorded values. func (h *Histogram) StdDev() float64 { if h.totalCount == 0 { return 0 } mean := h.Mean() geometricDevTotal := 0.0 i := h.iterator() for i.next() { if i.countAtIdx != 0 { dev := float64(h.medianEquivalentValue(i.valueFromIdx)) - mean geometricDevTotal += (dev * dev) * float64(i.countAtIdx) } } return math.Sqrt(geometricDevTotal / float64(h.totalCount)) } // Reset deletes all recorded values and restores the histogram to its original // state. func (h *Histogram) Reset() { h.totalCount = 0 for i := range h.counts { h.counts[i] = 0 } } // RecordValue records the given value, returning an error if the value is out // of range. func (h *Histogram) RecordValue(v int64) error { return h.RecordValues(v, 1) } // RecordCorrectedValue records the given value, correcting for stalls in the // recording process. This only works for processes which are recording values // at an expected interval (e.g., doing jitter analysis). Processes which are // recording ad-hoc values (e.g., latency for incoming requests) can't take // advantage of this. func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error { if err := h.RecordValue(v); err != nil { return err } if expectedInterval <= 0 || v <= expectedInterval { return nil } missingValue := v - expectedInterval for missingValue >= expectedInterval { if err := h.RecordValue(missingValue); err != nil { return err } missingValue -= expectedInterval } return nil } // RecordValues records n occurrences of the given value, returning an error if // the value is out of range. func (h *Histogram) RecordValues(v, n int64) error { idx := h.countsIndexFor(v) if idx < 0 || int(h.countsLen) <= idx { return fmt.Errorf("value %d is too large to be recorded", v) } h.counts[idx] += n h.totalCount += n return nil } // ValueAtQuantile returns the recorded value at the given quantile (0..100). func (h *Histogram) ValueAtQuantile(q float64) int64 { if q > 100 { q = 100 } total := int64(0) countAtPercentile := int64(((q / 100) * float64(h.totalCount)) + 0.5) i := h.iterator() for i.next() { total += i.countAtIdx if total >= countAtPercentile { return h.highestEquivalentValue(i.valueFromIdx) } } return 0 } // CumulativeDistribution returns an ordered list of brackets of the // distribution of recorded values. func (h *Histogram) CumulativeDistribution() []Bracket { var result []Bracket i := h.pIterator(1) for i.next() { result = append(result, Bracket{ Quantile: i.percentile, Count: i.countToIdx, ValueAt: i.highestEquivalentValue, }) } return result } // SignificantFigures returns the significant figures used to create the // histogram func (h *Histogram) SignificantFigures() int64 { return h.significantFigures } // LowestTrackableValue returns the lower bound on values that will be added // to the histogram func (h *Histogram) LowestTrackableValue() int64 { return h.lowestTrackableValue } // HighestTrackableValue returns the upper bound on values that will be added // to the histogram func (h *Histogram) HighestTrackableValue() int64 { return h.highestTrackableValue } // Histogram bar for plotting type Bar struct { From, To, Count int64 } // Pretty print as csv for easy plotting func (b Bar) String() string { return fmt.Sprintf("%v, %v, %v\n", b.From, b.To, b.Count) } // Distribution returns an ordered list of bars of the // distribution of recorded values, counts can be normalized to a probability func (h *Histogram) Distribution() (result []Bar) { i := h.iterator() for i.next() { result = append(result, Bar{ Count: i.countAtIdx, From: h.lowestEquivalentValue(i.valueFromIdx), To: i.highestEquivalentValue, }) } return result } // Equals returns true if the two Histograms are equivalent, false if not. func (h *Histogram) Equals(other *Histogram) bool { switch { case h.lowestTrackableValue != other.lowestTrackableValue, h.highestTrackableValue != other.highestTrackableValue, h.unitMagnitude != other.unitMagnitude, h.significantFigures != other.significantFigures, h.subBucketHalfCountMagnitude != other.subBucketHalfCountMagnitude, h.subBucketHalfCount != other.subBucketHalfCount, h.subBucketMask != other.subBucketMask, h.subBucketCount != other.subBucketCount, h.bucketCount != other.bucketCount, h.countsLen != other.countsLen, h.totalCount != other.totalCount: return false default: for i, c := range h.counts { if c != other.counts[i] { return false } } } return true } // Export returns a snapshot view of the Histogram. This can be later passed to // Import to construct a new Histogram with the same state. func (h *Histogram) Export() *Snapshot { return &Snapshot{ LowestTrackableValue: h.lowestTrackableValue, HighestTrackableValue: h.highestTrackableValue, SignificantFigures: h.significantFigures, Counts: append([]int64(nil), h.counts...), // copy } } // Import returns a new Histogram populated from the Snapshot data (which the // caller must stop accessing). func Import(s *Snapshot) *Histogram { h := New(s.LowestTrackableValue, s.HighestTrackableValue, int(s.SignificantFigures)) h.counts = s.Counts totalCount := int64(0) for i := int32(0); i < h.countsLen; i++ { countAtIndex := h.counts[i] if countAtIndex > 0 { totalCount += countAtIndex } } h.totalCount = totalCount return h } func (h *Histogram) iterator() *iterator { return &iterator{ h: h, subBucketIdx: -1, } } func (h *Histogram) rIterator() *rIterator { return &rIterator{ iterator: iterator{ h: h, subBucketIdx: -1, }, } } func (h *Histogram) pIterator(ticksPerHalfDistance int32) *pIterator { return &pIterator{ iterator: iterator{ h: h, subBucketIdx: -1, }, ticksPerHalfDistance: ticksPerHalfDistance, } } func (h *Histogram) sizeOfEquivalentValueRange(v int64) int64 { bucketIdx := h.getBucketIndex(v) subBucketIdx := h.getSubBucketIdx(v, bucketIdx) adjustedBucket := bucketIdx if subBucketIdx >= h.subBucketCount { adjustedBucket++ } return int64(1) << uint(h.unitMagnitude+int64(adjustedBucket)) } func (h *Histogram) valueFromIndex(bucketIdx, subBucketIdx int32) int64 { return int64(subBucketIdx) << uint(int64(bucketIdx)+h.unitMagnitude) } func (h *Histogram) lowestEquivalentValue(v int64) int64 { bucketIdx := h.getBucketIndex(v) subBucketIdx := h.getSubBucketIdx(v, bucketIdx) return h.valueFromIndex(bucketIdx, subBucketIdx) } func (h *Histogram) nextNonEquivalentValue(v int64) int64 { return h.lowestEquivalentValue(v) + h.sizeOfEquivalentValueRange(v) } func (h *Histogram) highestEquivalentValue(v int64) int64 { return h.nextNonEquivalentValue(v) - 1 } func (h *Histogram) medianEquivalentValue(v int64) int64 { return h.lowestEquivalentValue(v) + (h.sizeOfEquivalentValueRange(v) >> 1) } func (h *Histogram) getCountAtIndex(bucketIdx, subBucketIdx int32) int64 { return h.counts[h.countsIndex(bucketIdx, subBucketIdx)] } func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 { bucketBaseIdx := (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude) offsetInBucket := subBucketIdx - h.subBucketHalfCount return bucketBaseIdx + offsetInBucket } func (h *Histogram) getBucketIndex(v int64) int32 { pow2Ceiling := bitLen(v | h.subBucketMask) return int32(pow2Ceiling - int64(h.unitMagnitude) - int64(h.subBucketHalfCountMagnitude+1)) } func (h *Histogram) getSubBucketIdx(v int64, idx int32) int32 { return int32(v >> uint(int64(idx)+int64(h.unitMagnitude))) } func (h *Histogram) countsIndexFor(v int64) int { bucketIdx := h.getBucketIndex(v) subBucketIdx := h.getSubBucketIdx(v, bucketIdx) return int(h.countsIndex(bucketIdx, subBucketIdx)) } type iterator struct { h *Histogram bucketIdx, subBucketIdx int32 countAtIdx, countToIdx, valueFromIdx int64 highestEquivalentValue int64 } func (i *iterator) next() bool { if i.countToIdx >= i.h.totalCount { return false } // increment bucket i.subBucketIdx++ if i.subBucketIdx >= i.h.subBucketCount { i.subBucketIdx = i.h.subBucketHalfCount i.bucketIdx++ } if i.bucketIdx >= i.h.bucketCount { return false } i.countAtIdx = i.h.getCountAtIndex(i.bucketIdx, i.subBucketIdx) i.countToIdx += i.countAtIdx i.valueFromIdx = i.h.valueFromIndex(i.bucketIdx, i.subBucketIdx) i.highestEquivalentValue = i.h.highestEquivalentValue(i.valueFromIdx) return true } type rIterator struct { iterator countAddedThisStep int64 } func (r *rIterator) next() bool { for r.iterator.next() { if r.countAtIdx != 0 { r.countAddedThisStep = r.countAtIdx return true } } return false } type pIterator struct { iterator seenLastValue bool ticksPerHalfDistance int32 percentileToIteratorTo float64 percentile float64 } func (p *pIterator) next() bool { if !(p.countToIdx < p.h.totalCount) { if p.seenLastValue { return false } p.seenLastValue = true p.percentile = 100 return true } if p.subBucketIdx == -1 && !p.iterator.next() { return false } var done = false for !done { currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount) if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile { p.percentile = p.percentileToIteratorTo halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1)) percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance p.percentileToIteratorTo += 100.0 / percentileReportingTicks return true } done = !p.iterator.next() } return true } func bitLen(x int64) (n int64) { for ; x >= 0x8000; x >>= 16 { n += 16 } if x >= 0x80 { x >>= 8 n += 8 } if x >= 0x8 { x >>= 4 n += 4 } if x >= 0x2 { x >>= 2 n += 2 } if x >= 0x1 { n++ } return }