/*
 *
 * Copyright 2017 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package stats tracks the statistics associated with benchmark runs.
package stats

import (
	"bytes"
	"fmt"
	"log"
	"math"
	"runtime"
	"sort"
	"strconv"
	"sync"
	"time"

	"google.golang.org/grpc"
)

// FeatureIndex is an enum for features that usually differ across individual
// benchmark runs in a single execution. These are usually configured by the
// user through command line flags.
type FeatureIndex int

// FeatureIndex enum values corresponding to individually settable features.
const (
	EnableTraceIndex FeatureIndex = iota
	ReadLatenciesIndex
	ReadKbpsIndex
	ReadMTUIndex
	MaxConcurrentCallsIndex
	ReqSizeBytesIndex
	RespSizeBytesIndex
	ReqPayloadCurveIndex
	RespPayloadCurveIndex
	CompModesIndex
	EnableChannelzIndex
	EnablePreloaderIndex

	// MaxFeatureIndex is a place holder to indicate the total number of feature
	// indices we have. Any new feature indices should be added above this.
	MaxFeatureIndex
)

// Features represent configured options for a specific benchmark run. This is
// usually constructed from command line arguments passed by the caller. See
// benchmark/benchmain/main.go for defined command line flags. This is also
// part of the BenchResults struct which is serialized and written to a file.
type Features struct {
	// Network mode used for this benchmark run. Could be one of Local, LAN, WAN
	// or Longhaul.
	NetworkMode string
	// UseBufCon indicates whether an in-memory connection was used for this
	// benchmark run instead of system network I/O.
	UseBufConn bool
	// EnableKeepalive indicates if keepalives were enabled on the connections
	// used in this benchmark run.
	EnableKeepalive bool
	// BenchTime indicates the duration of the benchmark run.
	BenchTime time.Duration

	// Features defined above are usually the same for all benchmark runs in a
	// particular invocation, while the features defined below could vary from
	// run to run based on the configured command line. These features have a
	// corresponding featureIndex value which is used for a variety of reasons.

	// EnableTrace indicates if tracing was enabled.
	EnableTrace bool
	// Latency is the simulated one-way network latency used.
	Latency time.Duration
	// Kbps is the simulated network throughput used.
	Kbps int
	// MTU is the simulated network MTU used.
	MTU int
	// MaxConcurrentCalls is the number of concurrent RPCs made during this
	// benchmark run.
	MaxConcurrentCalls int
	// ReqSizeBytes is the request size in bytes used in this benchmark run.
	// Unused if ReqPayloadCurve is non-nil.
	ReqSizeBytes int
	// RespSizeBytes is the response size in bytes used in this benchmark run.
	// Unused if RespPayloadCurve is non-nil.
	RespSizeBytes int
	// ReqPayloadCurve is a histogram representing the shape a random
	// distribution request payloads should take.
	ReqPayloadCurve *PayloadCurve
	// RespPayloadCurve is a histogram representing the shape a random
	// distribution request payloads should take.
	RespPayloadCurve *PayloadCurve
	// ModeCompressor represents the compressor mode used.
	ModeCompressor string
	// EnableChannelz indicates if channelz was turned on.
	EnableChannelz bool
	// EnablePreloader indicates if preloading was turned on.
	EnablePreloader bool
}

// String returns all the feature values as a string.
func (f Features) String() string {
	var reqPayloadString, respPayloadString string
	if f.ReqPayloadCurve != nil {
		reqPayloadString = fmt.Sprintf("reqPayloadCurve_%s", f.ReqPayloadCurve.ShortHash())
	} else {
		reqPayloadString = fmt.Sprintf("reqSize_%vB", f.ReqSizeBytes)
	}
	if f.RespPayloadCurve != nil {
		respPayloadString = fmt.Sprintf("respPayloadCurve_%s", f.RespPayloadCurve.ShortHash())
	} else {
		respPayloadString = fmt.Sprintf("respSize_%vB", f.RespSizeBytes)
	}
	return fmt.Sprintf("networkMode_%v-bufConn_%v-keepalive_%v-benchTime_%v-"+
		"trace_%v-latency_%v-kbps_%v-MTU_%v-maxConcurrentCalls_%v-%s-%s-"+
		"compressor_%v-channelz_%v-preloader_%v",
		f.NetworkMode, f.UseBufConn, f.EnableKeepalive, f.BenchTime, f.EnableTrace,
		f.Latency, f.Kbps, f.MTU, f.MaxConcurrentCalls, reqPayloadString,
		respPayloadString, f.ModeCompressor, f.EnableChannelz, f.EnablePreloader)
}

// SharedFeatures returns the shared features as a pretty printable string.
// 'wantFeatures' is a bitmask of wanted features, indexed by FeaturesIndex.
func (f Features) SharedFeatures(wantFeatures []bool) string {
	var b bytes.Buffer
	if f.NetworkMode != "" {
		b.WriteString(fmt.Sprintf("Network: %v\n", f.NetworkMode))
	}
	if f.UseBufConn {
		b.WriteString(fmt.Sprintf("UseBufConn: %v\n", f.UseBufConn))
	}
	if f.EnableKeepalive {
		b.WriteString(fmt.Sprintf("EnableKeepalive: %v\n", f.EnableKeepalive))
	}
	b.WriteString(fmt.Sprintf("BenchTime: %v\n", f.BenchTime))
	f.partialString(&b, wantFeatures, ": ", "\n")
	return b.String()
}

// PrintableName returns a one line name which includes the features specified
// by 'wantFeatures' which is a bitmask of wanted features, indexed by
// FeaturesIndex.
func (f Features) PrintableName(wantFeatures []bool) string {
	var b bytes.Buffer
	f.partialString(&b, wantFeatures, "_", "-")
	return b.String()
}

// partialString writes features specified by 'wantFeatures' to the provided
// bytes.Buffer.
func (f Features) partialString(b *bytes.Buffer, wantFeatures []bool, sep, delim string) {
	for i, sf := range wantFeatures {
		if sf {
			switch FeatureIndex(i) {
			case EnableTraceIndex:
				b.WriteString(fmt.Sprintf("Trace%v%v%v", sep, f.EnableTrace, delim))
			case ReadLatenciesIndex:
				b.WriteString(fmt.Sprintf("Latency%v%v%v", sep, f.Latency, delim))
			case ReadKbpsIndex:
				b.WriteString(fmt.Sprintf("Kbps%v%v%v", sep, f.Kbps, delim))
			case ReadMTUIndex:
				b.WriteString(fmt.Sprintf("MTU%v%v%v", sep, f.MTU, delim))
			case MaxConcurrentCallsIndex:
				b.WriteString(fmt.Sprintf("Callers%v%v%v", sep, f.MaxConcurrentCalls, delim))
			case ReqSizeBytesIndex:
				b.WriteString(fmt.Sprintf("ReqSize%v%vB%v", sep, f.ReqSizeBytes, delim))
			case RespSizeBytesIndex:
				b.WriteString(fmt.Sprintf("RespSize%v%vB%v", sep, f.RespSizeBytes, delim))
			case ReqPayloadCurveIndex:
				if f.ReqPayloadCurve != nil {
					b.WriteString(fmt.Sprintf("ReqPayloadCurve%vSHA-256:%v%v", sep, f.ReqPayloadCurve.Hash(), delim))
				}
			case RespPayloadCurveIndex:
				if f.RespPayloadCurve != nil {
					b.WriteString(fmt.Sprintf("RespPayloadCurve%vSHA-256:%v%v", sep, f.RespPayloadCurve.Hash(), delim))
				}
			case CompModesIndex:
				b.WriteString(fmt.Sprintf("Compressor%v%v%v", sep, f.ModeCompressor, delim))
			case EnableChannelzIndex:
				b.WriteString(fmt.Sprintf("Channelz%v%v%v", sep, f.EnableChannelz, delim))
			case EnablePreloaderIndex:
				b.WriteString(fmt.Sprintf("Preloader%v%v%v", sep, f.EnablePreloader, delim))
			default:
				log.Fatalf("Unknown feature index %v. maxFeatureIndex is %v", i, MaxFeatureIndex)
			}
		}
	}
}

// BenchResults records features and results of a benchmark run. A collection
// of these structs is usually serialized and written to a file after a
// benchmark execution, and could later be read for pretty-printing or
// comparison with other benchmark results.
type BenchResults struct {
	// GoVersion is the version of the compiler the benchmark was compiled with.
	GoVersion string
	// GrpcVersion is the gRPC version being benchmarked.
	GrpcVersion string
	// RunMode is the workload mode for this benchmark run. This could be unary,
	// stream or unconstrained.
	RunMode string
	// Features represents the configured feature options for this run.
	Features Features
	// SharedFeatures represents the features which were shared across all
	// benchmark runs during one execution. It is a slice indexed by
	// 'FeaturesIndex' and a value of true indicates that the associated
	// feature is shared across all runs.
	SharedFeatures []bool
	// Data contains the statistical data of interest from the benchmark run.
	Data RunData
}

// RunData contains statistical data of interest from a benchmark run.
type RunData struct {
	// TotalOps is the number of operations executed during this benchmark run.
	// Only makes sense for unary and streaming workloads.
	TotalOps uint64
	// SendOps is the number of send operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	SendOps uint64
	// RecvOps is the number of receive operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	RecvOps uint64
	// AllocedBytes is the average memory allocation in bytes per operation.
	AllocedBytes float64
	// Allocs is the average number of memory allocations per operation.
	Allocs float64
	// ReqT is the average request throughput associated with this run.
	ReqT float64
	// RespT is the average response throughput associated with this run.
	RespT float64

	// We store different latencies associated with each run. These latencies are
	// only computed for unary and stream workloads as they are not very useful
	// for unconstrained workloads.

	// Fiftieth is the 50th percentile latency.
	Fiftieth time.Duration
	// Ninetieth is the 90th percentile latency.
	Ninetieth time.Duration
	// Ninetyninth is the 99th percentile latency.
	NinetyNinth time.Duration
	// Average is the average latency.
	Average time.Duration
}

type durationSlice []time.Duration

func (a durationSlice) Len() int           { return len(a) }
func (a durationSlice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a durationSlice) Less(i, j int) bool { return a[i] < a[j] }

// Stats is a helper for gathering statistics about individual benchmark runs.
type Stats struct {
	mu         sync.Mutex
	numBuckets int
	hw         *histWrapper
	results    []BenchResults
	startMS    runtime.MemStats
	stopMS     runtime.MemStats
}

type histWrapper struct {
	unit      time.Duration
	histogram *Histogram
	durations durationSlice
}

// NewStats creates a new Stats instance. If numBuckets is not positive, the
// default value (16) will be used.
func NewStats(numBuckets int) *Stats {
	if numBuckets <= 0 {
		numBuckets = 16
	}
	// Use one more bucket for the last unbounded bucket.
	s := &Stats{numBuckets: numBuckets + 1}
	s.hw = &histWrapper{}
	return s
}

// StartRun is to be invoked to indicate the start of a new benchmark run.
func (s *Stats) StartRun(mode string, f Features, sf []bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.startMS)
	s.results = append(s.results, BenchResults{
		GoVersion:      runtime.Version(),
		GrpcVersion:    grpc.Version,
		RunMode:        mode,
		Features:       f,
		SharedFeatures: sf,
	})
}

// EndRun is to be invoked to indicate the end of the ongoing benchmark run. It
// computes a bunch of stats and dumps them to stdout.
func (s *Stats) EndRun(count uint64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.stopMS)
	r := &s.results[len(s.results)-1]
	r.Data = RunData{
		TotalOps:     count,
		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64(count),
		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64(count),
		ReqT:         float64(count) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
		RespT:        float64(count) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
	}
	s.computeLatencies(r)
	s.dump(r)
	s.hw = &histWrapper{}
}

// EndUnconstrainedRun is similar to EndRun, but is to be used for
// unconstrained workloads.
func (s *Stats) EndUnconstrainedRun(req uint64, resp uint64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.stopMS)
	r := &s.results[len(s.results)-1]
	r.Data = RunData{
		SendOps:      req,
		RecvOps:      resp,
		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64((req+resp)/2),
		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64((req+resp)/2),
		ReqT:         float64(req) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
		RespT:        float64(resp) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
	}
	s.computeLatencies(r)
	s.dump(r)
	s.hw = &histWrapper{}
}

// AddDuration adds an elapsed duration per operation to the stats. This is
// used by unary and stream modes where request and response stats are equal.
func (s *Stats) AddDuration(d time.Duration) {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.hw.durations = append(s.hw.durations, d)
}

// GetResults returns the results from all benchmark runs.
func (s *Stats) GetResults() []BenchResults {
	s.mu.Lock()
	defer s.mu.Unlock()

	return s.results
}

// computeLatencies computes percentile latencies based on durations stored in
// the stats object and updates the corresponding fields in the result object.
func (s *Stats) computeLatencies(result *BenchResults) {
	if len(s.hw.durations) == 0 {
		return
	}
	sort.Sort(s.hw.durations)
	minDuration := int64(s.hw.durations[0])
	maxDuration := int64(s.hw.durations[len(s.hw.durations)-1])

	// Use the largest unit that can represent the minimum time duration.
	s.hw.unit = time.Nanosecond
	for _, u := range []time.Duration{time.Microsecond, time.Millisecond, time.Second} {
		if minDuration <= int64(u) {
			break
		}
		s.hw.unit = u
	}

	numBuckets := s.numBuckets
	if n := int(maxDuration - minDuration + 1); n < numBuckets {
		numBuckets = n
	}
	s.hw.histogram = NewHistogram(HistogramOptions{
		NumBuckets: numBuckets,
		// max-min(lower bound of last bucket) = (1 + growthFactor)^(numBuckets-2) * baseBucketSize.
		GrowthFactor:   math.Pow(float64(maxDuration-minDuration), 1/float64(numBuckets-2)) - 1,
		BaseBucketSize: 1.0,
		MinValue:       minDuration,
	})
	for _, d := range s.hw.durations {
		s.hw.histogram.Add(int64(d))
	}
	result.Data.Fiftieth = s.hw.durations[max(s.hw.histogram.Count*int64(50)/100-1, 0)]
	result.Data.Ninetieth = s.hw.durations[max(s.hw.histogram.Count*int64(90)/100-1, 0)]
	result.Data.NinetyNinth = s.hw.durations[max(s.hw.histogram.Count*int64(99)/100-1, 0)]
	result.Data.Average = time.Duration(float64(s.hw.histogram.Sum) / float64(s.hw.histogram.Count))
}

// dump returns a printable version.
func (s *Stats) dump(result *BenchResults) {
	var b bytes.Buffer

	// Go and gRPC version information.
	b.WriteString(fmt.Sprintf("%s/grpc%s\n", result.GoVersion, result.GrpcVersion))

	// This prints the run mode and all features of the bench on a line.
	b.WriteString(fmt.Sprintf("%s-%s:\n", result.RunMode, result.Features.String()))

	unit := s.hw.unit
	tUnit := fmt.Sprintf("%v", unit)[1:] // stores one of s, ms, μs, ns

	if l := result.Data.Fiftieth; l != 0 {
		b.WriteString(fmt.Sprintf("50_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.Ninetieth; l != 0 {
		b.WriteString(fmt.Sprintf("90_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.NinetyNinth; l != 0 {
		b.WriteString(fmt.Sprintf("99_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.Average; l != 0 {
		b.WriteString(fmt.Sprintf("Avg_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	b.WriteString(fmt.Sprintf("Bytes/op: %v\t", result.Data.AllocedBytes))
	b.WriteString(fmt.Sprintf("Allocs/op: %v\t\n", result.Data.Allocs))

	// This prints the histogram stats for the latency.
	if s.hw.histogram == nil {
		b.WriteString("Histogram (empty)\n")
	} else {
		b.WriteString(fmt.Sprintf("Histogram (unit: %s)\n", tUnit))
		s.hw.histogram.PrintWithUnit(&b, float64(unit))
	}

	// Print throughput data.
	req := result.Data.SendOps
	if req == 0 {
		req = result.Data.TotalOps
	}
	resp := result.Data.RecvOps
	if resp == 0 {
		resp = result.Data.TotalOps
	}
	b.WriteString(fmt.Sprintf("Number of requests:  %v\tRequest throughput:  %v bit/s\n", req, result.Data.ReqT))
	b.WriteString(fmt.Sprintf("Number of responses: %v\tResponse throughput: %v bit/s\n", resp, result.Data.RespT))
	fmt.Println(b.String())
}

func max(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}