//go:build windows
// +build windows

package hcsoci

import (
	"context"
	"errors"
	"fmt"
	"math"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"

	"github.com/Microsoft/go-winio/pkg/fs"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"

	"github.com/Microsoft/hcsshim/internal/guestpath"
	"github.com/Microsoft/hcsshim/internal/hcs/schema1"
	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
	"github.com/Microsoft/hcsshim/internal/layers"
	"github.com/Microsoft/hcsshim/internal/log"
	"github.com/Microsoft/hcsshim/internal/oci"
	"github.com/Microsoft/hcsshim/internal/processorinfo"
	"github.com/Microsoft/hcsshim/internal/uvm"
	"github.com/Microsoft/hcsshim/internal/uvmfolder"
	"github.com/Microsoft/hcsshim/internal/wclayer"
	"github.com/Microsoft/hcsshim/osversion"
	"github.com/Microsoft/hcsshim/pkg/annotations"
)

const createContainerSubdirectoryForProcessDumpSuffix = "{container_id}"

// A simple wrapper struct around the container mount configs that should be added to the
// container.
type mountsConfig struct {
	mdsv1 []schema1.MappedDir
	mpsv1 []schema1.MappedPipe
	mdsv2 []hcsschema.MappedDirectory
	mpsv2 []hcsschema.MappedPipe
}

func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mountsConfig, error) {
	// Add the mounts as mapped directories or mapped pipes
	// TODO: Mapped pipes to add in v2 schema.
	var config mountsConfig
	for _, mount := range coi.Spec.Mounts {
		if uvm.IsPipe(mount.Source) {
			src, dst := uvm.GetContainerPipeMapping(coi.HostingSystem, mount)
			config.mpsv1 = append(config.mpsv1, schema1.MappedPipe{HostPath: src, ContainerPipeName: dst})
			config.mpsv2 = append(config.mpsv2, hcsschema.MappedPipe{HostPath: src, ContainerPipeName: dst})
		} else {
			readOnly := false
			for _, o := range mount.Options {
				if strings.ToLower(o) == "ro" {
					readOnly = true
				}
			}
			mdv1 := schema1.MappedDir{HostPath: mount.Source, ContainerPath: mount.Destination, ReadOnly: readOnly}
			mdv2 := hcsschema.MappedDirectory{ContainerPath: mount.Destination, ReadOnly: readOnly}
			if coi.HostingSystem == nil {
				// HCS has a bug where it does not correctly resolve file (not dir) paths
				// if the path includes a symlink. Therefore, we resolve the path here before
				// passing it in. The issue does not occur with VSMB, so we don't need to
				// worry about the isolated case.
				src, err := fs.ResolvePath(mount.Source)
				if err != nil {
					return nil, fmt.Errorf("failed to resolve path for mount source %q: %s", mount.Source, err)
				}
				mdv2.HostPath = src
			} else if mount.Type == MountTypeVirtualDisk || mount.Type == MountTypePhysicalDisk || mount.Type == MountTypeExtensibleVirtualDisk {
				// For v2 schema containers, any disk mounts will be part of coi.additionalMounts.
				// For v1 schema containers, we don't even get here, since there is no HostingSystem.
				continue
			} else if strings.HasPrefix(mount.Source, guestpath.SandboxMountPrefix) {
				// Convert to the path in the guest that was asked for.
				mdv2.HostPath = convertToWCOWSandboxMountPath(mount.Source)
			} else {
				// vsmb mount
				uvmPath, err := coi.HostingSystem.GetVSMBUvmPath(ctx, mount.Source, readOnly)
				if err != nil {
					return nil, err
				}
				mdv2.HostPath = uvmPath
			}
			config.mdsv1 = append(config.mdsv1, mdv1)
			config.mdsv2 = append(config.mdsv2, mdv2)
		}
	}
	config.mdsv2 = append(config.mdsv2, coi.windowsAdditionalMounts...)

	return &config, nil
}
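
// For illustration only (hypothetical values): an OCI mount such as
//
//	{"source": "C:\\host\\data", "destination": "C:\\ctr\\data", "options": ["ro"]}
//
// is translated by createMountsConfig into a read-only schema1.MappedDir and
// hcsschema.MappedDirectory pair, with the host path rewritten as needed for
// the isolation mode (resolved symlinks, sandbox mounts, or a VSMB share path).
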
// ConvertCPULimits handles the logic of converting and validating the container's CPU
// limits specified in the OCI spec to what HCS expects.
//
// `cid` is the container's ID.
//
// `vmid` is the Utility VM's ID if the container we're constructing is going to belong to
// one.
//
// `spec` is the OCI spec for the container.
//
// `maxCPUCount` is the maximum cpu count allowed for the container. This value should
// be the number of processors on the host, or in the case of a hypervisor isolated
// container, the number of processors assigned to the guest/Utility VM.
//
// Returns the cpu count, cpu limit, and cpu weight in this order. Returns an error if
// more than one of cpu count, cpu limit, or cpu weight was specified in the OCI spec,
// as they are mutually exclusive.
func ConvertCPULimits(ctx context.Context, cid string, spec *specs.Spec, maxCPUCount int32) (int32, int32, int32, error) {
	cpuNumSet := 0
	cpuCount := oci.ParseAnnotationsCPUCount(ctx, spec, annotations.ContainerProcessorCount, 0)
	if cpuCount > 0 {
		cpuNumSet++
	}

	cpuLimit := oci.ParseAnnotationsCPULimit(ctx, spec, annotations.ContainerProcessorLimit, 0)
	if cpuLimit > 0 {
		cpuNumSet++
	}

	cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, spec, annotations.ContainerProcessorWeight, 0)
	if cpuWeight > 0 {
		cpuNumSet++
	}

	if cpuNumSet > 1 {
		return 0, 0, 0, fmt.Errorf("invalid spec - Windows Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight)
	} else if cpuNumSet == 1 {
		cpuCount = NormalizeProcessorCount(ctx, cid, cpuCount, maxCPUCount)
	}

	return cpuCount, cpuLimit, cpuWeight, nil
}

// createWindowsContainerDocument creates documents for passing to HCS or GCS to create
// a container, both hosted and process isolated. It creates both v1 and v2
// container objects, WCOW only. The container's storage should already have been mounted.
func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInternal) (*schema1.ContainerConfig, *hcsschema.Container, error) {
	log.G(ctx).Debug("hcsshim: CreateHCSContainerDocument")
	// TODO: Make this safe if exported so no null pointer dereferences.

	if coi.Spec == nil {
		return nil, nil, fmt.Errorf("cannot create HCS container document - OCI spec is missing")
	}

	if coi.Spec.Windows == nil {
		return nil, nil, fmt.Errorf("cannot create HCS container document - OCI spec Windows section is missing")
	}

	v1 := &schema1.ContainerConfig{
		SystemType:              "Container",
		Name:                    coi.actualID,
		Owner:                   coi.actualOwner,
		HvPartition:             false,
		IgnoreFlushesDuringBoot: coi.Spec.Windows.IgnoreFlushesDuringBoot,
	}

	// IgnoreFlushesDuringBoot is a property of the SCSI attachment for the scratch.
	// Set when it's hot-added to the utility VM.
	// ID is a property on the create call in V2 rather than part of the schema.
	v2Container := &hcsschema.Container{Storage: &hcsschema.Storage{}}

	// TODO: Still want to revisit this.
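	// Note: coi.Spec.Windows.LayerFolders is ordered with the read-only image
	// layers first and the container's RW scratch folder last, so a valid spec
	// always carries at least two entries.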
	if coi.Spec.Windows.LayerFolders == nil || len(coi.Spec.Windows.LayerFolders) < 2 {
		return nil, nil, fmt.Errorf("invalid spec - not enough layer folders supplied")
	}

	if coi.Spec.Hostname != "" {
		v1.HostName = coi.Spec.Hostname
		v2Container.GuestOs = &hcsschema.GuestOs{HostName: coi.Spec.Hostname}
	}

	var (
		uvmCPUCount  int32
		hostCPUCount = processorinfo.ProcessorCount()
		maxCPUCount  = hostCPUCount
	)

	if coi.HostingSystem != nil {
		uvmCPUCount = coi.HostingSystem.ProcessorCount()
		maxCPUCount = uvmCPUCount
	}

	cpuCount, cpuLimit, cpuWeight, err := ConvertCPULimits(ctx, coi.ID, coi.Spec, maxCPUCount)
	if err != nil {
		return nil, nil, err
	}

	if coi.HostingSystem != nil && coi.ScaleCPULimitsToSandbox && cpuLimit > 0 {
		// When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume
		// the CPU limit has been calculated based on the number of processors on the
		// host, and instead re-calculate it based on the number of processors in the UVM.
		//
		// This is needed to work correctly with assumptions kubelet makes when computing
		// the CPU limit value:
		// - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of
		//   cores. So if 2000 millicores are assigned, the container can use 2 processors.
		// - In Windows, the job object CPU limit is global across all processors on the
		//   system, and is represented as a fraction out of 10000. In this model, a limit
		//   of 10000 means the container can use all processors fully, regardless of how
		//   many processors exist on the system.
		// - To convert the millicores value into the job object limit, kubelet divides
		//   the millicores by the number of CPU cores on the host. This causes problems
		//   when running inside a UVM, as the UVM may have a different number of processors
		//   than the host system.
		//
		// To work around this, we undo the division by the number of host processors, and
		// re-do the division based on the number of processors inside the UVM. This will
		// give the correct value based on the actual number of millicores that the kubelet
		// wants the container to have.
		//
		// Kubelet formula to compute CPU limit:
		// cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000
		newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
		// We only apply bounds here because we are calculating the CPU limit ourselves,
		// and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000].
		// In the case where we use the value directly from the user, we don't alter it to fit
		// within the bounds, but just let the platform throw an error if it is invalid.
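		// Worked example (illustrative numbers): with 8 host processors and a
		// 2-processor UVM, kubelet turns a 2000m limit into
		// 10000*2000/8/1000 = 2500. Undoing the host division and re-dividing
		// by the UVM size gives 2500*8/2 = 10000, i.e. full use of the UVM's
		// two processors, which the clamp below leaves unchanged.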
		if newCPULimit < 1 {
			newCPULimit = 1
		} else if newCPULimit > 10000 {
			newCPULimit = 10000
		}

		log.G(ctx).WithFields(logrus.Fields{
			"hostCPUCount": hostCPUCount,
			"uvmCPUCount":  uvmCPUCount,
			"oldCPULimit":  cpuLimit,
			"newCPULimit":  newCPULimit,
		}).Info("rescaling CPU limit for UVM sandbox")

		cpuLimit = newCPULimit
	}

	v1.ProcessorCount = uint32(cpuCount)
	v1.ProcessorMaximum = int64(cpuLimit)
	v1.ProcessorWeight = uint64(cpuWeight)

	v2Container.Processor = &hcsschema.Processor{
		Count:   cpuCount,
		Maximum: cpuLimit,
		Weight:  cpuWeight,
	}

	// Memory Resources
	memoryMaxInMB := oci.ParseAnnotationsMemory(ctx, coi.Spec, annotations.ContainerMemorySizeInMB, 0)
	if memoryMaxInMB > 0 {
		v1.MemoryMaximumInMB = int64(memoryMaxInMB)
		v2Container.Memory = &hcsschema.Memory{
			SizeInMB: memoryMaxInMB,
		}
	}

	// Storage Resources
	storageBandwidthMax := oci.ParseAnnotationsStorageBps(ctx, coi.Spec, annotations.ContainerStorageQoSBandwidthMaximum, 0)
	storageIopsMax := oci.ParseAnnotationsStorageIops(ctx, coi.Spec, annotations.ContainerStorageQoSIopsMaximum, 0)
	if storageBandwidthMax > 0 || storageIopsMax > 0 {
		v1.StorageBandwidthMaximum = uint64(storageBandwidthMax)
		v1.StorageIOPSMaximum = uint64(storageIopsMax)
		v2Container.Storage.QoS = &hcsschema.StorageQoS{
			BandwidthMaximum: storageBandwidthMax,
			IopsMaximum:      storageIopsMax,
		}
	}

	// TODO V2 networking. Only partial at the moment. v2.Container.Networking.Namespace specifically.
	if coi.Spec.Windows.Network != nil {
		v2Container.Networking = &hcsschema.Networking{}

		v1.EndpointList = coi.Spec.Windows.Network.EndpointList
		v2Container.Networking.Namespace = coi.actualNetworkNamespace

		v1.AllowUnqualifiedDNSQuery = coi.Spec.Windows.Network.AllowUnqualifiedDNSQuery
		v2Container.Networking.AllowUnqualifiedDnsQuery = v1.AllowUnqualifiedDNSQuery

		if coi.Spec.Windows.Network.DNSSearchList != nil {
			v1.DNSSearchList = strings.Join(coi.Spec.Windows.Network.DNSSearchList, ",")
			v2Container.Networking.DnsSearchList = v1.DNSSearchList
		}

		v1.NetworkSharedContainerName = coi.Spec.Windows.Network.NetworkSharedContainerName
		v2Container.Networking.NetworkSharedContainerName = v1.NetworkSharedContainerName
	}

	if cs, ok := coi.Spec.Windows.CredentialSpec.(string); ok {
		v1.Credentials = cs
		// If this is a HCS v2 schema container, we created the CCG instance
		// with the other container resources. Pass the CCG state information
		// as part of the container document.
		if coi.ccgState != nil {
			v2Container.ContainerCredentialGuard = coi.ccgState
		}
	}

	if coi.Spec.Root == nil {
		return nil, nil, fmt.Errorf("spec is invalid - root isn't populated")
	}

	if coi.Spec.Root.Readonly {
		return nil, nil, fmt.Errorf(`invalid container spec - readonly is not supported for Windows containers`)
	}

	// Strip off the top-most RW/scratch layer as that's passed in separately to HCS for v1.
	v1.LayerFolderPath = coi.Spec.Windows.LayerFolders[len(coi.Spec.Windows.LayerFolders)-1]

	if coi.isV2Argon() || coi.isV1Argon() {
		// Argon v1 or v2.
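		// For process-isolated (Argon) containers, Root.Path must be the mounted
		// scratch volume in volume-GUID form, for example (illustrative GUID):
		// `\\?\Volume{8a7b5d44-0000-0000-0000-100000000000}\`.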
		const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}(|\\)$`
		if matched, err := regexp.MatchString(volumeGUIDRegex, coi.Spec.Root.Path); !matched || err != nil {
			return nil, nil, fmt.Errorf(`invalid container spec - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, coi.Spec.Root.Path)
		}

		if coi.Spec.Root.Path[len(coi.Spec.Root.Path)-1] != '\\' {
			coi.Spec.Root.Path += `\` // Be nice to clients and make sure well-formed for back-compat
		}
		v1.VolumePath = coi.Spec.Root.Path[:len(coi.Spec.Root.Path)-1] // Strip the trailing backslash. Required for v1.
		v2Container.Storage.Path = coi.Spec.Root.Path
	} else if coi.isV1Xenon() {
		// V1 Xenon
		v1.HvPartition = true
		if coi.Spec == nil || coi.Spec.Windows == nil || coi.Spec.Windows.HyperV == nil { // Be resilient to nil de-reference
			return nil, nil, fmt.Errorf(`invalid container spec - Spec.Windows.HyperV is nil`)
		}
		if coi.Spec.Windows.HyperV.UtilityVMPath != "" {
			// Client-supplied utility VM path
			v1.HvRuntime = &schema1.HvRuntime{ImagePath: coi.Spec.Windows.HyperV.UtilityVMPath}
		} else {
			// Client was lazy. Let's locate it from the layer folders instead.
			uvmImagePath, err := uvmfolder.LocateUVMFolder(ctx, coi.Spec.Windows.LayerFolders)
			if err != nil {
				return nil, nil, err
			}
			v1.HvRuntime = &schema1.HvRuntime{ImagePath: filepath.Join(uvmImagePath, `UtilityVM`)}
		}
	} else if coi.isV2Xenon() {
		// Hosting system was supplied, so this is a v2 Xenon.
		v2Container.Storage.Path = coi.Spec.Root.Path
		if coi.HostingSystem.OS() == "windows" {
			layers, err := layers.GetHCSLayers(ctx, coi.HostingSystem, coi.Spec.Windows.LayerFolders[:len(coi.Spec.Windows.LayerFolders)-1])
			if err != nil {
				return nil, nil, err
			}
			v2Container.Storage.Layers = layers
		}
	}

	if coi.isV2Argon() || coi.isV1Argon() {
		// Argon v1 or v2
		for _, layerPath := range coi.Spec.Windows.LayerFolders[:len(coi.Spec.Windows.LayerFolders)-1] {
			layerID, err := wclayer.LayerID(ctx, layerPath)
			if err != nil {
				return nil, nil, err
			}
			v1.Layers = append(v1.Layers, schema1.Layer{ID: layerID.String(), Path: layerPath})
			v2Container.Storage.Layers = append(v2Container.Storage.Layers, hcsschema.Layer{Id: layerID.String(), Path: layerPath})
		}
	}

	mounts, err := createMountsConfig(ctx, coi)
	if err != nil {
		return nil, nil, err
	}
	v1.MappedDirectories = mounts.mdsv1
	v2Container.MappedDirectories = mounts.mdsv2
	if len(mounts.mpsv1) > 0 && osversion.Build() < osversion.RS3 {
		return nil, nil, fmt.Errorf("named pipe mounts are not supported on this version of Windows")
	}
	v1.MappedPipes = mounts.mpsv1
	v2Container.MappedPipes = mounts.mpsv2

	// Add assigned devices to the container definition.
	if err := parseAssignedDevices(ctx, coi, v2Container); err != nil {
		return nil, nil, err
	}

	// Add any device extensions.
	extensions, err := getDeviceExtensions(coi.Spec.Annotations)
	if err != nil {
		return nil, nil, err
	}
	v2Container.AdditionalDeviceNamespace = extensions

	// Process dump setup (if requested).
	dumpPath := ""
	if coi.HostingSystem != nil {
		dumpPath = coi.HostingSystem.ProcessDumpLocation()
	}

	if specDumpPath, ok := coi.Spec.Annotations[annotations.ContainerProcessDumpLocation]; ok {
		// If a process dump path was specified at pod creation time for a hypervisor
		// isolated pod, then use this value. If one was specified on the container
		// creation document then override with this instead. Unlike Linux, Windows
		// containers can set the dump path on a per container basis.
		dumpPath = specDumpPath
	}
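	// For example (illustrative): setting the annotations.ContainerProcessDumpLocation
	// annotation on the container to `C:\dumps\{container_id}` overrides any pod-level
	// dump location, and the `{container_id}` suffix is expanded to the real container
	// ID further below.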
	// Servercore images block on signaling: they wait until the target process
	// has terminated before returning to the caller. By default, servercore waits
	// for 5 seconds (the default value of 'WaitToKillServiceTimeout') before
	// sending a SIGKILL to terminate the process. This causes issues when graceful
	// termination of containers is requested (Bug36689012).
	// The regkey 'WaitToKillServiceTimeout' value is overridden here to help
	// honor graceful termination of containers by waiting for the requested
	// amount of time before stopping the container.
	// More details on the implementation of this fix can be found in the Kill()
	// function of exec_hcs.go.
	// The 'WaitToKillServiceTimeout' reg key value is arbitrarily set to a value
	// long enough that no one will want to wait longer.
	registryAdd := []hcsschema.RegistryValue{
		{
			Key: &hcsschema.RegistryKey{
				Hive: "System",
				Name: "ControlSet001\\Control",
			},
			Name:        "WaitToKillServiceTimeout",
			StringValue: strconv.Itoa(math.MaxInt32),
			Type_:       "String",
		},
	}

	if dumpPath != "" {
		// If the dumpPath has the createContainerSubdirectoryForProcessDumpSuffix
		// substring as a suffix, then create a subdirectory for this container at
		// the specified dumpPath location. When a fileshare from the host is mounted
		// to the specified dumpPath, this behavior helps identify dumps coming from
		// different containers in the pod.
		// Check for createContainerSubdirectoryForProcessDumpSuffix in lower case and upper case.
		if strings.HasSuffix(dumpPath, createContainerSubdirectoryForProcessDumpSuffix) {
			// Replace {container_id} with the actual container ID.
			dumpPath = strings.TrimSuffix(dumpPath, createContainerSubdirectoryForProcessDumpSuffix) + coi.ID
		} else if strings.HasSuffix(dumpPath, strings.ToUpper(createContainerSubdirectoryForProcessDumpSuffix)) {
			// Replace {CONTAINER_ID} with the actual container ID.
			dumpPath = strings.TrimSuffix(dumpPath, strings.ToUpper(createContainerSubdirectoryForProcessDumpSuffix)) + coi.ID
		}

		dumpType, err := parseDumpType(coi.Spec.Annotations)
		if err != nil {
			return nil, nil, err
		}
		dumpCount, err := parseDumpCount(coi.Spec.Annotations)
		if err != nil {
			return nil, nil, err
		}

		// Setup WER registry keys for local process dump creation if specified.
		// https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps
		registryAdd = append(registryAdd, []hcsschema.RegistryValue{
			{
				Key: &hcsschema.RegistryKey{
					Hive: "Software",
					Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
				},
				Name:        "DumpFolder",
				StringValue: dumpPath,
				Type_:       "String",
			},
			{
				Key: &hcsschema.RegistryKey{
					Hive: "Software",
					Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
				},
				Name:       "DumpType",
				DWordValue: dumpType,
				Type_:      "DWord",
			},
			{
				Key: &hcsschema.RegistryKey{
					Hive: "Software",
					Name: "Microsoft\\Windows\\Windows Error Reporting\\LocalDumps",
				},
				Name:       "DumpCount",
				DWordValue: dumpCount,
				Type_:      "DWord",
			},
		}...)
	}
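	// All collected registry edits (the service-kill timeout and, when a dump path
	// is set, the WER values under the container's
	// HKLM\Software\Microsoft\Windows\Windows Error Reporting\LocalDumps key)
	// are applied via the container document's RegistryChanges below.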
	v2Container.RegistryChanges = &hcsschema.RegistryChanges{
		AddValues: registryAdd,
	}
	return v1, v2Container, nil
}

// parseAssignedDevices parses assigned devices for the container definition.
// This is currently supported for v2 argon and xenon only.
func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *hcsschema.Container) error {
	if !coi.isV2Argon() && !coi.isV2Xenon() {
		return nil
	}

	v2AssignedDevices := []hcsschema.Device{}
	for _, d := range coi.Spec.Windows.Devices {
		v2Dev := hcsschema.Device{}
		switch d.IDType {
		case uvm.VPCILocationPathIDType:
			v2Dev.LocationPath = d.ID
			v2Dev.Type = hcsschema.DeviceInstanceID
		case uvm.VPCIClassGUIDTypeLegacy:
			v2Dev.InterfaceClassGuid = d.ID
		case uvm.VPCIClassGUIDType:
			v2Dev.InterfaceClassGuid = d.ID
		default:
			return fmt.Errorf("specified device %s has unsupported type %s", d.ID, d.IDType)
		}
		log.G(ctx).WithField("hcsv2 device", v2Dev).Debug("adding assigned device to container doc")
		v2AssignedDevices = append(v2AssignedDevices, v2Dev)
	}
	v2.AssignedDevices = v2AssignedDevices
	return nil
}

func parseDumpCount(annots map[string]string) (int32, error) {
	dmpCountStr := annots[annotations.WCOWProcessDumpCount]
	if dmpCountStr == "" {
		// If no count is specified, default to 10.
		return 10, nil
	}
	dumpCount, err := strconv.Atoi(dmpCountStr)
	if err != nil {
		return -1, err
	}
	if dumpCount > 0 {
		return int32(dumpCount), nil
	}
	return -1, fmt.Errorf("invalid dump count specified: %v", dmpCountStr)
}

// parseDumpType parses the passed in string representation of the local user mode process dump type
// to the corresponding value the registry expects to be set.
//
// See DumpType at https://docs.microsoft.com/en-us/windows/win32/wer/collecting-user-mode-dumps for the mappings.
func parseDumpType(annots map[string]string) (int32, error) {
	dmpTypeStr := annots[annotations.WCOWProcessDumpType]
	switch dmpTypeStr {
	case "":
		// If no type is specified, default to full dumps.
		return 2, nil
	case "mini":
		return 1, nil
	case "full":
		return 2, nil
	default:
		return -1, errors.New(`unknown dump type specified, valid values are "mini" or "full"`)
	}
}