/*
 * Copyright 2016-2019 The Last Pickle Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.cassandrareaper.service;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;
import java.util.concurrent.ThreadLocalRandom;

import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import io.cassandrareaper.AppContext;
import io.cassandrareaper.ReaperException;
import io.cassandrareaper.core.RepairRun;
import io.cassandrareaper.core.RepairRun.RunState;
import io.cassandrareaper.core.RepairSchedule;
import io.cassandrareaper.core.RepairUnit;
import io.cassandrareaper.service.RepairManager.CQLSyncer;
import io.cassandrareaper.storage.IDistributedStorage;
import io.cassandrareaper.storage.OpType;

public final class SchedulingManager extends TimerTask {

  private static final Logger LOG = LoggerFactory.getLogger(SchedulingManager.class);

  private final AppContext context;
  private final RepairRunService repairRunService;

  /* nextActivatedSchedule used for nicer logging only */
  private RepairSchedule nextActivatedSchedule;

  /**
   * Builds a manager bound to the given application context.
   *
   * <p>Private on purpose: instances are obtained through {@link #create(AppContext)}.
   *
   * @param context the application context; also handed to {@code RepairRunService.create}
   */
  private SchedulingManager(AppContext context) {
    // The two assignments are independent of each other.
    this.repairRunService = RepairRunService.create(context);
    this.context = context;
  }

  /**
   * Static factory for {@link SchedulingManager}.
   *
   * <p>The returned instance is not scheduled yet; call {@link #start()} to do that.
   *
   * @param context the application context the new manager works against
   * @return a new, not yet started, manager
   */
  public static SchedulingManager create(AppContext context) {
    final SchedulingManager manager = new SchedulingManager(context);
    return manager;
  }

  /**
   * Schedules this task on a new {@link Timer} named {@code SchedulingManagerTimer}.
   *
   * <p>The first execution happens after a random delay in the range [1000, 2000) milliseconds.
   * Subsequent executions are spaced by the value of the integer system property
   * {@code <fully qualified class name>.period_seconds} (default 60), converted to milliseconds.
   */
  public void start() {
    LOG.info("Starting new SchedulingManager instance");
    final Timer schedulingTimer = new Timer("SchedulingManagerTimer");

    final long initialDelayMillis = ThreadLocalRandom.current().nextLong(1000, 2000);
    final String periodProperty = SchedulingManager.class.getName() + ".period_seconds";
    final long periodMillis = 1000L * Integer.getInteger(periodProperty, 60);

    schedulingTimer.schedule(this, initialDelayMillis, periodMillis);
  }

  /**
   * Marks the given schedule as {@code PAUSED}, stamps the pause time with the current time and
   * persists the result.
   *
   * @param schedule the schedule to pause
   * @return the updated (paused) schedule that was written to storage
   * @throws IllegalStateException if storage reports that the update failed
   */
  public RepairSchedule pauseRepairSchedule(RepairSchedule schedule) {
    final RepairSchedule paused = schedule.with()
        .state(RepairSchedule.State.PAUSED)
        .pauseTime(DateTime.now())
        .build(schedule.getId());

    final boolean stored = context.storage.updateRepairSchedule(paused);
    if (stored) {
      return paused;
    }
    throw new IllegalStateException(String.format("failed updating repair schedule %s", paused.getId()));
  }

  /**
   * Marks the given schedule as {@code ACTIVE}, clears its pause time and persists the result.
   *
   * @param schedule the schedule to resume
   * @return the updated (active) schedule that was written to storage
   * @throws IllegalStateException if storage reports that the update failed
   */
  public RepairSchedule resumeRepairSchedule(RepairSchedule schedule) {
    final RepairSchedule resumed = schedule.with()
        .state(RepairSchedule.State.ACTIVE)
        .pauseTime(null)
        .build(schedule.getId());

    final boolean stored = context.storage.updateRepairSchedule(resumed);
    if (stored) {
      return resumed;
    }
    throw new IllegalStateException(String.format("failed updating repair schedule %s", resumed.getId()));
  }

  /**
   * Called regularly, do not block!
   *
   * <p>Does nothing unless the application is running and the path {@code /var/local/adc} exists.
   * Otherwise it loads all repair schedules, sorts them by their natural ordering, and - only if
   * both the "outer" and the "inner" scheduling leads can be taken from the distributed storage -
   * hands each schedule to {@code manageSchedule}.
   *
   * <p>{@code RepairManager.currentlyScheduling} is set to {@code true} for the duration of the
   * pass and always reset in a {@code finally} block.
   *
   * <p>Any {@link Throwable} is logged. When JVM assertions are enabled and the application is
   * still marked as running, the JVM is terminated with exit code 1.
   */
  @Override
  public void run() {
    if (!context.isRunning.get() || !new File("/var/local/adc").exists()) {
      return;
    }
    LOG.debug("Checking for repair schedules...");
    UUID lastId = null;
    try {
      RepairManager.currentlyScheduling = true;

      // Typed copy instead of a raw ArrayList. sort(null) sorts by natural ordering, which is
      // exactly what Collections.sort(list) delegates to.
      List<RepairSchedule> schedules = new ArrayList<>(context.storage.getAllRepairSchedules());
      schedules.sort(null);

      IDistributedStorage distributedStorage = (IDistributedStorage) context.storage;

      // Fixed (overridable) lock ids are used so that we don't ever get more than a single
      // instance in here at once trying to launch a repair schedule.
      UUID outerLock = getRoundedUuid(
          UUID.fromString(
              System.getProperty("reaper.outer.scheduling.lock", "e1fd2404-9d8f-11ec-b909-0242ac120002")));
      if (!distributedStorage.takeLead(outerLock, 60)) {
        // Someone else holds the outer lead; the finally block still resets the flag.
        return;
      }

      UUID innerLock = UUID.fromString(
          System.getProperty("reaper.inner.scheduling.lock", "e1fd2404-9d8f-11ec-b909-0242ac120003"));
      if (!distributedStorage.takeLead(innerLock, 60)) {
        LOG.warn("Got outer reaper lock but not inner lock");
        return;
      }

      boolean anyRunStarted = false;
      for (RepairSchedule schedule : schedules) {
        lastId = schedule.getId();
        anyRunStarted = manageSchedule(schedule) || anyRunStarted;
      }
      if (!anyRunStarted && nextActivatedSchedule != null) {
        LOG.debug(
            "not scheduling new repairs yet, next activation is '{}' for schedule id '{}'",
            nextActivatedSchedule.getNextActivation(),
            nextActivatedSchedule.getId());
      }
    } catch (Throwable ex) {
      LOG.error("failed managing schedule for run with id: {}", lastId);
      LOG.error("catch exception", ex);
      try {
        // Throws only when the JVM runs with assertions enabled (-ea).
        assert false : "if assertions are enabled then exit the jvm";
      } catch (AssertionError ae) {
        if (context.isRunning.get()) {
          LOG.error("SchedulingManager failed. Exiting JVM.");
          System.exit(1);
        }
      }
    } finally {
      RepairManager.currentlyScheduling = false;
    }
  }

  /**
   * Derives a UUID from {@code uuid} by overwriting the tail of its string form.
   *
   * <p>The replacement text is the string form of {@code SegmentRunner.getRoundedTime()} with its
   * first character dropped; it overwrites the same number of trailing characters of the UUID.
   * NOTE(review): presumably this makes the resulting id change whenever the rounded time changes;
   * confirm against {@code SegmentRunner.getRoundedTime()}.
   *
   * @param uuid the base UUID
   * @return the UUID parsed from the patched string
   */
  private static UUID getRoundedUuid(UUID uuid) {
    final String base = uuid.toString();
    final String suffix = ("" + SegmentRunner.getRoundedTime()).substring(1);
    final String prefix = base.substring(0, base.length() - suffix.length());
    return UUID.fromString(prefix + suffix);
  }

  /**
   * Manage, i.e. check whether a new repair run should be started with this schedule.
   *
   * <p>The schedule is re-read from storage before it is evaluated. If it no longer exists there
   * it is skipped.
   *
   * @param schdle The schedule to be checked for activation.
   * @return boolean indicating whether a new RepairRun instance was created and started.
   * @throws AssertionError if the stored schedule is in a state other than ACTIVE or PAUSED
   */
  private boolean manageSchedule(RepairSchedule schdle) {
    Optional<RepairSchedule> stored = context.storage.getRepairSchedule(schdle.getId());
    if (!stored.isPresent()) {
      // The schedule was removed after the caller listed it; nothing to do.
      LOG.warn("repair schedule '{}' no longer exists in storage, skipping it", schdle.getId());
      return false;
    }
    RepairSchedule current = stored.get();

    LOG.info(
        "SchedulingManager.managerSchedule see {} and {} and {} and {}",
        current.getId(),
        current.getState(),
        current.getNextActivation().isBeforeNow(),
        current.getNextActivation());

    switch (current.getState()) {
      case ACTIVE:
        return manageActiveSchedule(current);
      case PAUSED:
        LOG.info("Repair schedule '{}' is paused", current.getId());
        return false;
      default:
        throw new AssertionError("illegal schedule state in call to manageSchedule(..): " + current.getState());
    }
  }

  /**
   * Decides whether an ACTIVE schedule is due and, if so, tries to launch a repair run for it.
   *
   * @param current the freshly loaded, ACTIVE schedule
   * @return true only if a new repair run was created and started
   */
  private boolean manageActiveSchedule(RepairSchedule current) {
    RepairUnit repairUnit = context.storage.getRepairUnit(current.getRepairUnitId());

    if (repairUnit.getIncrementalRepair() && isLaunchBlockedByUpgrade()) {
      return false;
    }

    if (!current.getNextActivation().isBeforeNow() && !isRepairRequestedOnDemand(repairUnit)) {
      // Not due yet: remember the schedule with the earliest next activation, for logging only.
      if (nextActivatedSchedule == null
          || nextActivatedSchedule.getNextActivation().isAfter(current.getNextActivation())) {
        nextActivatedSchedule = current;
      }
      return false;
    }

    LOG.info(
        "repair unit '{}' should be repaired based on RepairSchedule with id '{}'",
        current.getRepairUnitId(),
        current.getId());

    // OK - At this point we see that someone wants to run. Lets see if there is
    // another repair already running, in which case, we can't launch this one.
    if (!context.storage.getRepairRunsWithState(RunState.RUNNING).isEmpty()) {
      LOG.info(
          "Want to launch repair from schedule {} but already see RUNNING repair.  Returning",
          current.getId());
      return false;
    }

    RepairSchedule schedule
        = current.with().nextActivation(current.getFollowingActivation()).build(current.getId());

    context.storage.updateRepairSchedule(schedule);
    LOG.info(
        "repair unit '{}' updated with next activation '{}'",
        schedule.getRepairUnitId(),
        schedule.getNextActivation());

    if (repairRunAlreadyScheduled(schedule, repairUnit)) {
      LOG.info("repair run was already scheduled - returning");
      return false;
    }

    try {
      return launchRepairRun(schedule, repairUnit);
    } catch (ReaperException e) {
      LOG.error(e.getMessage(), e);
      return false;
    }
  }

  /**
   * Checks whether a recently detected upgrade forbids launching an incremental repair.
   *
   * <p>Cassandra will not do incremental repairs if any node has not yet completed the upgrade,
   * and starting an anti-compaction can cause in-flight upgrades to fail. Launching a repair
   * before everyone is ready therefore resets their progress: we cannot repair until upgrades
   * finish, and upgrades cannot finish because we keep attempting to repair. So if an upgrade was
   * detected within the cooling period (system property {@code upgrade.no.launch.cooling.period},
   * default 30 * 60000), the launch is skipped.
   *
   * @return true if launching must be skipped, including when the upgrade state cannot be read
   */
  private boolean isLaunchBlockedByUpgrade() {
    try {
      String asStoredOps = ((IDistributedStorage) context.storage).listOperations(
          MetricsService.BROADCAST_CLUSTER,
          OpType.OP_UPGRADE_DETECTED,
          MetricsService.BROADCAST_HOST);

      if (asStoredOps == null || asStoredOps.length() <= 3) {
        return false;
      }
      long timeLastUpgradeDetected = Long.parseLong(asStoredOps);
      long durationSinceUpgradeDetected = System.currentTimeMillis() - timeLastUpgradeDetected;
      long postUpgradeCoolingPeriod
          = Long.parseLong(System.getProperty("upgrade.no.launch.cooling.period", "" + 30 * 60000));

      if (durationSinceUpgradeDetected < postUpgradeCoolingPeriod) {
        LOG.info(
            "Not even considering running a repair based on upgrade detected {} ms ago",
            durationSinceUpgradeDetected);
        return true;
      }
      return false;
    } catch (Exception e) {
      LOG.error("Confusion on whether anyone is upgrading", e);
      // lets not launch if there is confusion - we will try again soon
      return true;
    }
  }

  /**
   * Tells whether a repair is requested regardless of the schedule's next activation time.
   *
   * @param repairUnit the repair unit of the schedule being evaluated
   * @return true if the file {@code /var/lib/cassandra/repair_wanted} exists and the unit is an
   *     incremental repair of the {@code storagegrid} keyspace
   */
  private static boolean isRepairRequestedOnDemand(RepairUnit repairUnit) {
    return new File("/var/lib/cassandra/repair_wanted").exists()
        && repairUnit.getIncrementalRepair()
        && "storagegrid".equals(repairUnit.getKeyspaceName());
  }

  /**
   * Registers a new repair run for the schedule and starts it, unless the stored schedule changed
   * in the meantime.
   *
   * @param schedule the schedule, already advanced to its following activation
   * @param repairUnit the repair unit of the schedule
   * @return true only if the new repair run was added to the schedule's history and started
   * @throws ReaperException if creating or starting the repair run fails
   * @throws IllegalStateException if the schedule, or the run it points to, vanished from storage
   */
  private boolean launchRepairRun(RepairSchedule schedule, RepairUnit repairUnit) throws ReaperException {
    RepairRun newRepairRun = createNewRunForUnit(schedule, repairUnit);

    RepairSchedule latestSchedule = context.storage.getRepairSchedule(schedule.getId())
        .orElseThrow(() -> new IllegalStateException(
            "repair schedule " + schedule.getId() + " disappeared while launching a repair run"));

    if (equal(schedule, latestSchedule)) {
      ImmutableList<UUID> newRunHistory
          = new ImmutableList.Builder<UUID>()
              .addAll(schedule.getRunHistory())
              .add(newRepairRun.getId())
              .build();

      // always overwrites and returns true. see following FIXMEs
      boolean result
          = context.storage.updateRepairSchedule(
              schedule.with().runHistory(newRunHistory).build(schedule.getId()));

      // FIXME - concurrency is broken unless we atomically add/remove run history items
      // boolean result = context.storage
      //        .addRepairRunToRepairSchedule(schedule.getId(), newRepairRun.getId());

      if (result) {
        context.repairManager.startRepairRun(newRepairRun);
        CQLSyncer.getInstance().startOnRepairRun(repairUnit.getKeyspaceName(), newRepairRun, context);
        return true;
      }
      // this duplicated repair_run needs to be removed from the schedule's history
      // FIXME - concurrency is broken unless we atomically add/remove run history items
      // boolean result = context.storage
      //        .deleteRepairRunFromRepairSchedule(schedule.getId(), newRepairRun.getId());
      return false;
    }

    if (schedule.getRunHistory().size() < latestSchedule.getRunHistory().size()) {
      // newRepairRun is identified as a duplicate (for this schedule and activation time)
      UUID latestRepairRun = latestSchedule.getRunHistory().get(latestSchedule.getRunHistory().size() - 1);

      LOG.info(
          "schedule {} has already added a new repair run {}",
          schedule.getId(),
          latestRepairRun);

      // mark the newly created repair run as an error
      context.storage.updateRepairRun(
          newRepairRun.with()
              .startTime(null != newRepairRun.getStartTime() ? newRepairRun.getStartTime() : DateTime.now())
              .endTime(DateTime.now())
              .runState(RepairRun.RunState.ERROR)
              .lastEvent("duplicate of " + latestRepairRun)
              .build(newRepairRun.getId()));

      // take the identified last repair run, and try start it. it's ok if already running.
      RepairRun existingRun = context.storage.getRepairRun(latestRepairRun)
          .orElseThrow(() -> new IllegalStateException(
              "repair run " + latestRepairRun + " referenced by schedule " + schedule.getId() + " not found"));
      if (RepairRun.RunState.NOT_STARTED == existingRun.getRunState()) {
        context.repairManager.startRepairRun(existingRun);
      }
      return false;
    }

    LOG.warn(
        "schedule {} has been altered by someone else. not running repair",
        schedule.getId());
    return false;
  }

  /**
   * Compares two schedules field by field.
   *
   * <p>The schedules are considered equal when id, owner, days between, creation time, next
   * activation, following activation, state and run history all match, and the intensities differ
   * by less than 0.01. Any exception raised while comparing (for example from a {@code null}
   * field) results in {@code false}.
   *
   * @param s1 the first schedule
   * @param s2 the second schedule
   * @return true if the schedules match as described above, false otherwise
   */
  private static boolean equal(RepairSchedule s1, RepairSchedule s2) {
    try {
      final boolean sameDefinition
          = s1.getId().equals(s2.getId())
          && s1.getOwner().equals(s2.getOwner())
          && s1.getDaysBetween() == s2.getDaysBetween()
          && Math.abs(s1.getIntensity() - s2.getIntensity()) < 0.01d
          && s1.getCreationTime().equals(s2.getCreationTime())
          && s1.getNextActivation().equals(s2.getNextActivation())
          && s1.getFollowingActivation().equals(s2.getFollowingActivation());

      if (!sameDefinition) {
        return false;
      }
      // List equality checks both the size and the element-wise order of the run histories.
      return s1.getState().equals(s2.getState())
          && s1.getRunHistory().equals(s2.getRunHistory());
    } catch (Exception ignored) {
      // A failure while reading either schedule means they cannot be shown to be equal.
      return false;
    }
  }

  /**
   * Looks through the repair runs of the schedule's repair unit for one that should postpone the
   * current schedule trigger, as decided by {@code repairRunComesFromSchedule}.
   *
   * @param schedule the schedule about to be triggered
   * @param repairUnit the repair unit of the schedule, used for logging only
   * @return true if such a run exists, false otherwise
   */
  private boolean repairRunAlreadyScheduled(RepairSchedule schedule, RepairUnit repairUnit) {
    final Optional<RepairRun> blockingRun
        = context.storage.getRepairRunsForUnit(schedule.getRepairUnitId()).stream()
            .filter(candidate -> repairRunComesFromSchedule(candidate, schedule))
            .findFirst();

    if (!blockingRun.isPresent()) {
      return false;
    }

    final RepairRun found = blockingRun.get();
    LOG.info(
        "there is repair (id {}) in state '{}' for repair unit '{}', "
        + "postponing current schedule trigger until next scheduling",
        found.getId(),
        found.getRunState(),
        repairUnit.getId());
    return true;
  }

  /**
   * Tells whether a repair run should hold back the given schedule.
   *
   * <p>Note that, despite the name, any run in an active state qualifies regardless of its cause.
   * A {@code NOT_STARTED} run qualifies only if its cause equals the cause name of the schedule.
   *
   * @param repairRun the repair run to inspect
   * @param schedule the schedule being evaluated
   * @return true if the run is active, or is not started and was caused by this schedule
   */
  private static boolean repairRunComesFromSchedule(RepairRun repairRun, RepairSchedule schedule) {
    final RunState state = repairRun.getRunState();
    if (state.isActive()) {
      return true;
    }
    return RepairRun.RunState.NOT_STARTED == state
        && repairRun.getCause().equals(getCauseName(schedule));
  }

  /**
   * Registers a new repair run for the repair unit, using the settings carried by the schedule
   * (owner, segment counts, repair parallelism and intensity) and the schedule's cause name.
   *
   * @param schedule the schedule that triggers the run
   * @param repairUnit the repair unit to be repaired
   * @return the repair run returned by {@code RepairRunService.registerRepairRun}
   * @throws ReaperException if registering the repair run fails
   */
  private RepairRun createNewRunForUnit(RepairSchedule schedule, RepairUnit repairUnit) throws ReaperException {
    final Optional<String> cause = Optional.of(getCauseName(schedule));

    return repairRunService.registerRepairRun(
        context.storage.getCluster(repairUnit.getClusterName()),
        repairUnit,
        cause,
        schedule.getOwner(),
        schedule.getSegmentCount(),
        schedule.getSegmentCountPerNode(),
        schedule.getRepairParallelism(),
        schedule.getIntensity());
  }

  /**
   * Builds the cause text recorded on repair runs created from the given schedule.
   *
   * <p>The text starts with {@code "scheduled run"} when the system property
   * {@code reaper.leave.dangling.not.started.repairs} is exactly {@code "true"}, and with
   * {@code "auto-scheduled run"} otherwise; the schedule id follows in parentheses.
   *
   * @param schedule the schedule whose id is embedded in the cause
   * @return the cause name for runs of this schedule
   */
  private static String getCauseName(RepairSchedule schedule) {
    final String flag = System.getProperty("reaper.leave.dangling.not.started.repairs", "false");
    final String prefix = "true".equals(flag) ? "scheduled run" : "auto-scheduled run";
    return prefix + " (schedule id " + schedule.getId().toString() + ')';
  }
}



