/*
 * Copyright 2015-2017 Spotify AB
 * Copyright 2016-2019 The Last Pickle Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.cassandrareaper.service;

import io.cassandrareaper.AppContext;
import io.cassandrareaper.ReaperException;
import io.cassandrareaper.core.RepairRun;
import io.cassandrareaper.core.RepairSegment;
import io.cassandrareaper.core.RepairUnit;
import io.cassandrareaper.jmx.ClusterFacade;
import io.cassandrareaper.jmx.JmxProxy;
import io.cassandrareaper.service.RepairManager.MetricsManager;
import io.cassandrareaper.storage.CassandraStorage;
import io.cassandrareaper.storage.IDistributedStorage;

import static com.datastax.driver.core.querybuilder.QueryBuilder.bindMarker;
import static com.datastax.driver.core.querybuilder.QueryBuilder.gte;
import static com.datastax.driver.core.querybuilder.QueryBuilder.lte;
import static com.datastax.driver.core.querybuilder.QueryBuilder.token;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;

import com.codahale.metrics.InstrumentedScheduledExecutorService;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.ColumnMetadata;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.KeyspaceMetadata;
import com.datastax.driver.core.PreparedStatement;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.TableMetadata;
import com.datastax.driver.core.querybuilder.QueryBuilder;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningScheduledExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class RepairManager implements AutoCloseable {

  private static final Logger LOG = LoggerFactory.getLogger(RepairManager.class);

  // State of all active RepairRunners, keyed by repair run id
  public final Map<UUID, RepairRunner> repairRunners = Maps.newConcurrentMap();
  // Taken by the methods of this class that check and then modify repairRunners
  private final Lock repairRunnersLock = new ReentrantLock();

  private final AppContext context;
  private final ClusterFacade clusterFacade;
  private final Heart heart;
  // Instrumented, listening wrapper around the executor handed to the constructor
  private final ListeningScheduledExecutorService executor;
  private final long repairTimeoutMillis;
  // Delay applied by scheduleRetry(..) before a runner is executed again
  private final long retryDelayMillis;
  // While true, resumeRunningRepairRuns() only beats the heart and resumes nothing
  public static boolean currentlyScheduling = false;
  // The instance most recently built by one of the create(..) factories; null until then
  private static RepairManager lastInstance = null;

  private RepairManager(
      AppContext context,
      ClusterFacade clusterFacade,
      ScheduledExecutorService executor,
      long repairTimeout,
      TimeUnit repairTimeoutTimeUnit,
      long retryDelay,
      TimeUnit retryDelayTimeUnit) throws ReaperException {

    this.context = context;
    this.clusterFacade = clusterFacade;
    this.heart = Heart.create(context);
    this.repairTimeoutMillis = repairTimeoutTimeUnit.toMillis(repairTimeout);
    this.retryDelayMillis = retryDelayTimeUnit.toMillis(retryDelay);

    this.executor = MoreExecutors.listeningDecorator(
        new InstrumentedScheduledExecutorService(executor, context.metricRegistry));
  }

  /**
   * Factory that lets tests inject their own {@link ClusterFacade}.
   *
   * <p>The created manager is also remembered as the instance returned by {@link #getInstance()}.
   *
   * @return the newly created manager
   * @throws ReaperException if the constructor fails
   */
  @VisibleForTesting
  static RepairManager create(
      AppContext context,
      ClusterFacade clusterFacadeSupplier,
      ScheduledExecutorService executor,
      long repairTimeout,
      TimeUnit repairTimeoutTimeUnit,
      long retryDelay,
      TimeUnit retryDelayTimeUnit) throws ReaperException {

    final RepairManager manager = new RepairManager(
        context, clusterFacadeSupplier, executor, repairTimeout, repairTimeoutTimeUnit, retryDelay, retryDelayTimeUnit);

    lastInstance = manager;
    return manager;
  }

  /**
   * Creates a manager that talks to clusters through {@code ClusterFacade.create(context)}.
   *
   * <p>The created manager is also remembered as the instance returned by {@link #getInstance()}.
   *
   * @return the newly created manager
   * @throws ReaperException if the manager cannot be constructed
   */
  public static RepairManager create(
      AppContext context,
      ScheduledExecutorService executor,
      long repairTimeout,
      TimeUnit repairTimeoutTimeUnit,
      long retryDelay,
      TimeUnit retryDelayTimeUnit) throws ReaperException {

    final ClusterFacade facade = ClusterFacade.create(context);
    final RepairManager manager = create(
        context, facade, executor, repairTimeout, repairTimeoutTimeUnit, retryDelay, retryDelayTimeUnit);

    lastInstance = manager;
    return manager;
  }

  /**
   * Returns the manager most recently built through a {@code create(..)} factory.
   *
   * @return that manager, or {@code null} when none has been created yet
   */
  public static RepairManager getInstance() {
    return RepairManager.lastInstance;
  }

  /** Returns the repair timeout given at construction, converted to milliseconds. */
  long getRepairTimeoutMillis() {
    return this.repairTimeoutMillis;
  }

  /**
   * Beats the heart, then (unless {@code currentlyScheduling} is set) reads the RUNNING and PAUSED repair
   * runs from storage, aborts their stuck segments and makes sure this instance has a runner for each run.
   *
   * @throws ReaperException wrapping any {@link RuntimeException} raised along the way
   */
  public void resumeRunningRepairRuns() throws ReaperException {
    try {
      heart.beat();
      if (currentlyScheduling) {
        // a scheduling pass is in flight: nothing is resumed this time around
        return;
      }
      Collection<RepairRun> running = context.storage.getRepairRunsWithState(RepairRun.RunState.RUNNING);
      Collection<RepairRun> paused = context.storage.getRepairRunsWithState(RepairRun.RunState.PAUSED);

      // first clean up segments, then (re)attach runners
      abortAllRunningSegmentsWithNoLeader(running);
      abortAllRunningSegmentsInKnownPausedRepairRuns(paused);
      resumeUnkownRunningRepairRuns(running);
      resumeUnknownPausedRepairRuns(paused);
    } catch (RuntimeException cause) {
      throw new ReaperException(cause);
    }
  }

  /**
   * Delegates to {@code heart.beatMetrics()}.
   *
   * @throws ReaperException wrapping any {@link RuntimeException} thrown by the heart
   */
  public void handleMetricsRequests() throws ReaperException {
    try {
      this.heart.beatMetrics();
    } catch (RuntimeException cause) {
      throw new ReaperException(cause);
    }
  }

  /**
   * For every given repair run, loads its RUNNING and STARTED segments and aborts those without a leader.
   *
   * @param runningRepairRuns the repair runs whose segments are inspected
   */
  private void abortAllRunningSegmentsWithNoLeader(Collection<RepairRun> runningRepairRuns) {
    for (RepairRun run : runningRepairRuns) {
      UUID runId = run.getId();
      // both lookups happen before either abort pass, as a snapshot of the run's active segments
      Collection<RepairSegment> running = context.storage.getSegmentsWithState(runId, RepairSegment.State.RUNNING);
      Collection<RepairSegment> started = context.storage.getSegmentsWithState(runId, RepairSegment.State.STARTED);

      abortSegmentsWithNoLeader(run, running);
      abortSegmentsWithNoLeader(run, started);
    }
  }

  /**
   * Starts a runner on this instance for every given repair run that does not have one yet.
   *
   * @param runningRepairRuns repair runs read from storage in state RUNNING
   * @throws ReaperException if starting one of the runs fails
   */
  private void resumeUnkownRunningRepairRuns(Collection<RepairRun> runningRepairRuns) throws ReaperException {
    // acquire before entering the try: if lock() fails, the finally must not unlock a lock we do not hold
    repairRunnersLock.lock();
    try {
      for (RepairRun repairRun : runningRepairRuns) {
        if (!repairRunners.containsKey(repairRun.getId())) {
          LOG.info("Restarting run id {} that has no runner", repairRun.getId());
          // it may be that this repair is already "running" actively on other reaper instances
          //  nonetheless we need to make it actively running on this reaper instance as well
          //   so to help in running the queued segments
          startRepairRun(repairRun);
        }
      }
    } finally {
      repairRunnersLock.unlock();
    }
  }

  /**
   * Aborts the RUNNING and STARTED segments of every paused repair run this instance has a runner for.
   *
   * @param pausedRepairRuns repair runs read from storage in state PAUSED
   */
  private void abortAllRunningSegmentsInKnownPausedRepairRuns(Collection<RepairRun> pausedRepairRuns) {
    // acquire before entering the try: if lock() fails, the finally must not unlock a lock we do not hold
    repairRunnersLock.lock();
    try {
      for (RepairRun pausedRepairRun : pausedRepairRuns) {
        if (!repairRunners.containsKey(pausedRepairRun.getId())) {
          // paused runs unknown to this instance are handled by resumeUnknownPausedRepairRuns(..)
          continue;
        }
        // Abort all running segments for paused repair runs
        Collection<RepairSegment> runningSegments
            = context.storage.getSegmentsWithState(pausedRepairRun.getId(), RepairSegment.State.RUNNING);
        Collection<RepairSegment> startedSegments
            = context.storage.getSegmentsWithState(pausedRepairRun.getId(), RepairSegment.State.STARTED);

        abortSegments(runningSegments, pausedRepairRun);
        abortSegments(startedSegments, pausedRepairRun);
      }
    } finally {
      repairRunnersLock.unlock();
    }
  }

  /**
   * Creates a runner for every paused repair run this instance does not know yet.
   *
   * @param pausedRepairRuns repair runs read from storage in state PAUSED
   */
  private void resumeUnknownPausedRepairRuns(Collection<RepairRun> pausedRepairRuns) {
    // acquire before entering the try: if lock() fails, the finally must not unlock a lock we do not hold
    repairRunnersLock.lock();
    try {
      for (RepairRun pausedRepairRun : pausedRepairRuns) {
        if (!repairRunners.containsKey(pausedRepairRun.getId())) {
          // add "paused" repair run to this reaper instance, so it can be visualised in UI
          startRunner(pausedRepairRun.getId());
        }
      }
    } finally {
      repairRunnersLock.unlock();
    }
  }

  /**
   * Aborts those of the given segments for which no leader entry exists.
   *
   * <p>The check only runs when storage is distributed or when this instance has no runner for the run.
   * A segment counts as led when the leaders list contains either its own id or the id of its run.
   *
   * @param repairRun the repair run the segments belong to
   * @param runningSegments the candidate segments
   */
  private void abortSegmentsWithNoLeader(RepairRun repairRun, Collection<RepairSegment> runningSegments) {

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Checking leadership on the following segments : {}",
          runningSegments.stream().map(seg -> seg.getId()).collect(Collectors.toList()));
    }
    // acquire before entering the try: if lock() fails, the finally must not unlock a lock we do not hold
    repairRunnersLock.lock();
    try {
      if (context.storage instanceof IDistributedStorage || !repairRunners.containsKey(repairRun.getId())) {
        // When multiple Reapers are in use, we can get stuck segments when one instance is rebooted
        // Any segment in RUNNING state but with no leader should be killed
        // (a hash set keeps the per-segment membership test constant-time)
        Set<UUID> leaders = new HashSet<>();
        if (context.storage instanceof IDistributedStorage) {
          leaders.addAll(((IDistributedStorage) context.storage).getLeaders());
        }

        Collection<RepairSegment> orphanedSegments = runningSegments
            .stream()
            .filter(segment -> !leaders.contains(segment.getId()) && !leaders.contains(segment.getRunId()))
            .collect(Collectors.toSet());

        LOG.debug("No leader on the following segments : {}", orphanedSegments);
        abortSegments(orphanedSegments, repairRun);
      }
    } finally {
      repairRunnersLock.unlock();
    }
  }

  /**
   * Aborts (or merely postpones) a single segment and returns its state as re-read from storage.
   *
   * <p>A segment that has a coordinator host and is not DONE goes through {@link #abortSegments}. Any other
   * segment is only postponed, and only if the lead on it can be taken or renewed.
   *
   * @param runId id of the repair run the segment belongs to
   * @param segmentId id of the segment
   * @return the segment as stored after the operation
   * @throws ReaperException if an {@link AssertionError} is raised while handling the segment
   */
  public RepairSegment abortSegment(UUID runId, UUID segmentId) throws ReaperException {
    RepairSegment segment = context.storage.getRepairSegment(runId, segmentId).get();
    try {
      if (null != segment.getCoordinatorHost() && RepairSegment.State.DONE != segment.getState()) {
        abortSegments(Arrays.asList(segment), context.storage.getRepairRun(runId).get());
      } else {
        RepairUnit unit = context.storage.getRepairUnit(segment.getRepairUnitId());
        // incremental repairs elect a leader per run, other repairs per segment
        UUID electionId = unit.getIncrementalRepair() ? runId : segmentId;

        // the lead is released afterwards whether it was freshly taken or only renewed
        if (takeLead(context, electionId) || renewLead(context, electionId)) {
          try {
            SegmentRunner.postponeSegment(context, segment, true);
          } finally {
            releaseLead(context, electionId);
          }
        }
      }
      return context.storage.getRepairSegment(runId, segmentId).get();
    } catch (AssertionError error) {
      throw new ReaperException("lead is already taken on " + runId + ":" + segmentId, new Exception(error));
    }
  }

  /**
   * Tries to abort each of the given segments, one leader election at a time.
   *
   * <p>For every segment whose lead can be taken or renewed, the segment is re-read from storage. If it is
   * still RUNNING or STARTED it is postponed and all repairs on its coordinator host are cancelled over JMX.
   * When that fails with a {@link ReaperException} or {@link NumberFormatException} the segment is postponed
   * anyway. The lead is always released afterwards.
   *
   * @param runningSegments the segments to abort
   * @param repairRun the repair run the segments belong to
   */
  void abortSegments(Collection<RepairSegment> runningSegments, RepairRun repairRun) {
    RepairUnit repairUnit = context.storage.getRepairUnit(repairRun.getRepairUnitId());

    for (RepairSegment candidate : runningSegments) {
      LOG.debug("Trying to abort stuck segment {} in repair run {}", candidate.getId(), repairRun.getId());

      // incremental repairs elect a leader per run, other repairs per segment
      UUID electionId = repairUnit.getIncrementalRepair() ? repairRun.getId() : candidate.getId();
      if (!takeLead(context, electionId) && !renewLead(context, electionId)) {
        continue;
      }

      RepairSegment segment = candidate;
      try {
        // refresh segment once we're inside leader-election
        segment = context.storage.getRepairSegment(repairRun.getId(), candidate.getId()).get();

        RepairSegment.State state = segment.getState();
        if (RepairSegment.State.RUNNING == state || RepairSegment.State.STARTED == state) {
          JmxProxy jmxProxy = ClusterFacade.create(context).connect(
              context.storage.getCluster(repairRun.getClusterName()),
              Arrays.asList(segment.getCoordinatorHost()));

          SegmentRunner.postponeSegment(context, segment, false);
          MetricsManager.getInstance().addStat("storagegrid_private_repair_incremental_cancel_triggered{source=\"abort_segments\"}", 1, false);
          jmxProxy.cancelAllRepairs();
        }
      } catch (ReaperException | NumberFormatException e) {
        String msg = "Tried to abort repair on segment {} marked as RUNNING, but the "
            + "host was down (so abortion won't be needed). Postponing the segment.";

        LOG.debug(msg, segment.getId(), e);
        SegmentRunner.postponeSegment(context, segment, true);
      } finally {
        // released whether the lead was freshly taken or only renewed
        releaseLead(context, electionId);
      }
    }
  }

  /**
   * Moves a repair run to RUNNING and, depending on its previous state, creates a runner for it.
   *
   * <ul>
   * <li>NOT_STARTED: stored as RUNNING with a start time, then a runner is started. Returned untouched when
   *     the system property {@code reaper.leave.dangling.not.started.repairs} is {@code "true"}.</li>
   * <li>PAUSED: stored as RUNNING with the pause time cleared; no runner is started here.</li>
   * <li>RUNNING: a runner is started, storage is not touched.</li>
   * <li>ERROR: stored as RUNNING with the end time cleared, then a runner is started.</li>
   * </ul>
   *
   * @param runToBeStarted the repair run to start
   * @return the repair run as stored (or the argument itself when storage was not updated)
   * @throws ReaperException if storage rejects the update, or the run is in any other state
   */
  public RepairRun startRepairRun(RepairRun runToBeStarted) throws ReaperException {
    assert null != executor : "you need to initialize the thread pool first";
    UUID runId = runToBeStarted.getId();
    LOG.info("Starting a run with id #{} with current state '{}'", runId, runToBeStarted.getRunState());

    final RepairRun updatedRun;
    final boolean launchRunner;

    switch (runToBeStarted.getRunState()) {
      case NOT_STARTED:
        if ("true".equals(System.getProperty("reaper.leave.dangling.not.started.repairs", "false"))) {
          return runToBeStarted;
        }
        updatedRun = runToBeStarted
            .with()
            .runState(RepairRun.RunState.RUNNING)
            .startTime(DateTime.now())
            .build(runId);
        launchRunner = true;
        break;
      case PAUSED:
        updatedRun = runToBeStarted
            .with()
            .runState(RepairRun.RunState.RUNNING)
            .pauseTime(null)
            .build(runId);
        launchRunner = false;
        break;
      case RUNNING:
        LOG.info("re-trigger a running run after restart, with id {}", runId);
        startRunner(runId);
        return runToBeStarted;
      case ERROR:
        updatedRun = runToBeStarted
            .with()
            .runState(RepairRun.RunState.RUNNING)
            .endTime(null)
            .build(runId);
        launchRunner = true;
        break;
      default:
        throw new ReaperException("cannot start run with state: " + runToBeStarted.getRunState());
    }

    // common tail: persist the new state first, only then hand the run to a runner
    if (!context.storage.updateRepairRun(updatedRun)) {
      throw new ReaperException("failed updating repair run " + updatedRun.getId());
    }
    if (launchRunner) {
      startRunner(runId);
    }
    return updatedRun;
  }

  /**
   * Stores a copy of the repair run carrying the given intensity.
   *
   * @param repairRun the repair run to change
   * @param intensity the new intensity
   * @return the updated repair run
   * @throws ReaperException if storage rejects the update
   */
  public RepairRun updateRepairRunIntensity(RepairRun repairRun, Double intensity) throws ReaperException {
    final RepairRun withNewIntensity = repairRun
        .with()
        .intensity(intensity)
        .build(repairRun.getId());

    boolean stored = context.storage.updateRepairRun(withNewIntensity);
    if (stored) {
      return withNewIntensity;
    }
    throw new ReaperException("failed updating repair run " + withNewIntensity.getId());
  }

  /**
   * Creates a {@link RepairRunner} for the run, registers it in {@code repairRunners} and submits it to the
   * executor. A {@link ReaperException} while creating the runner is logged and otherwise ignored.
   *
   * @param runId id of the repair run to create a runner for
   * @throws IllegalStateException if a runner is already registered for {@code runId}
   */
  private void startRunner(UUID runId) {
    // acquire before entering the try: if lock() fails, the finally must not unlock a lock we do not hold
    repairRunnersLock.lock();
    try {
      Preconditions.checkState(
          !repairRunners.containsKey(runId),
          "there is already a repair runner for run with id " + runId + ". This should not happen.");

      LOG.info("scheduling repair for repair run #{}", runId);
      try {
        RepairRunner newRunner = RepairRunner.create(context, runId, clusterFacade);
        repairRunners.put(runId, newRunner);
        executor.submit(newRunner);
      } catch (ReaperException e) {
        LOG.warn("Failed to schedule repair for repair run #" + runId, e);
      }
    } finally {
      repairRunnersLock.unlock();
    }
  }

  /**
   * Stores the repair run as PAUSED, stamped with the current time as its pause time.
   *
   * @param runToBePaused the repair run to pause
   * @return the updated repair run
   * @throws ReaperException if storage rejects the update
   */
  public RepairRun pauseRepairRun(RepairRun runToBePaused) throws ReaperException {
    final RepairRun paused = runToBePaused
        .with()
        .runState(RepairRun.RunState.PAUSED)
        .pauseTime(DateTime.now())
        .build(runToBePaused.getId());

    boolean stored = context.storage.updateRepairRun(paused);
    if (stored) {
      return paused;
    }
    throw new ReaperException("failed updating repair run " + paused.getId());
  }

  /**
   * Stores the repair run as ABORTED, stamped with the current time as its end time.
   *
   * @param runToBeAborted the repair run to abort
   * @return the updated repair run
   * @throws ReaperException if storage rejects the update
   */
  public RepairRun abortRepairRun(RepairRun runToBeAborted) throws ReaperException {
    final RepairRun aborted = runToBeAborted
        .with()
        .runState(RepairRun.RunState.ABORTED)
        .endTime(DateTime.now())
        .build(runToBeAborted.getId());

    boolean stored = context.storage.updateRepairRun(aborted);
    if (stored) {
      return aborted;
    }
    throw new ReaperException("failed updating repair run " + aborted.getId());
  }

  /**
   * Schedules the runner to execute again once the configured retry delay has elapsed.
   *
   * @param runner the runner to re-run
   */
  void scheduleRetry(RepairRunner runner) {
    final long delayMillis = retryDelayMillis;
    executor.schedule(runner, delayMillis, TimeUnit.MILLISECONDS);
  }

  /**
   * Submits a segment runner to this manager's executor.
   *
   * @param runner the segment runner to execute
   * @return the future returned by the executor for the submitted runner
   */
  ListenableFuture<?> submitSegment(SegmentRunner runner) {
    ListenableFuture<?> pending = executor.submit(runner);
    return pending;
  }

  /**
   * Forgets the given runner, so that a new runner may later be created for the same repair run.
   *
   * @param runner the runner to remove from {@code repairRunners}
   */
  void removeRunner(RepairRunner runner) {
    // acquire before entering the try: if lock() fails, the finally must not unlock a lock we do not hold
    repairRunnersLock.lock();
    try {
      repairRunners.remove(runner.getRepairRunId());
    } finally {
      repairRunnersLock.unlock();
    }
  }

  /**
   * Tries to take the lead on the given election id, timing the attempt and counting failures.
   *
   * @return the storage's answer when storage is distributed, otherwise always {@code true}
   */
  private static boolean takeLead(AppContext context, UUID leaderElectionId) {
    Timer timer = context.metricRegistry.timer(MetricRegistry.name(RepairManager.class, "takeLead"));
    try (Timer.Context cx = timer.time()) {
      if (!(context.storage instanceof IDistributedStorage)) {
        // nobody to compete with on non-distributed storage
        return true;
      }
      boolean acquired = ((IDistributedStorage) context.storage).takeLead(leaderElectionId);
      if (!acquired) {
        context.metricRegistry.counter(MetricRegistry.name(RepairManager.class, "takeLead", "failed")).inc();
      }
      return acquired;
    }
  }

  /**
   * Tries to renew the lead on the given election id, timing the attempt and counting failures.
   *
   * @return the storage's answer when storage is distributed, otherwise always {@code true}
   */
  private static boolean renewLead(AppContext context, UUID leaderElectionId) {
    Timer timer = context.metricRegistry.timer(MetricRegistry.name(RepairManager.class, "renewLead"));
    try (Timer.Context cx = timer.time()) {
      if (!(context.storage instanceof IDistributedStorage)) {
        // nobody to compete with on non-distributed storage
        return true;
      }
      boolean renewed = ((IDistributedStorage) context.storage).renewLead(leaderElectionId);
      if (!renewed) {
        context.metricRegistry.counter(MetricRegistry.name(RepairManager.class, "renewLead", "failed")).inc();
      }
      return renewed;
    }
  }

  /**
   * Releases the lead on the given election id; does nothing on non-distributed storage.
   * The call is timed either way.
   */
  private static void releaseLead(AppContext context, UUID leaderElectionId) {
    Timer timer = context.metricRegistry.timer(MetricRegistry.name(RepairManager.class, "releaseLead"));
    try (Timer.Context cx = timer.time()) {
      if (!(context.storage instanceof IDistributedStorage)) {
        return;
      }
      ((IDistributedStorage) context.storage).releaseLead(leaderElectionId);
    }
  }

  /**
   * Closes the heart and stops the executor.
   *
   * <p>The executor is shut down even when closing the heart throws, so its threads are never left behind.
   */
  @Override
  public void close() {
    try {
      heart.close();
    } finally {
      executor.shutdownNow();
    }
  }


public static class CQLSyncer {
       private static final Logger LOG = LoggerFactory.getLogger(CQLSyncer.class);
       // Size of the thread pool that runs the per-table tasks; system property
       // cqlsyncer.number.of.threads, default 1
       private static int NUMBER_OF_THREADS = Integer.parseInt(System.getProperty("cqlsyncer.number.of.threads", "1"));
       // Administrative switch read from system property cqlsyncer.allow.to.run (default "true");
       // startOnRepairRun(..) does nothing but log when this is false
       private static final boolean ALLOW_TO_RUN = "true".equals(System.getProperty("cqlsyncer.allow.to.run", "true"));
       // Comma-separated names of the tables to skip; system property cqlsyncer.skip.tables
       private static final String DISALLOWED_TABLES = System.getProperty("cqlsyncer.skip.tables", "object_by_cbid,s3_key_versions_by_bucket,object_by_version,object_by_uuid");

       // Lazily created singleton, see getInstance()
       private static CQLSyncer instance;

       /**
        * Returns the process-wide syncer, creating it on first use.
        *
        * <p>Synchronized so that two threads racing on the first call cannot each create an instance.
        *
        * @return the singleton instance, never {@code null}
        */
       public static synchronized CQLSyncer getInstance() {
               if (instance == null) {
                       instance = new CQLSyncer();
               }
               return instance;
       }

       /**
        * Resets the metrics and launches a background master thread that syncs the tables of the keyspace.
        * Only logs a message when the syncer is administratively disabled.
        *
        * @param keyspace the keyspace whose tables get processed
        * @param newRepairRun the repair run this sync belongs to
        * @param context the application context
        */
       public void startOnRepairRun(String keyspace, RepairRun newRepairRun, AppContext context) {
               if (!ALLOW_TO_RUN) {
                       LOG.info("CQLSyncer is administratively disabled");
                       return;
               }
               MetricsManager.getInstance().reset();
               CQLSyncerMasterRunnable master = new CQLSyncerMasterRunnable(keyspace, newRepairRun, context);
               master.start();
       }

       /**
        * Orders tables by name, except that the tables listed in {@link #TABLES_WITH_KNOWN_ORDER} always
        * sort after all other tables, in the order in which they are listed.
        */
       private static class TableOrderer implements Comparator<TableMetadata> {
               // We put these at the end of the list in this order
               static List<String> TABLES_WITH_KNOWN_ORDER = new ArrayList<>();
               static {
                       TABLES_WITH_KNOWN_ORDER.add("object_by_cbid");
                       TABLES_WITH_KNOWN_ORDER.add("s3_key_versions_by_bucket");
                       TABLES_WITH_KNOWN_ORDER.add("object_by_version");
                       TABLES_WITH_KNOWN_ORDER.add("object_by_uuid");
               }

               /**
                * @param tmL left-hand table
                * @param tmR right-hand table
                * @return negative, zero or positive as {@code tmL} sorts before, with or after {@code tmR}
                */
               @Override
               public int compare(TableMetadata tmL, TableMetadata tmR) {
                       int indexL = TABLES_WITH_KNOWN_ORDER.indexOf(tmL.getName());
                       int indexR = TABLES_WITH_KNOWN_ORDER.indexOf(tmR.getName());
                       if (indexL >= 0 && indexR >= 0) {
                               // both are known: keep their listed order
                               return Integer.compare(indexL, indexR);
                       }
                       if (indexL >= 0) {
                               // only the left one is known: it goes last
                               return 1;
                       }
                       if (indexR >= 0) {
                               return -1;
                       }
                       return tmL.getName().compareTo(tmR.getName());
               }

       }

       private static class CQLSyncerMasterRunnable extends Thread {
               private String keyspace;
               private RepairRun newRepairRun;
               private AppContext context;

               public CQLSyncerMasterRunnable(String keyspace, RepairRun newRepairRun, AppContext context) {
                       super.setDaemon(true);
                       this.keyspace = keyspace;
                       this.newRepairRun = newRepairRun;
                       this.context = context;
               }

               private String getEffectiveDisallowedTables() {
                  String retVal = DISALLOWED_TABLES;
                  try {
                          Process p = Runtime.getRuntime().exec("nodetool info");
                          p.waitFor(90, TimeUnit.SECONDS);
                          BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()));
                          String s = null;
                          while ((s = in.readLine()) != null) {
                                  if (s.indexOf("Load") != -1) {
                                          s = s.toUpperCase();
                                          if (s.indexOf("GB") > 0 || s.indexOf("GIB") > 0 || s.indexOf("TB") > 0 || s.indexOf("TIB") > 0) {
                                                  MetricsManager.getInstance().overwriteStat("cqlsync_all", 0);
                                                  retVal = DISALLOWED_TABLES;
                                          } else {
                                                  MetricsManager.getInstance().overwriteStat("cqlsync_all", 1);
                                                  retVal = "";
                                          }
                                          in.close();
                                  }
                          }
                  } catch (Exception e) {
                          LOG.error("Error computing getEffectiveDisallowedTables", e);
                  }
                  LOG.info("getEffectiveDisallowedTables - computed " + retVal);
                  return retVal;
               }

               public void run() {
                       LOG.info("Repairing keyspace " + keyspace);
                       Session session = ((CassandraStorage) context.storage).getSession();
                       RepairStatementManager rsm = new RepairStatementManager(session);
                       KeyspaceMetadata ks = session.getCluster().getMetadata().getKeyspace(keyspace);
                       Set<String> tablesWeHaveProcessed = new HashSet();
                       List<TableMetadata> tablesToProcess = new ArrayList(ks.getTables());
                       Collections.sort(tablesToProcess, new TableOrderer());
                       List<Callable> cqlSyncerTableRunnables = new ArrayList();
                       String effectiveDisallowedTables = getEffectiveDisallowedTables();
                       for(TableMetadata tm : tablesToProcess) {

                               if (effectiveDisallowedTables.contains(tm.getName())) {
                                       LOG.info("CQLSyncer is ordered to skip " + tm.getName());
                                       continue;
                               }
                               cqlSyncerTableRunnables.add(new CQLSyncerPerTableRunnable(session, rsm, tm,
                                               tablesWeHaveProcessed, newRepairRun, context, new CQLSyncerMasterStats(tablesToProcess.size())));
                       }
                       try {
                               Executors.newFixedThreadPool(NUMBER_OF_THREADS).invokeAll((Collection) cqlSyncerTableRunnables);
                       } catch (Exception e) {
                               LOG.error("Error invoking thread pool", e);
                       }
               }
       }

       /**
        * Counters for a sync run, published to the {@code MetricsManager} on demand.
        * The success/failure counters are incremented directly by the per-table tasks.
        */
       private static class CQLSyncerMasterStats {
               private long masterStartTime = System.currentTimeMillis();
               int tablesToProcess;
               int successes;
               int failures;

               /**
                * @param tablesToProcess value published as {@code total_to_process}
                */
               public CQLSyncerMasterStats(int tablesToProcess) {
                       this.tablesToProcess = tablesToProcess;
               }

               /** Overwrites the four {@code total_*} stats with the current values. */
               public void updateStats() {
                       MetricsManager.getInstance().overwriteStat(
                                       "total_runtime_ms", System.currentTimeMillis() - this.masterStartTime);
                       MetricsManager.getInstance().overwriteStat("total_successes", this.successes);
                       MetricsManager.getInstance().overwriteStat("total_failures", this.failures);
                       MetricsManager.getInstance().overwriteStat("total_to_process", this.tablesToProcess);
               }
       }

       /**
        * Task that runs the repair statements for a single table and, on success, kicks off a
        * background update of the repair run's table list.
        */
       private static class CQLSyncerPerTableRunnable implements Callable {
               private Session session;
               private RepairStatementManager rsm;
               private TableMetadata tm;
               private Set<String> tablesWeHaveProcessed;
               private RepairRun newRepairRun;
               private AppContext context;
               private CQLSyncerMasterStats masterStats;

               public CQLSyncerPerTableRunnable(Session session, RepairStatementManager rsm, TableMetadata tm,
                               Set<String> tablesWeHaveProcessed, RepairRun newRepairRun, AppContext context, CQLSyncerMasterStats masterStats) {
                       this.session = session;
                       this.rsm = rsm;
                       this.tm = tm;
                       this.tablesWeHaveProcessed = tablesWeHaveProcessed;
                       this.newRepairRun = newRepairRun;
                       this.context = context;
                       this.masterStats = masterStats;
               }

               /**
                * Processes the table. Failures are logged, never propagated; stats are published in any case.
                *
                * @return always {@code null}
                */
               @Override
               public Object call() throws Exception {
                       try {
                               boolean tableSucceeded = executeForTable(session, rsm, tm);
                               if (!tableSucceeded) {
                                       return null;
                               }
                               masterStats.updateStats();
                               tablesWeHaveProcessed.add(tm.getName());
                               BackgroundTableUpdater updater
                                               = new BackgroundTableUpdater(session, tablesWeHaveProcessed, context, newRepairRun);
                               updater.start();
                       } catch (Exception e) {
                               LOG.error("Error running on table", e);
                       } finally {
                               masterStats.updateStats();
                       }
                       return null;
               }

               /**
                * Runs {@code rsm.goForTable(..)}, bumps the matching counter and records the elapsed time.
                *
                * @return whether {@code goForTable} reported success
                */
               private boolean executeForTable(Session session, RepairStatementManager rsm, TableMetadata tm) throws InterruptedException {
                       final long begin = System.currentTimeMillis();
                       try {
                               LOG.info("CQLSyncer launching on " + tm.getName());
                               final boolean succeeded = rsm.goForTable(session, tm);
                               if (succeeded) {
                                       masterStats.successes++;
                                       LOG.info("CQLSyncer reports success on " + tm.getName());
                               } else {
                                       masterStats.failures++;
                                       LOG.info("CQLSyncer reports negative success on " + tm.getName());
                               }
                               return succeeded;
                       } finally {
                               MetricsManager.getInstance().addStatTable("execute_for_table_ms", tm.getName(), System.currentTimeMillis() - begin);
                       }
               }
       }

       /**
        * Daemon thread that removes the already processed tables from the stored repair run.
        */
       private static class BackgroundTableUpdater extends Thread {
               Session session;
               Set<String> tablesWeHaveProcessed;
               AppContext context;
               RepairRun newRepairRun;

               public BackgroundTableUpdater(Session session, Set<String> tablesWeHaveProcessed, AppContext context,
                               RepairRun newRepairRun) {
                       this.session = session;
                       this.tablesWeHaveProcessed = tablesWeHaveProcessed;
                       this.context = context;
                       this.newRepairRun = newRepairRun;
                       setDaemon(true);
               }

               /** Performs the update; any failure is logged and ends the thread. */
               @Override
               public void run() {
                       try {
                               updateTablesOnRepairRun();
                       } catch (Exception e) {
                               LOG.error("Unable to update repair run tables", e);
                       }
               }

               /**
                * Under a JVM-wide monitor and the storage-level lead on {@code SegmentRunner.getLockId()},
                * re-reads the repair run, drops every processed table from its table list (adding the
                * marker {@code "all_done"} once the list is empty) and stores it. Three timing stats are recorded.
                */
               private void updateTablesOnRepairRun() throws Exception {
                       long phaseStart = System.currentTimeMillis();
                       synchronized (CQLSyncerMasterRunnable.class) {
                               MetricsManager.getInstance().addStat("update_lock_synchronized", System.currentTimeMillis() - phaseStart);
                               phaseStart = System.currentTimeMillis();

                               UUID lockId = SegmentRunner.getLockId();
                               // poll once per second until the lead is ours
                               while (true) {
                                       boolean gotLead = ((IDistributedStorage) context.storage).takeLead(lockId, 30);
                                       if (gotLead) {
                                               break;
                                       }
                                       LOG.info("CQLSyncer - Sleeping waiting to obtain lock on " + lockId);
                                       Thread.sleep(1000);
                               }
                               MetricsManager.getInstance().addStat("update_lock_cassandra_lock", System.currentTimeMillis() - phaseStart);
                               phaseStart = System.currentTimeMillis();

                               try {
                                       RepairRun stored = context.storage.getRepairRun(newRepairRun.getId()).get();
                                       if (stored != null) {
                                               LOG.info("CQLSyncer tables were originally " + stored.getTables());
                                               for (String processed : tablesWeHaveProcessed) {
                                                       stored.getTables().remove(processed);
                                               }
                                               if (stored.getTables().isEmpty()) {
                                                       stored.getTables().add("all_done");
                                               }
                                               LOG.info("CQLSyncer tables are now " + stored.getTables());
                                               context.storage.updateRepairRun(stored);
                                       }
                               } finally {
                                       ((IDistributedStorage) context.storage).releaseLead(lockId);
                               }
                               MetricsManager.getInstance().addStat("update_do_update", System.currentTimeMillis() - phaseStart);
                       }
               }
       }
}

/**
 * Reads a whole table chunk by chunk over its token range, using one cached prepared
 * {@code SELECT} per table that is executed at {@link ConsistencyLevel#ALL}.
 *
 * <p>Chunk boundaries, row limits, back-off and completion tracking are delegated to a
 * {@link RangeManager}; progress and outcome counters are published via {@link MetricsManager}.
 *
 * <p>NOTE(review): reading at ALL presumably exists to force replicas to reconcile (read repair);
 * that intent is not stated anywhere in this class - confirm against the caller.
 */
public static class RepairStatementManager {
       private static final Logger LOG = LoggerFactory.getLogger(CQLSyncer.class);
       // A usable suggestion from RangeManager#getNextBounds holds exactly: low bound, high bound, row limit.
       private static final int EXPECTED_BOUNDS = 3;
       private Session session;
       private String keyspace;
       // Prepared range SELECTs keyed by table name, so each table is prepared at most once.
       private HashMap<String, PreparedStatement> preparedStatementCache = new HashMap<>();

       public RepairStatementManager(Session session) {
               this.session = session;
       }

       /**
        * Returns the range SELECT for {@code table}, bound to the given chunk.
        *
        * @param session session used to prepare the statement on first use of a table
        * @param table table to read
        * @param lowBound inclusive lower token of the chunk (bound as a {@code long})
        * @param highBound inclusive upper token of the chunk (bound as a {@code long})
        * @param currentRange value bound to the statement's {@code LIMIT} marker (bound as an {@code int})
        */
       private BoundStatement prepare(Session session, TableMetadata table, BigInteger lowBound, BigInteger highBound, BigInteger currentRange) {
               PreparedStatement statement = preparedStatementCache.get(table.getName());
               if (statement == null) {
                       statement = buildRangeSelect(session, table);
                       preparedStatementCache.put(table.getName(), statement);
               }
               return statement.bind(lowBound.longValue(), highBound.longValue(), currentRange.intValue());
       }

       /**
        * Prepares {@code SELECT token(pk...), <all columns> FROM table WHERE token(pk...) >= ? AND
        * token(pk...) <= ? LIMIT ?} with consistency level ALL.
        */
       private static PreparedStatement buildRangeSelect(Session session, TableMetadata table) {
               String[] partitionColumns = table.getPartitionKey().stream().map(ColumnMetadata::getName).toArray(String[]::new);
               String tokenKeys = token(partitionColumns);
               // The token expression goes first so callers can read it back as column 0.
               List<String> columns = new ArrayList<>(table.getColumns().size() + 1);
               columns.add(tokenKeys);
               table.getColumns().stream().map(ColumnMetadata::getName).forEachOrdered(columns::add);
               PreparedStatement statement = session.prepare(QueryBuilder.select(columns.toArray(new String[columns.size()]))
                       .from(table).where(gte(tokenKeys, bindMarker())).and(lte(tokenKeys, bindMarker())).limit(bindMarker()));
               statement.setConsistencyLevel(ConsistencyLevel.ALL);
               return statement;
       }

       /**
        * Scans {@code table} until its {@link RangeManager} reports the whole token range covered
        * or stops handing out chunks.
        *
        * <p>A failed query is reported to the range manager and the loop continues with whatever
        * chunk it suggests next; exceptions are logged, never propagated.
        *
        * @param session session used to prepare and execute the chunk queries
        * @param table table to scan
        * @return {@code true} if the range manager reported success, {@code false} if it returned
        *     no further bounds
        */
       public boolean goForTable(Session session, TableMetadata table) {
               String tableName = table.getName();
               RangeManager rm = new RangeManager(tableName);
               MetricsManager metrics = MetricsManager.getInstance();
               long startTime = System.currentTimeMillis();

               while (true) {
                       metrics.overwriteStatTable("percent_done", tableName, rm.getPercentageDone());
                       metrics.overwriteStatTable("execution_time_ms", tableName, System.currentTimeMillis() - startTime);
                       if (rm.succeeded()) {
                               metrics.overwriteStatTable("table_success", tableName, 1);
                               LOG.info("Exiting on success path for " + tableName);
                               return true;
                       }
                       List<BigInteger> nextBounds = rm.getNextBounds();
                       // Index 2 (the row limit) is read below, so anything shorter than three entries is unusable.
                       if (nextBounds.size() < EXPECTED_BOUNDS) {
                               metrics.overwriteStatTable("table_fail", tableName, 1);
                               LOG.info("Exiting on failure path for " + tableName);
                               return false;
                       }
                       try {
                               metrics.addStatTable("table_chunk_taken", tableName, 1);
                               long currentBiteSize = nextBounds.get(1).subtract(nextBounds.get(0)).longValue();
                               metrics.overwriteStatTable("current_chunk_size", tableName, currentBiteSize);
                               BoundStatement b = prepare(session, table, nextBounds.get(0), nextBounds.get(1), nextBounds.get(2));
                               LOG.trace("Requesting {} on {}", tableName, nextBounds);
                               ResultSet rs = session.execute(b);
                               LOG.trace("Requesting {} on {} executed", tableName, nextBounds);
                               List<Row> rows = rs.all();
                               metrics.addStatTable("table_rows_seen", tableName, rows.size());
                               metrics.addStatTable("table_query_success", tableName, 1);
                               // Column 0 is the token expression; both stay 0 when the chunk is empty.
                               long lowToken = 0;
                               long highToken = 0;
                               if (!rows.isEmpty()) {
                                       lowToken = rows.get(0).getLong(0);
                                       highToken = rows.get(rows.size() - 1).getLong(0);
                               }
                               LOG.info("Requesting " + tableName + " on " + nextBounds + " returned " + rows.size() + " rows between " + lowToken + " and " + highToken);
                               rm.noteSuccess(lowToken, highToken, rows.size());
                               LOG.trace("Success {} on {}", tableName, nextBounds);
                       } catch (Exception e) {
                               metrics.addStatTable("table_query_fail", tableName, 1);
                               rm.noteFailure();
                               LOG.info("Failure " + tableName + " on " + nextBounds + " with " + e);
                       }
               }
       }
}

/**
 * Process-wide accumulator of named numeric stats that a background daemon thread periodically
 * dumps to {@code /var/local/tmp/cassandra_metrics/reaper_metrics} as {@code <key> <value>} lines.
 *
 * <p>Keys that do not already start with {@code storagegrid_} are prefixed with
 * {@code storagegrid_private_cqlsyncer_}. The dump interval is read from the
 * {@code cqlsyncer.metrics.sleep.time} system property (milliseconds, default 120000).
 *
 * <p>All instance methods and {@link #getInstance()} are synchronized.
 */
public static class MetricsManager {
       private static MetricsManager instance = null;
       private Hashtable<String, Double> toWrite = new Hashtable<>();
       private static final long SLEEP_TIME = Long.parseLong(System.getProperty("cqlsyncer.metrics.sleep.time", "120000"));
       private static final String BASE_STAT = "storagegrid_private_cqlsyncer_";

       private MetricsManager() {
       }

       /**
        * Returns the singleton, creating it and starting its dumper thread on first use.
        *
        * <p>Synchronized so that concurrent first callers cannot create several instances (and
        * several dumper threads). The thread is started only after {@code instance} is assigned,
        * so the dumper never observes a {@code null} instance.
        */
       public static synchronized MetricsManager getInstance() {
               if (instance == null) {
                       instance = new MetricsManager();
                       new DumperThread().start();
               }
               return instance;
       }

       /** Discards every stat collected so far. */
       public synchronized void reset() {
               toWrite = new Hashtable<>();
       }

       /** Adds {@code value} to the stat {@code key} labelled with {@code table}. */
       public synchronized void addStatTable(String key, String table, double value) {
               addStat(tableKey(key, table), value);
       }

       /** Adds {@code value} to the stat {@code key}, applying the common prefix. */
       public synchronized void addStat(String key, double value) {
               addStat(key, value, true);
       }

       /**
        * Adds {@code value} to the stat {@code key}; a stat that does not exist yet starts at 0.
        *
        * @param includeBase whether to apply the common prefix; ignored (treated as {@code false})
        *     when {@code key} already starts with {@code storagegrid_}
        */
       public synchronized void addStat(String key, double value, boolean includeBase) {
               String fullKey = qualify(key, includeBase);
               toWrite.put(fullKey, toWrite.getOrDefault(fullKey, 0.0) + value);
       }

       /** Replaces the stat {@code key} labelled with {@code table} by {@code value}. */
       public synchronized void overwriteStatTable(String key, String table, double value) {
               overwriteStat(tableKey(key, table), value);
       }

       /** Replaces the stat {@code key} by {@code theVal}, applying the common prefix. */
       public synchronized void overwriteStat(String key, double theVal) {
               toWrite.put(qualify(key, true), theVal);
       }

       /**
        * Rewrites the metrics file with one {@code <key> <value>} line per stat.
        *
        * <p>Best effort: any failure is printed to stderr and otherwise ignored.
        */
       public synchronized void writeStats() {
               try (BufferedWriter bw = new BufferedWriter(new FileWriter("/var/local/tmp/cassandra_metrics/reaper_metrics"))) {
                       for (Map.Entry<String, Double> stat : toWrite.entrySet()) {
                               bw.write(stat.getKey());
                               bw.write(" " + stat.getValue());
                               bw.write("\n");
                       }
               } catch (Exception e) {
                       e.printStackTrace();
               }
       }

       /** Appends the {@code {table="..."}} label to a stat name. */
       private static String tableKey(String key, String table) {
               return key + "{table=\"" + table + "\"}";
       }

       /** Prepends the common prefix unless it is not wanted or the key is already fully qualified. */
       private static String qualify(String key, boolean includeBase) {
               if (includeBase && !key.startsWith("storagegrid_")) {
                       return BASE_STAT + key;
               }
               return key;
       }

       /** Daemon thread that dumps the singleton's stats every {@code SLEEP_TIME} milliseconds. */
       private static class DumperThread extends Thread {
               public DumperThread() {
                       super.setDaemon(true);
               }

               @Override
               public void run() {
                       while (true) {
                               try {
                                       Thread.sleep(SLEEP_TIME);
                                       instance.writeStats();
                               } catch (InterruptedException e) {
                                       // Interruption is a request to stop: keep the flag set and end the thread.
                                       Thread.currentThread().interrupt();
                                       return;
                               } catch (Exception e) {
                                       e.printStackTrace();
                               }
                       }
               }
       }
}

/**
 * Drives a scan over the signed 64-bit token range {@code [-2^63, 2^63 - 1]} for one table by
 * handing out {@code [low, high, limit]} chunks and adapting the chunk width to the outcome of
 * each query.
 *
 * <ul>
 *   <li>A query that returns fewer rows than the limit advances to the requested upper bound and
 *       widens the next chunk by {@code SCALING_FACTOR}.</li>
 *   <li>A query that returns exactly {@code limit} rows resumes from the last token seen and
 *       narrows the next chunk.</li>
 *   <li>A full limit of rows on a single token switches to {@code SUPER_BIG_LIMIT} with a tiny
 *       chunk; if that is exhausted as well the scan is declared hopeless.</li>
 *   <li>A failed query narrows the chunk and makes the next {@link #getNextBounds()} back off.</li>
 * </ul>
 *
 * <p>Tuning values are read once from {@code cqlsyncer.*} system properties.
 */
public static class RangeManager {
       private static final BigInteger MIN_TOKEN = new BigInteger("2").pow(63).negate();
       private static final BigInteger MAX_TOKEN = new BigInteger("2").pow(63).subtract(BigInteger.ONE);
       private BigInteger currLowBound = MIN_TOKEN;
       private long currBiteSize = Long.parseLong(System.getProperty("cqlsyncer.start.bite.size", "100000000"));
       private static final double SCALING_FACTOR = Double.parseDouble(System.getProperty("cqlsyncer.bite.size.scaling.factor", "2.0"));
       private List<BigInteger> latestSuggestion;
       private boolean coveredEntireRange = false;
       private static final long NORMAL_LIMIT = Integer.parseInt(System.getProperty("cqlsyncer.limit", "50000"));
       private static final long SUPER_BIG_LIMIT = Integer.parseInt(System.getProperty("cqlsyncer.superbig.limit", "1000000"));
       private long currLimit = NORMAL_LIMIT;
       private boolean hopeless = false;
       private static final Logger LOG = LoggerFactory.getLogger(CQLSyncer.class);
       private static final long FAILURE_BACKOFF_BASE_UNIT = Long.parseLong(System.getProperty("cqlsyncer.failure.backoff.unit", "4000"));
       private static final double FAILURE_BACKOFF_SCALING_FACTOR = Double.parseDouble(System.getProperty("cqlsyncer.failure.backoff.scaling.factor", "2"));
       private static final long SUCCESS_WAIT_PERIOD_EVERY_QUERY = Long.parseLong(System.getProperty("cqlsyncer.success.wait.period", "0"));
       // This knob used to read "cqlsyncer.success.wait.period" as well, so it could not be tuned
       // independently of the per-query wait. It now has its own key and falls back to the old one,
       // which keeps existing deployments behaving exactly as before.
       private static final long SUCCESS_WAIT_PERIOD_NUMBER_OF_RECORDS_PER_MILLISECOND = Long.parseLong(
               System.getProperty("cqlsyncer.success.wait.records.per.ms", System.getProperty("cqlsyncer.success.wait.period", "0")));
       private static final long MAX_SLEEP_AMOUNT = Long.parseLong(System.getProperty("cqlsyncer.max.sleep.amount", "240000"));
       private long currentConsecutiveFailures = 0;
       private String tableName;
       private boolean allowSleep = true;


       public RangeManager(String tableName) {
               this.tableName = tableName;
       }

       /**
        * Returns how much of the token range lies below the current low bound, as a fraction
        * between 0 and 1 (not multiplied by 100, despite the name).
        */
       public double getPercentageDone() {
               BigInteger rangeSize = MAX_TOKEN.subtract(MIN_TOKEN);
               BigInteger partDone = currLowBound.subtract(MIN_TOKEN);
               return partDone.doubleValue() / rangeSize.doubleValue();
       }

       /**
        * Records that the chunk last returned by {@link #getNextBounds()} was read successfully and
        * decides where, how wide and with which limit the next chunk starts.
        *
        * @param lowTokenWeSaw token of the first row returned
        * @param highTokenWeSaw token of the last row returned
        * @param numberOfRowsWeGot number of rows returned
        */
       public void noteSuccess(long lowTokenWeSaw, long highTokenWeSaw, int numberOfRowsWeGot) {
               sleepIfNecessaryOnSuccess(numberOfRowsWeGot);
               currentConsecutiveFailures = 0;
               boolean limitedByRows = numberOfRowsWeGot == currLimit;
               if (limitedByRows) {
                       // The result was cut off by LIMIT, so rows above the last token seen may be
                       // unread: resume from that token with a narrower chunk. The range must NOT be
                       // marked as covered here, even if the requested upper bound was MAX_TOKEN.
                       currLowBound = BigInteger.valueOf(highTokenWeSaw);
                       currBiteSize /= SCALING_FACTOR;
                       MetricsManager.getInstance().addStatTable("chunk_limited_by_rows", this.tableName, 1);
               } else {
                       currLowBound = latestSuggestion.get(1);
                       if (currLowBound.equals(MAX_TOKEN)) {
                               coveredEntireRange = true;
                       }
                       currBiteSize *= SCALING_FACTOR;
               }
               if (limitedByRows && lowTokenWeSaw == highTokenWeSaw) {
                       // this is an interesting case.  This means we got an entire LIMIT worth of keys that are on a single token.
                       // It is a large partition - and likely to happen on s3_key_versions_by_bucket.
                       MetricsManager.getInstance().addStatTable("chunk_limited_by_rows_initiate_super_chunk", this.tableName, 1);
                       if (currLimit == SUPER_BIG_LIMIT) {
                               LOG.info("RangeManager - too many partition keys at token " + currLowBound + " considering hopeless");
                               hopeless = true;
                       }
                       currLimit = SUPER_BIG_LIMIT;
                       currBiteSize = 10;
               } else {
                       currLimit = NORMAL_LIMIT;
               }
       }

       /** Records a failed query: narrows the next chunk and lengthens the next back-off. */
       public void noteFailure() {
               currentConsecutiveFailures++;
               currBiteSize /= SCALING_FACTOR;
       }

       /** Returns whether a complete (not row-limited) chunk ending at the maximum token was read. */
       public boolean succeeded() {
               return coveredEntireRange;
       }

       /** Applies the configured fixed and per-row throttling after a successful query. */
       private void sleepIfNecessaryOnSuccess(long numberOfRows) {
               long startTime = System.currentTimeMillis();
               sleep(SUCCESS_WAIT_PERIOD_EVERY_QUERY);
               if (SUCCESS_WAIT_PERIOD_NUMBER_OF_RECORDS_PER_MILLISECOND > 0) {
                       long numberOfSlices = numberOfRows / SUCCESS_WAIT_PERIOD_NUMBER_OF_RECORDS_PER_MILLISECOND;
                       sleep(numberOfSlices);
               }
               MetricsManager.getInstance().addStatTable("sleep_on_success", this.tableName, System.currentTimeMillis() - startTime);
       }

       /**
        * Sleeps for {@code sleepAmount} milliseconds, capped at {@code MAX_SLEEP_AMOUNT}; does
        * nothing for non-positive amounts or when sleeping is disabled.
        */
       private void sleep(long sleepAmount) {
               if (!allowSleep) {
                       return;
               }
               long cappedAmount = Math.min(sleepAmount, MAX_SLEEP_AMOUNT);
               if (cappedAmount > 0) {
                       try {
                               Thread.sleep(cappedAmount);
                       } catch (InterruptedException e) {
                               // Cut the wait short but keep the interrupt visible to the caller.
                               Thread.currentThread().interrupt();
                       }
               }
       }

       /** Backs off exponentially in the number of consecutive failures before the next query. */
       private void sleepIfNecessaryOnFailure() {
               long startTime = System.currentTimeMillis();
               if (currentConsecutiveFailures > 0) {
                       // Multiply in double and cap before narrowing: the long product overflows to a
                       // negative value after enough consecutive failures, which silently skipped the sleep.
                       double backoffMs = (double) FAILURE_BACKOFF_BASE_UNIT
                               * Math.round(Math.pow(FAILURE_BACKOFF_SCALING_FACTOR, currentConsecutiveFailures));
                       sleep((long) Math.min(backoffMs, (double) MAX_SLEEP_AMOUNT));
               }
               MetricsManager.getInstance().addStatTable("sleep_on_failure", this.tableName, System.currentTimeMillis() - startTime);
       }

       /**
        * Returns the next chunk to query, after any pending failure back-off.
        *
        * @return a three-element list {@code [low token, high token, row limit]}, or an empty list
        *     when the chunk width has shrunk to 1 or less or the scan was declared hopeless
        */
       public List<BigInteger> getNextBounds() {
               if (currBiteSize <= 1 || hopeless) {
                       // bites got too small - hopeless
                       return new ArrayList<>();
               }
               sleepIfNecessaryOnFailure();
               // BigInteger arithmetic cannot wrap, so clamping to MAX_TOKEN is all that is needed;
               // the bite size is > 1 here, so the upper bound is always above the lower one.
               BigInteger candidateMax = currLowBound.add(BigInteger.valueOf(currBiteSize)).min(MAX_TOKEN);
               latestSuggestion = new ArrayList<>();
               latestSuggestion.add(currLowBound);
               latestSuggestion.add(candidateMax);
               latestSuggestion.add(BigInteger.valueOf(currLimit));
               return latestSuggestion;
       }
}

}


