// Derived from Python 3.11.1 source code found under http://svn.python.org/projects/python/trunk/Objects/listobject.c
// Python 3.11.1 source code comes under the PSF License Agreement:
//
// PSF LICENSE AGREEMENT
// 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and
//    the Individual or Organization ("Licensee") accessing and otherwise using Python
//    3.11.1 software in source or binary form and its associated documentation.
//
// 2. Subject to the terms and conditions of this License Agreement, PSF hereby
//    grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
//    analyze, test, perform and/or display publicly, prepare derivative works,
//    distribute, and otherwise use Python 3.11.1 alone or in any derivative
//    version, provided, however, that PSF's License Agreement and PSF's notice of
//    copyright, i.e., "Copyright © 2001-2023 Python Software Foundation; All Rights
//    Reserved" are retained in Python 3.11.1 alone or in any derivative version
//    prepared by Licensee.
//
// 3. In the event Licensee prepares a derivative work that is based on or
//    incorporates Python 3.11.1 or any part thereof, and wants to make the
//    derivative work available to others as provided herein, then Licensee hereby
//    agrees to include in any such work a brief summary of the changes made to Python
//    3.11.1.
//
// 4. PSF is making Python 3.11.1 available to Licensee on an "AS IS" basis.
//    PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY WAY OF
//    EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR
//    WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE
//    USE OF PYTHON 3.11.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
//
// 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.11.1
//    FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF
//    MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 3.11.1, OR ANY DERIVATIVE
//    THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
//
// 6. This License Agreement will automatically terminate upon a material breach of
//    its terms and conditions.
//
// 7. Nothing in this License Agreement shall be deemed to create any relationship
//    of agency, partnership, or joint venture between PSF and Licensee.  This License
//    Agreement does not grant permission to use PSF trademarks or trade name in a
//    trademark sense to endorse or promote products or services of Licensee, or any
//    third party.
//
// 8. By copying, installing or otherwise using Python 3.11.1, Licensee agrees
//    to be bound by the terms and conditions of this License Agreement.
//
package de.caff.generics.algorithm;

import de.caff.annotation.NotNull;
import de.caff.generics.Indexable;
import de.caff.generics.MutableIndexable;
import de.caff.generics.Order;
import de.caff.generics.function.Ordering;

/**
 * TimSort is an advanced stable sorting algorithm developed by Tim Peters.
 * This code is a port derived from Python 3.11.1 source code found under
 * <a href="http://svn.python.org/projects/python/trunk/Objects/listobject.c">http://svn.python.org/projects/python/trunk/Objects/listobject.c</a>.
 * TimSort is already included in Java since version 1.7 as a package-private class
 * under {@code java.util.TimSort} (only for sorting Objects).
 * <p>
 * The workings of TimSort are described in a paper found under
 * <a href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">http://svn.python.org/projects/python/trunk/Objects/listsort.txt</a>.
 * <p>
 * Here the algorithms are based on mutable indexables because this makes
 * implementations for lists, arrays and more work in the same way.
 *
 * Also {@link Ordering} is used instead of the standard {@link java.util.Comparator} to make this more consistent
 * with the sister classes which sort primitive values.
 *
 * @param <T> type of sorted objects
 * @author Tim Peters
 * @author <a href="mailto:rammi@caff.de">Rammi</a> (adaptions)
 * @since January 12, 2023
 * @see TimSortDouble
 * @see TimSortFloat
 * @see TimSortInt
 * @see TimSortLong
 */
public class TimSort<T>
{
  /**
   * Minimal merge size, slices smaller than this are sorted by binary insertion.
   * The original Python implementation uses {@code 64}, standard Java {@code java.util.TimSort} uses
   * {@code 32} because it was measured faster.
   * <p>
   * NOTE(review): earlier documentation stated that own measurements agree with {@code 32},
   * yet the value used here is {@code 16} -- confirm which value the measurements actually favoured.
   */
  private static final int MIN_MERGE = 16;
  /**
   * Maximal number of pending merges, i.e. the capacity of the stack of slices.
   * This depends on {@linkplain #MIN_MERGE} in a complex way, see
   * <a href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">http://svn.python.org/projects/python/trunk/Objects/listsort.txt</a>.
   * <p>
   * The lengths of the pending slices grow at least like the Fibonacci numbers from the
   * top of the stack downwards, and a slice is at least {@code MIN_MERGE / 2} elements long
   * as soon as more than one slice exists. With {@code int} indexes this allows
   * about {@code 40} pending slices, so a smaller stack could overflow on huge inputs.
   * The value used here is the one of the Python implementation, which leaves
   * a generous safety margin at the cost of a few hundred bytes per sort.
   */
  private static final int MAX_MERGE_PENDING = 85;

  /**
   * Minimal value for considering galloping mode.
   * See <a href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">http://svn.python.org/projects/python/trunk/Objects/listsort.txt</a>.
   */
  private static final int MIN_GALLOP = 7;

  /**
   * Initial size of the merge array.
   * The array is enlarged when necessary. It has to hold the smaller
   * of two merged slices, so up to half of the sorted elements may be requested.
   */
  private static final int INITIAL_MERGE_ARRAY_SIZE = 256;

  /**
   * Penalty applied when leaving galloping mode.
   * The original Python implementation used {@code 1},
   * the standard Java implementation {@code 2}.
   * Staying here with the original because measurements
   * didn't state a winner.
   */
  private static final int MIN_GALLOP_PENALTY = 1;

  /**
   * The mutable indexable being sorted.
   * All slice start indexes kept by this class refer to positions in this indexable.
   */
  @NotNull
  private final MutableIndexable<T> elements;
  /** Ordering defining the sort order. */
  @NotNull
  private final Ordering<? super T> order;
  /**
   * Current threshold of consecutive wins of one slice which is required
   * before a merge considers galloping mode.
   * Starts with {@link #MIN_GALLOP} and is adapted by the merge methods.
   */
  private int minGallop = MIN_GALLOP;

  /**
   * Temporary array used during merges.
   * It is defined here as an {@code Object} array, but always used as an array of {@code T}s.
   * It is replaced by a larger array when a merge needs more capacity.
   */
  @NotNull
  private Object[] tmp;

  /**
   * Number of already sorted slices waiting for merge.
   * A slice is not an object (struct) here because objects are too heavy in Java,
   * so we just store begin and length of a slice in two {@code int} arrays.
   * Only the first {@code numSlices} entries of these arrays are valid.
   */
  private int numSlices;
  /** Start indexes of the slices. */
  private final int[] sliceBase;
  /**
   * Lengths of slices.
   * Basically this array is redundant, as it can be calculated from {@link #sliceBase}
   * with only minor tweaking. But tests show that that is a bit slower.
   */
  private final int[] sliceLength;

  /**
   * Create the state needed for one sort run.
   * Allocates the temporary merge array in its initial size
   * and the two arrays making up the stack of pending slices.
   * @param elements elements which will be sorted in place
   * @param ordering ordering defining the sort order
   */
  private TimSort(@NotNull MutableIndexable<T> elements,
                  @NotNull Ordering<? super T> ordering)
  {
    // what is sorted, and how
    this.elements    = elements;
    this.order       = ordering;

    // work space: merge buffer and stack of pending slices
    this.tmp         = new Object[INITIAL_MERGE_ARRAY_SIZE];
    this.sliceBase   = new int[MAX_MERGE_PENDING];
    this.sliceLength = new int[MAX_MERGE_PENDING];
  }

  /**
   * Push a new sorted slice onto the stack of slices waiting for merge.
   * @param base   start index of the slice
   * @param length length of the slice
   */
  private void addSlice(int base, int length)
  {
    final int top = numSlices;
    sliceBase[top]   = base;
    sliceLength[top] = length;
    // only count the slice after both entries were stored
    numSlices = top + 1;
  }

  /**
   * Examine the stack of pending slices and merge them until the stack
   * invariants are re-established (using Pythonesque indexing):
   * <ol>
   *   <li>{@code sliceLength[-3] > sliceLength[-2] + sliceLength[-1]}</li>
   *   <li>{@code sliceLength[-2] > sliceLength[-1]}</li>
   * </ol>
   * <p>
   * Invariant 1 is checked for the topmost three slices and also for the three
   * slices one level deeper. Checking only the topmost three is not sufficient:
   * a merge of the second and third slice from the top can silently break
   * the invariant for the slices below, which lets the stack grow beyond
   * the depth it was dimensioned for.
   */
  private void mergeCollapse()
  {
    while (numSlices > 1) {
      int n = numSlices - 2;  // index of the second slice from the top
      if ((n > 0 && sliceLength[n - 1] <= sliceLength[n] + sliceLength[n + 1]) ||
          (n > 1 && sliceLength[n - 2] <= sliceLength[n - 1] + sliceLength[n])) {
        // invariant 1 is violated (n > 0 is guaranteed here):
        // merge the middle slice with its smaller neighbor
        if (sliceLength[n - 1] < sliceLength[n + 1]) {
          --n;
        }
      }
      else if (sliceLength[n] > sliceLength[n + 1]) {
        // both invariants hold
        return;
      }

      mergeAt(n);
    }
  }

  /**
   * Merge the two slices at stack indices {@code i} and {@code i + 1}.
   * The stack is updated first, then the elements which are already in
   * their final place at the outer ends of both slices are skipped,
   * and the rest is merged by {@link #mergeLo(int, int, int, int)} or
   * {@link #mergeHi(int, int, int, int)}, depending on which remaining slice is shorter.
   * @param i stack index of the first of the two slices,
   *          has to be the second or third slice from the top of the stack
   */
  private void mergeAt(int i)
  {
    assert numSlices >= 2 && i >= 0 &&
           (i == numSlices - 2 || i == numSlices - 3);

    int base1 = sliceBase[i];
    int length1 = sliceLength[i];
    int base2 = sliceBase[i + 1];
    int length2 = sliceLength[i + 1];

    // both slices are non-empty and directly adjacent
    assert length1 > 0  &&  length2 > 0  &&
           base1 + length1 == base2;

    /*
     * Record the length of the combined slices; if i is the 3rd-last
     * slice now, also slide over the last slice (which isn't involved
     * in this merge).  The current slice (i+1) goes away in any case.
     */
    sliceLength[i] = length1 + length2;
    if (i == numSlices - 3) {
      sliceBase[i + 1]   = sliceBase[i + 2];
      sliceLength[i + 1] = sliceLength[i + 2];
    }
    --numSlices;

    /*
     * Find where the first element of slice 2 goes in slice 1. Prior elements
     * in slice 1 can be ignored (because they're already in place).
     */
    final int k = gallopRight(elements.get(base2), elements.subSet(base1, base1 + length1), 0, order);
    assert k >= 0;
    base1 += k;
    length1 -= k;
    if (length1 == 0) {
      // all of slice 1 is not larger than the start of slice 2: nothing to merge
      return;
    }

    /*
     * Find where the last element of slice 1 goes in slice 2. Subsequent elements
     * in slice 2 can be ignored (because they're already in place).
     */
    length2 = gallopLeft(elements.get(base1 + length1 - 1),
                      elements.subSet(base2, base2 + length2),
                      length2 - 1,
                      order);
    assert length2 >= 0;
    if (length2 > 0) {
      // Merge remaining slices, using tmp array with min(length1, length2) elements
      if (length1 <= length2) {
        mergeLo(base1, length1, base2, length2);
      }
      else {
        mergeHi(base1, length1, base2, length2);
      }
    }
    if (false) { // debugging aid, only enable for tests where we know that all elements are different
      // this is helpful to figure out where things went wrong:
      // it checks that the merged slice is indeed in order
      final int end = sliceBase[i] + sliceLength[i];
      for (int j = sliceBase[i] + 1; j < end; ++j) {
        final T e1 = elements.get(j - 1);
        final T e2 = elements.get(j);
        assert e1 != e2 // ATTENTION: this shortcut will not work in general!
               && order.check(elements.get(j - 1), elements.get(j)) != Order.Descending;
      }
    }
  }

  /**
   * Get the temporary array used for merging.
   * The merge code takes care that always the smaller of two slices
   * defines the requested capacity. If the current array is too small it is
   * replaced by a new one, the old content is not preserved.
   * @param capacity required capacity of the returned array
   * @return an array of {@code T} with at least the required capacity
   */
  @SuppressWarnings("unchecked") // returned array is always used as T[]
  private T[] tmpArray(int capacity)
  {
    if (capacity > tmp.length) {
      // Prefer the next power of 2 above capacity. The shift may overflow
      // to a negative value for huge capacities, in that case capacity wins.
      final int nextPowerOfTwo = Integer.highestOneBit(capacity) << 1;
      tmp = new Object[nextPowerOfTwo > capacity ? nextPowerOfTwo : capacity];
    }
    return (T[])tmp;
  }

  /**
   * Merge two adjacent sorted slices with the left slice being the smaller one.
   * <p>
   * The left slice is copied to the temporary array, then the copy and the right
   * slice are merged from left to right into the space starting at {@code base1}.
   * The caller has to make sure that the first element of the left slice is larger
   * than the first element of the right slice, and that the last element of the
   * left slice is larger than all elements of the right slice
   * (this is what the trimming in {@code mergeAt()} provides).
   * @param base1 start of the first slice
   * @param length1  length of the first slice, positive
   * @param base2 start of the second slice, has to be {@code base1 + length1}
   * @param length2  length of the second slice, positive
   * @throws IllegalArgumentException if the ordering turns out to be inconsistent
   */
  private void mergeLo(int base1, int length1,
                       int base2, int length2)
  {
    assert length1 > 0 && length2 > 0 && base1 + length1 == base2;

    // Copy first run into temp array
    final MutableIndexable<T> a = this.elements; // For performance
    final T[] tmp = tmpArray(length1);
    int cursor1 = 0;       // Indexes into tmp array
    int cursor2 = base2;   // Indexes into a
    int dest = base1;      // Indexes into a
    a.addToArray(tmp, cursor1, base1, length1);

    // Move first element of second run and deal with degenerate cases
    a.set(dest++, a.get(cursor2++));
    if (--length2 == 0) {
      a.setFromArray(tmp, cursor1, dest, length1);
      return;
    }
    if (length1 == 1) {
      a.copyInternally(cursor2, dest, length2);
      a.set(dest + length2, tmp[cursor1]); // Move last element of run 1 to end of merge
      return;
    }

    final Ordering<? super T> order = this.order;  // Use local variable for performance
    int minGallop = this.minGallop;    //  "    "       "     "      "
  outer:
    while (true) {
      int count1 = 0; // Number of times in a row that first run won
      int count2 = 0; // Number of times in a row that second run won

      /*
       * Do the straightforward thing until (if ever) one run starts
       * winning consistently.
       */
      do {
        assert length1 > 1 && length2 > 0;
        if (order.check(a.get(cursor2), tmp[cursor1]) == Order.Ascending) {
          a.set(dest++, a.get(cursor2++));
          ++count2;
          count1 = 0;
          if (--length2 == 0) {
            break outer;
          }
        } else {
          a.set(dest++, tmp[cursor1++]);
          ++count1;
          count2 = 0;
          if (--length1 == 1) {
            break outer;
          }
        }
        // One of the counters is always 0 here, so both have to stay below
        // the threshold to continue. Leave as soon as either run has won
        // minGallop times in a row.
      } while (count1 < minGallop && count2 < minGallop);

      /*
       * One run is winning so consistently that galloping may be a
       * huge win. So try that, and continue galloping until (if ever)
       * neither run appears to be winning consistently anymore.
       */
      do {
        assert length1 > 1 && length2 > 0;
        // Number of leading elements of run 1 which go before the current element of run 2.
        // NOTE(review): assumes Indexable.viewArray() takes (array, start, length) -- confirm
        count1 = gallopRight(a.get(cursor2), Indexable.viewArray(tmp, cursor1, length1), 0, order);
        if (count1 != 0) {
          a.setFromArray(tmp, cursor1, dest, count1);
          dest += count1;
          cursor1 += count1;
          length1 -= count1;
          if (length1 <= 1) {
            break outer;
          }
        }
        a.set(dest++, a.get(cursor2++));
        if (--length2 == 0) {
          break outer;
        }

        // Number of leading elements of run 2 which go before the current element of run 1.
        // subSet() expects an end index, not a length.
        count2 = gallopLeft(tmp[cursor1], a.subSet(cursor2, cursor2 + length2), 0, order);
        if (count2 != 0) {
          a.copyInternally(cursor2, dest, count2);
          dest += count2;
          cursor2 += count2;
          length2 -= count2;
          if (length2 == 0) {
            break outer;
          }
        }
        a.set(dest++, tmp[cursor1++]);
        if (--length1 == 1) {
          break outer;
        }
        --minGallop;  // galloping pays off, make it easier to enter
      } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP);

      if (minGallop < 0) {
        minGallop = 0;
      }
      minGallop += MIN_GALLOP_PENALTY;  // Penalize for leaving gallop mode
    }  // End of "outer" loop

    this.minGallop = Math.max(minGallop, 1);  // Write back to field

    switch (length1) {
    case 0:
      // can only happen if the ordering contradicts itself
      throw new IllegalArgumentException("Order violates contract!");
    case 1:
      assert length2 > 0;
      a.copyInternally(cursor2, dest, length2);
      a.set(dest + length2, tmp[cursor1]); //  Last element of run 1 to end of merge
      break;
    default:
      // run 2 is used up, the rest of run 1 follows
      assert length2 == 0;
      a.setFromArray(tmp, cursor1, dest, length1);
      break;
    }
  }

  /**
   * Merge two adjacent sorted slices with the right slice being the smaller one.
   * <p>
   * The right slice is copied to the temporary array, then the left slice
   * and the copy are merged from right to left into the space ending at
   * {@code base2 + length2 - 1}.
   * The caller has to make sure that the first element of the left slice is larger
   * than the first element of the right slice, and that the last element of the
   * left slice is larger than all elements of the right slice
   * (this is what the trimming in {@code mergeAt()} provides).
   * @param base1 start of the first slice
   * @param length1  length of the first slice, positive
   * @param base2 start of the second slice, has to be {@code base1 + length1}
   * @param length2  length of the second slice, positive
   * @throws IllegalArgumentException if the ordering turns out to be inconsistent
   */
  private void mergeHi(int base1, int length1, int base2, int length2) {
    assert length1 > 0  &&  length2 > 0  &&  base1 + length1 == base2;

    // Copy second run into temp array
    final MutableIndexable<T> a = this.elements; // For performance
    final T[] tmp = tmpArray(length2);
    a.addToArray(tmp, 0, base2, length2);

    int cursor1 = base1 + length1 - 1;  // Indexes into a
    int cursor2 = length2 - 1; // Indexes into tmp array
    int dest = base2 + length2 - 1;     // Indexes into a

    // Move last element of first run and deal with degenerate cases
    a.set(dest--, a.get(cursor1--));
    if (--length1 == 0) {
      a.setFromArray(tmp, 0, dest - (length2 - 1), length2);
      return;
    }
    if (length2 == 1) {
      dest -= length1;
      cursor1 -= length1;
      a.copyInternally(cursor1 + 1, dest + 1, length1);
      a.set(dest, tmp[cursor2]);
      return;
    }

    final Ordering<? super T> order = this.order;  // Use local variable for performance
    int minGallop = this.minGallop;    //  "    "       "     "      "
    outer:
    while (true) {
      int count1 = 0; // Number of times in a row that first run won
      int count2 = 0; // Number of times in a row that second run won

      /*
       * Do the straightforward thing until (if ever) one run
       * appears to win consistently.
       */
      do {
        assert length1 > 0 && length2 > 1;
        if (order.check(tmp[cursor2], a.get(cursor1)) == Order.Ascending) {
          a.set(dest--, a.get(cursor1--));
          ++count1;
          count2 = 0;
          if (--length1 == 0) {
            break outer;
          }
        } else {
          a.set(dest--, tmp[cursor2--]);
          ++count2;
          count1 = 0;
          if (--length2 == 1) {
            break outer;
          }
        }
        // One of the counters is always 0 here, so both have to stay below
        // the threshold to continue. Leave as soon as either run has won
        // minGallop times in a row.
      } while (count1 < minGallop  &&  count2 < minGallop);

      /*
       * One run is winning so consistently that galloping may be a
       * huge win. So try that, and continue galloping until (if ever)
       * neither run appears to be winning consistently anymore.
       */
      do {
        assert length1 > 0 && length2 > 1;
        // Number of trailing elements of run 1 which go behind the current element of run 2.
        // subSet() expects an end index, not a length.
        count1 = length1 - gallopRight(tmp[cursor2], a.subSet(base1, base1 + length1), length1 - 1, order);
        if (count1 != 0) {
          dest    -= count1;
          cursor1 -= count1;
          length1 -= count1;
          a.copyInternally(cursor1 + 1, dest + 1, count1);
          if (length1 == 0) {
            break outer;
          }
        }
        a.set(dest--, tmp[cursor2--]);
        if (--length2 == 1) {
          break outer;
        }

        // Number of trailing elements of run 2 which go behind the current element of run 1.
        // NOTE(review): assumes Indexable.viewArray() takes (array, start, length) -- confirm
        count2 = length2 - gallopLeft(a.get(cursor1), Indexable.viewArray(tmp, 0, length2), length2 - 1, order);
        if (count2 != 0) {
          dest -= count2;
          cursor2 -= count2;
          length2 -= count2;
          a.setFromArray(tmp, cursor2 + 1, dest + 1, count2);
          if (length2 <= 1) {
            break outer;
          }
        }
        a.set(dest--, a.get(cursor1--));
        if (--length1 == 0) {
          break outer;
        }
        --minGallop;  // galloping pays off, make it easier to enter
      } while (count1 >= MIN_GALLOP | count2 >= MIN_GALLOP);

      if (minGallop < 0) {
        minGallop = 0;
      }
      minGallop += MIN_GALLOP_PENALTY;  // Penalize for leaving gallop mode
    }  // End of "outer" loop
    this.minGallop = Math.max(minGallop, 1);  // Write back to field

    switch (length2) {
    case 0:
      // can only happen if the ordering contradicts itself
      throw new IllegalArgumentException("Order is broken!");
    case 1:
      assert length1 > 0;
      dest -= length1;
      cursor1 -= length1;
      a.copyInternally(cursor1 + 1,  dest + 1, length1);
      a.set(dest, tmp[cursor2]);  // Move first element of run2 to front of merge
      break;
    default:
      // run 1 is used up, the rest of run 2 goes in front
      assert length1 == 0  &&  length2 > 0;
      a.setFromArray(tmp, 0, dest - (length2 - 1), length2);
      break;
    }
  }

  /**
   * Sort a mutable indexable of comparable elements into their natural order.
   * This just delegates to {@link #sort(MutableIndexable, Ordering)}
   * using the natural ordering.
   * @param elements elements to sort, sorted in place
   * @param <E> element type of the indexable
   */
  public static <E extends Comparable<? super E>> void sort(@NotNull MutableIndexable<E> elements)
  {
    TimSort.<E>sort(elements, Ordering.natural());
  }

  /**
   * Sort a mutable indexable in place using the given order.
   * <p>
   * The elements are processed from left to right: the next already ordered run
   * is determined, extended by binary insertion if it is too short, pushed
   * on the stack of pending slices, and pending slices are merged where necessary.
   * Finally all slices still pending are merged into one.
   * @param elements elements to sort
   * @param order    order defined for elements
   * @param <E> element type of the indexable
   * @throws IllegalArgumentException if order fails its invariant
   */
  public static <E> void sort(@NotNull MutableIndexable<E> elements,
                              @NotNull Ordering<? super E> order)
  {
    final int size = elements.size();
    if (size < 2) {
      // nothing to sort
      return;
    }

    final TimSort<E> sorter = new TimSort<>(elements, order);
    final int minRun = mergeComputeMinrun(size);

    int start = 0;
    for (int remaining = size;  remaining != 0;  ) {
      // Identify the next run
      int runLength = countRun(elements, start, size, order);
      if (runLength < minRun) {
        // Too short: extend it to min(minRun, remaining) by binary insertion
        final int forced = remaining < minRun ? remaining : minRun;
        binarySort(elements.subSet(start, start + forced), runLength, order);
        runLength = forced;
      }
      // Push the run onto the stack of pending slices, and maybe merge
      sorter.addSlice(start, runLength);
      sorter.mergeCollapse();

      start     += runLength;
      remaining -= runLength;
    }

    assert start == size;
    sorter.mergeForceCollapse();

    assert sorter.numSlices == 1;
  }

  /**
   * Merge all slices on the stack until only one remains, regardless of the
   * stack invariants. This is performed at the end of the sort.
   */
  private void mergeForceCollapse()
  {
    while (numSlices > 1) {
      final int second = numSlices - 2;  // second slice from the top
      // If there is a third slice which is shorter than the top one,
      // merge it with the second slice first
      final boolean thirdIsShorter = second > 0  &&
                                     sliceLength[second - 1] < sliceLength[second + 1];
      mergeAt(thirdIsShorter ? second - 1 : second);
    }
  }

  /**
   * Compute a good value for the minimum run length.
   * Natural runs shorter than this are boosted artificially via binary insertion.
   * Returns {@code n} itself if it is less than {@link #MIN_MERGE} because it's too small
   * for fancy stuff. Otherwise {@code n} is halved until it is below {@link #MIN_MERGE},
   * and {@code 1} is added if any of the bits shifted out was set.
   * <p>
   * See <a href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">http://svn.python.org/projects/python/trunk/Objects/listsort.txt</a> for more info.
   * @param n number of elements to sort, not negative
   * @return useful value for minimal run length
   */
  static int mergeComputeMinrun(int n)
  {
    assert n >= 0;

    int anyBitLost = 0;  // becomes 1 as soon as a 1 bit is shifted out
    for ( ;  n >= MIN_MERGE;  n >>= 1) {
      anyBitLost |= n & 1;
    }
    return n + anyBitLost;
  }

  /**
   * Binary insertion sort used for sorting small slices.
   * Binary sort does few compares, but can do data movement quadratic in the number of elements.
   * The elements before {@code start} are expected to be sorted already.
   * @param elements (slice of) elements to sort
   * @param start    index of the first element which is possibly not in order
   *                 ({@code 0 <= start <= elements.size()})
   * @param order    ordering
   * @param <E> element type
   */
  private static <E> void binarySort(@NotNull MutableIndexable<E> elements,
                                     int start,
                                     @NotNull Ordering<? super E> order)
  {
    final int size = elements.size();
    assert 0 <= start  && start <= size;

    // a single element is always sorted, so insertion begins at index 1 at the earliest
    for (int next = (start == 0) ? 1 : start;  next < size;  ++next) {
      final E pivot = elements.get(next);
      /* Find the index where pivot belongs by binary search.
       * Invariants:
       * pivot >= all in [0, left).
       * pivot  < all in [right, next).
       * The second is vacuously true at the start.
       */
      int left  = 0;
      int right = next;
      assert left < right;
      do {
        final int mid = (left + right) >> 1;
        if (order.check(pivot, elements.get(mid)) == Order.Ascending) {
          right = mid;
        }
        else {
          left = mid + 1;
        }
      } while (left < right);
      assert left == right;
      /* The invariants still hold, so pivot >= all in [0, left) and
         pivot < all in [left, next), so pivot belongs at left.  Note
         that if there are elements equal to pivot, left points to the
         first slot after them -- that's why this sort is stable.
         Slide over to make room. */
      for (int slot = next;  slot > left;  --slot) {
        elements.set(slot, elements.get(slot - 1));
      }
      elements.set(left, pivot);
    }
  }

  /**
   * Get the length of the next run of elements which are ascending or strictly descending.
   * Revert the order of the elements in the second case, so the run
   * is always ascending on return.
   * This is a combination of original {@code count_run()} method and post-processing
   * performed elsewhere.
   *
   * @param elements (slice of) elements to analyze (may get reordered)
   * @param lo       start index
   * @param hi       index behind the last element to look at, larger than {@code lo}
   * @param order    ordering
   * @return number of ascending elements waiting at position {@code lo}:
   *         {@code 1} if there is only one element left, otherwise at least {@code 2}
   * @param <E> element type
   */
  private static <E> int countRun(@NotNull MutableIndexable<E> elements,
                                  final int lo, final int hi,
                                  @NotNull Ordering<? super E> order)
  {
    assert lo < hi;
    if (lo + 1 == hi) {
      // single element left
      return 1;
    }

    // The first two elements define the direction of the run
    final boolean descending =
            order.check(elements.get(lo), elements.get(lo + 1)) == Order.Descending;
    int end = lo + 2;  // index behind the run found so far
    if (descending) {
      // only strictly descending elements are accepted, otherwise reverting would break stability
      while (end < hi  &&
             order.check(elements.get(end), elements.get(end - 1)) == Order.Ascending) {
        ++end;
      }
      // NOTE(review): the index of the last run element is passed here, so revert()
      // presumably expects an inclusive end index -- verify against MutableIndexable
      elements.revert(lo, end - 1);
    }
    else {
      while (end < hi  &&
             order.check(elements.get(end - 1), elements.get(end)) != Order.Descending) {
        ++end;
      }
    }

    return end - lo;
  }

  /**
   * Get the position where to insert the specified key into a sorted slice
   * of the original indexable.
   * If there are elements which are considered equal to {@code key},
   * the index of the left-most of these elements is returned.
   * <p>
   * The search starts at {@code hint} and looks at exponentially growing
   * distances from there until the key is enclosed, then a binary search
   * is performed in the enclosing range.
   * @param key   key for which an insertion position is requested
   * @param a     a slice of the indexable, expected to be sorted and not empty
   * @param hint  start point of the search ({@code 0 <= hint < a.size()})
   * @param order ordering
   * @return insertion index {@code k} with {@code 0 <= k <= a.size()},
   *         so that {@code a[k-1] < key <= a[k]}
   * @param <E> element type
   */
  private static <E> int gallopLeft(final E key,
                                    @NotNull final Indexable<E> a,
                                    final int hint,
                                    @NotNull final Ordering<? super E> order)
  {
    final int n = a.size();
    assert hint >= 0  &&  hint < n;

    int lastofs = 0;
    int ofs = 1;
    if (order.check(a.get(hint), key) == Order.Ascending) {
      /* a[hint] < key -- gallop right, until
       * a[hint + lastofs] < key <= a[hint + ofs]
       */
      final int maxofs = n - hint;
      while (ofs < maxofs &&
             order.check(a.get(hint + ofs), key) == Order.Ascending) {
        lastofs = ofs;
        ofs = (ofs << 1) + 1;
        if (ofs <= 0) { // int overflow
          ofs = maxofs;
        }
      }
      if (ofs > maxofs) {
        ofs = maxofs;
      }
      // Translate offsets to be relative to a's start
      lastofs += hint;
      ofs     += hint;
    }
    else {
      /* key <= a[hint] -- gallop left, until
       * a[hint - ofs] < key <= a[hint - lastofs]
       */
      final int maxofs = hint + 1;
      while (ofs < maxofs  &&
             order.check(a.get(hint - ofs), key) != Order.Ascending) {
        /* key <= a[hint - ofs] */
        lastofs = ofs;
        ofs = (ofs << 1) + 1;
        if (ofs <= 0) {              /* int overflow */
          ofs = maxofs;
        }
      }
      if (ofs > maxofs) {
        ofs = maxofs;
      }

      // Translate offsets to be relative to a's start,
      // this also swaps their roles because the gallop went to the left
      final int k = lastofs;
      lastofs = hint - ofs;
      ofs = hint - k;
    }

    assert -1 <= lastofs  &&  lastofs < ofs  &&  ofs <= n;
    /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
     * right of lastofs but no farther right than ofs.  Do a binary
     * search, with invariant a[lastofs-1] < key <= a[ofs].
     */
    ++lastofs;
    while (lastofs < ofs) {
      // midpoint calculated in a way which cannot overflow
      final int m = lastofs + ((ofs - lastofs) >>> 1);

      if (order.check(a.get(m), key) == Order.Ascending) {
        lastofs = m + 1;              /* a[m] < key */
      }
      else {
        ofs = m;                      /* key <= a[m] */
      }
    }
    assert lastofs == ofs;            /* so a[ofs-1] < key <= a[ofs] */
    return ofs;
  }

  /**
   * Get the position where to insert the specified key into a sorted slice
   * of the original indexable.
   * If there are elements which are considered equal to {@code key},
   * the index after the right-most of these elements is returned.
   * <p>
   * The search starts at {@code hint} and looks at exponentially growing
   * distances from there until the key is enclosed, then a binary search
   * is performed in the enclosing range.
   *
   * @param key   key for which an insertion position is requested
   * @param a     a slice of the indexable, expected to be sorted and not empty
   * @param hint  start point of the search ({@code 0 <= hint < a.size()})
   * @param order ordering
   * @return insertion index {@code k} with {@code 0 <= k <= a.size()},
   *         so that {@code a[k-1] <= key < a[k]}
   * @param <E> element type
   */
  private static <E> int gallopRight(final E key,
                                     @NotNull final Indexable<E> a,
                                     final int hint,
                                     @NotNull final Ordering<? super E> order)
  {
    final int n = a.size();
    assert hint >= 0  &&  hint < n;

    int lastofs = 0;
    int ofs = 1;
    if (order.check(key, a.get(hint)) == Order.Ascending) {
      /* key < a[hint] -- gallop left, until
       * a[hint - ofs] <= key < a[hint - lastofs]
       */
      final int maxofs = hint + 1;             /* a[0] is lowest */
      while (ofs < maxofs &&
             order.check(key, a.get(hint - ofs)) == Order.Ascending) {
        lastofs = ofs;
        ofs = (ofs << 1) + 1;
        if (ofs <= 0) {                  /* int overflow */
          ofs = maxofs;
        }
      }
      if (ofs > maxofs) {
        ofs = maxofs;
      }
      // Translate back to offsets relative to a's start,
      // this also swaps their roles because the gallop went to the left
      final int k = lastofs;
      lastofs = hint - ofs;
      ofs = hint - k;
    }
    else {
      /* a[hint] <= key -- gallop right, until
       * a[hint + lastofs] <= key < a[hint + ofs]
       */
      final int maxofs = n - hint;         /* a[n-1] is highest */
      while (ofs < maxofs &&
             order.check(key, a.get(hint + ofs)) != Order.Ascending) {
        /* a[hint + ofs] <= key */
        lastofs = ofs;
        ofs = (ofs << 1) + 1;
        if (ofs <= 0) {              /* int overflow */
          ofs = maxofs;
        }
      }
      if (ofs > maxofs) {
        ofs = maxofs;
      }
      // Translate back to positive offsets relative to a's start
      lastofs += hint;
      ofs += hint;
    }

    assert -1 <= lastofs && lastofs < ofs && ofs <= n;
    /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
     * right of lastofs but no farther right than ofs.  Do a binary
     * search, with invariant a[lastofs-1] <= key < a[ofs].
     */
    ++lastofs;
    while (lastofs < ofs) {
      // midpoint calculated in a way which cannot overflow
      final int m = lastofs + ((ofs - lastofs) >>> 1);

      if (order.check(key, a.get(m)) == Order.Ascending) {
        ofs = m;                    /* key < a[m] */
      }
      else {
        lastofs = m+1;              /* a[m] <= key */
      }
    }
    assert lastofs == ofs;             /* so a[ofs-1] <= key < a[ofs] */
    return ofs;
  }
}
