// ============================================================================
// COPYRIGHT NOTICE
// ----------------------------------------------------------------------------
// (This is the open source ISC license, see
// http://en.wikipedia.org/wiki/ISC_license
// for more info)
//
// Copyright © 2020-2024  Andreas M. Rammelt <rammi@caff.de>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//=============================================================================
// Latest version on https://caff.de/projects/decaff-commons/
//=============================================================================
package de.caff.generics.algorithm;

import de.caff.annotation.NotNull;
import de.caff.generics.CharIndexable;
import de.caff.generics.ExpandableIntIndexable;
import de.caff.generics.IntIndexable;

import java.util.HashMap;
import java.util.Map;

/**
 * Fast search algorithm in sequences of random-accessible char values.
 * <p>
 * Currently only forward search is implemented, but basically both the search indexable
 * and the searched indexable can be {@link CharIndexable#reverse() reversed} before searching.
 * <p>
 * Search terms which contain at least four different characters are searched with
 * the Boyer-Moore-Horspool algorithm: after a failed comparison the character of the data
 * which is aligned with the last character of the search term decides how far the term
 * can be shifted without missing a match. For terms with fewer different characters
 * a direct scan is used because shifting rarely pays off for them.
 * <p>
 * The shift table is prepared once in the constructor from the search term,
 * so the search term is expected not to change afterwards.
 *
 * @author <a href="mailto:rammi@caff.de">Rammi</a>
 * @since July 07, 2020
 */
public class FastCharSequenceSearch
{
  /**
   * Minimal number of different characters in the search term for which the shift table is used.
   * For small character sets (i.e. looking for a word with many equal characters)
   * shifting degrades and it is faster to search directly.
   */
  private static final int MIN_DISTINCT_CHARS_FOR_SHIFTING = 4;

  @NotNull
  private final CharIndexable term;
  /**
   * Shift table: maps each character which appears in the search term before its last position
   * to the distance between its rightmost such appearance and the last position of the term.
   * Characters without an entry allow to shift by the complete term length.
   */
  @NotNull
  private final Map<Character, Integer> shifts = new HashMap<>();
  /** Use the direct scan instead of the shift table? */
  private final boolean directSearch;

  /**
   * Constructor.
   * @param searchTerm term to search for
   */
  public FastCharSequenceSearch(@NotNull CharIndexable searchTerm)
  {
    term = searchTerm;
    // prepare shift table, later (more to the right) appearances overwrite earlier ones
    final int length = searchTerm.size();
    final int lastIndex = length - 1;
    for (int i = 0;  i < lastIndex;  ++i) {
      shifts.put(searchTerm.get(i), lastIndex - i);
    }
    // the last character is not part of the table, but counts as a character of the term
    int distinctChars = shifts.size();
    if (length > 0  &&  !shifts.containsKey(searchTerm.get(lastIndex))) {
      ++distinctChars;
    }
    directSearch = distinctChars < MIN_DISTINCT_CHARS_FOR_SHIFTING;
  }

  /**
   * Convenience constructor.
   * @param searchTerm search term
   */
  public FastCharSequenceSearch(@NotNull String searchTerm)
  {
    this(CharIndexable.viewString(searchTerm));
  }

  /**
   * Convenience constructor.
   * @param searchTerm search term
   */
  public FastCharSequenceSearch(@NotNull char ... searchTerm)
  {
    this(CharIndexable.viewArray(searchTerm));
  }

  /**
   * Get the search term.
   * @return search term
   */
  @NotNull
  public CharIndexable getSearchTerm()
  {
    return term;
  }

  /**
   * Find the first appearance of the search term in the given data.
   * @param data data to search through
   * @return index of first appearance of the search term in the given data,
   *         or {@code -1} if there was nothing found
   */
  public int findFirst(@NotNull CharIndexable data)
  {
    return findNext(data, 0);
  }

  /**
   * Find the next appearance of the search term at or after the given position
   * in the given data.
   * @param data data to search through
   * @param pos position to start the search, a match starting exactly here is found
   * @return position of next appearance of the search term in the given data at or after the given position,
   *         or {@code -1} if there was nothing found between the position and the end of data
   */
  public int findNext(@NotNull CharIndexable data, int pos)
  {
    return findNext(data, pos, data.size());
  }

  /**
   * Find the next appearance of the search term at or after the given start position
   * and before the given end position in the given data.
   * <p>
   * An empty search term matches at every position from {@code pos} up to and
   * including {@code end}.
   * @param data data to search through
   * @param pos  position to start the search, a match starting exactly here is found
   * @param end  position to end the search, the whole search term has to fit before this position to be found
   * @return position of next appearance of the search term in the given data at or after the given start position
   *         and ending before the given end position,
   *         or {@code -1} if there was nothing found between the position and the end position
   */
  public int findNext(@NotNull CharIndexable data, int pos, final int end)
  {
    final int termLength = term.size();
    // last start position at which the complete term still fits before end (inclusive!)
    final int lastStart = end - termLength;

    if (directSearch) {
      for (  ;  pos <= lastStart;  ++pos) {
        if (matchesAt(data, pos)) {
          return pos;
        }
      }
      return -1;
    }

    // Boyer-Moore-Horspool; the term is not empty here because an empty term uses the direct search
    final int lastIndex = termLength - 1;
    while (pos <= lastStart) {
      if (matchesAt(data, pos)) {
        return pos;
      }
      // align the data character under the term's last position with its rightmost
      // appearance elsewhere in the term, or jump over it if there is none
      final Integer shift = shifts.get(data.get(pos + lastIndex));
      pos += shift != null
              ? shift
              : termLength;
    }
    return -1;
  }

  /**
   * Check whether the search term appears in the data at the given position.
   * The caller has to take care that the complete term fits into the data at this position.
   * @param data data to search through
   * @param pos  position inside {@code data} where the term is expected to start
   * @return {@code true} if the data sequence starting at {@code pos} equals the search term<br>
   *         {@code false} if they differ
   */
  private boolean matchesAt(@NotNull CharIndexable data, final int pos)
  {
    final int length = term.size();
    for (int i = 0;  i < length;  ++i) {
      if (term.get(i) != data.get(pos + i)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Find all matches.
   * @param data          data to search
   * @param allowOverlaps allow overlapping sequences in the matches?
   * @return start position of all matches
   */
  @NotNull
  public IntIndexable findAllMatches(@NotNull CharIndexable data,
                                     boolean allowOverlaps)
  {
    // always advance by at least 1, otherwise an empty search term would
    // be found at the same position forever
    final int step = allowOverlaps
            ? 1
            : Math.max(1, term.size());
    ExpandableIntIndexable results = ExpandableIntIndexable.empty();
    int pos = 0;
    int find;
    while ((find = findNext(data, pos)) >= 0) {
      results = results.add(find);
      pos = find + step;
    }
    return results;
  }
}
