import { Fragment, Node as PMNode } from "@tiptap/pm/model";

import { URL_OR_WORD_REGEX } from "./regex";
import { isValidURL } from "./url";
import { textBetween } from "./word-count";

/**
 * Tokenise the plain text of a range inside a Node (and its descendants).
 *
 * The range is flattened to plain text via `textBetween` (using a single space
 * as the separator argument) and then scanned with `wordRegexMatchAll`.
 *
 * @param doc node whose content is tokenised
 * @param from start position of the range inside `doc`
 * @param to end position of the range inside `doc`
 * @returns a tuple of the set of distinct matched tokens and the total number
 *   of matches (duplicates included)
 */
export function tokeniseUniqueWords(
  doc: PMNode,
  from: number,
  to: number
): [Set<string>, number] {
  const plainText = textBetween(doc, " ", from, to);
  // Materialise every full match (capture group 0) in document order.
  const tokens = Array.from(wordRegexMatchAll(plainText), (hit) => hit[0]);
  return [new Set<string>(tokens), tokens.length];
}

// /**
//  * Create a set of unique words and the total word count for the given range in
//  * a Node (and its descendants).
//  */
// export function tokeniseUniqueWords(
//   doc: PMNode,
//   from: number,
//   to: number
// ): [Set<string>, number] {
//   const words = new Set<string>();
//   let wordCount = 0;
//   processTextBetween(doc, from, to, (text, _from, leftover) => {
//     if (leftover) wordCount++;
//     const matches = wordRegexMatchAll(text);
//     for (const match of matches) {
//       words.add(match[0]);
//       wordCount++;
//     }
//   });

//   return [words, wordCount];
// }

/**
 * Process chunks of tokenisable plain text under node `topNode` in the given range.
 *
 * A chunk of text is considered tokenisable if it contains "word" tokens. The
 * end boundary of such a chunk might contain additional leftover text which was
 * not considered tokenisable and should be considered as one opaque word token.
 *
 * This condition to decide what's an opaque leftover can be customised by
 * passing a custom `ignoreNode` predicate. The default heuristic is to check
 * for text nodes which should behave as 1 single word (like link nodes with a
 * valid URL).
 *
 * Chunking rules, in the order nodes are visited by `nodesBetween`:
 * - a text node accepted by `ignoreNode` flushes the current chunk with that
 *   node's text as the leftover;
 * - any other text node is appended to the current chunk (its whole text is
 *   used, it is not clipped to `from`/`to`);
 * - a block node flushes the current chunk (possibly with empty text) unless
 *   `ignoreNode` accepts it, in which case the block is skipped entirely;
 * - a `hardBreak` node contributes a `"\n"` to the current chunk;
 * - any other inline node accepted by `ignoreNode` flushes the current chunk
 *   and is not descended into; otherwise it contributes no text of its own.
 *
 * After the traversal, any remaining non-empty text is flushed as a last chunk.
 *
 * @param topNode start node whose descendants are iterated over
 * @param from start position inside the `topNode`
 * @param to end position inside the `topNode`
 * @param onProcessChunk callback receiving the tokenisable plain text, the
 *   start position of this text, and the extra single token leftover if any
 *   (`null` otherwise).
 * @param ignoreNode optional predicate to provide the condition where a text
 *   node should be ignored or not considered tokenisable, and instead included in
 *   the leftover chunk. As mentioned, this node becomes the cutoff point for the
 *   current chunk.
 */
export function processTextBetween(
  topNode: PMNode | Fragment,
  from: number,
  to: number,
  onProcessChunk: (
    text: string,
    textFrom: number,
    leftover: string | null
  ) => void,
  ignoreNode: (node: PMNode) => boolean = isOneWordNode
): void {
  let textFrom = from;
  let text = "";
  // True right after a flush: the next piece of text starts a new chunk, so
  // its position becomes the chunk's start position.
  let startConcat = false;

  // Append a piece of text found at `pos`, starting a new chunk if needed.
  const append = (piece: string, pos: number): void => {
    if (startConcat) textFrom = pos;
    text += piece;
    startConcat = false;
  };

  // Hand the current chunk to the callback and reset the accumulator.
  const flush = (leftover: string | null): void => {
    onProcessChunk(text, textFrom, leftover);
    text = "";
    startConcat = true;
  };

  topNode.nodesBetween(from, to, (node, pos) => {
    if (node.isText) {
      if (ignoreNode(node)) {
        // When no chunk is in progress, report the leftover's own position.
        if (startConcat) textFrom = pos;
        flush(node.text ?? null);
      } else {
        append(node.text ?? "", pos);
      }
    } else if (node.isBlock) {
      // Returning false stops the traversal from descending into this block.
      if (ignoreNode(node)) return false;
      flush(null);
    } else if (node.type.name === "hardBreak") {
      // Routed through `append` so a chunk that begins with a hard break gets
      // the break's position as `textFrom` instead of a stale one.
      append("\n", pos);
    } else if (node.isInline && ignoreNode(node)) {
      flush(null);
      return false;
    }
    // Remaining case: a non-text inline node that is not ignored. `text` is
    // only set on text nodes, so there is nothing to append here (appending
    // `node.text` would concatenate the literal string "undefined").
  });

  if (text !== "") {
    onProcessChunk(text, textFrom, null);
  }
}

/**
 * Default "opaque token" predicate: a node counts as one word when it carries
 * a `link` mark and its text is accepted by `isValidURL`.
 *
 * Nodes without a `link` mark are never treated as a single word.
 */
function isOneWordNode(node: PMNode): boolean {
  for (const mark of node.marks) {
    if (mark.type.name === "link") {
      // A missing text is validated as the empty string.
      return isValidURL(node.text ?? "");
    }
  }
  return false;
}

/**
 * Apply `String.prototype.match` to `text` using the shared
 * `URL_OR_WORD_REGEX` pattern.
 *
 * @param text string to search
 * @returns the match array produced by `match`, or `null` when nothing matches
 */
export function wordRegexMatch(text: string): RegExpMatchArray | null {
  const found = text.match(URL_OR_WORD_REGEX);
  return found;
}

/**
 * Apply `String.prototype.matchAll` to `text` using the shared
 * `URL_OR_WORD_REGEX` pattern.
 *
 * Note that `matchAll` throws a `TypeError` if the pattern lacks the global
 * flag.
 *
 * @param text string to search
 * @returns an iterator over every match, in order of appearance
 */
export function wordRegexMatchAll(
  text: string
): IterableIterator<RegExpMatchArray> {
  const hits = text.matchAll(URL_OR_WORD_REGEX);
  return hits;
}
