// Based on https://github.com/regexhq/word-regex
//
// Splits a string into word-like tokens; the number of matches translates to
// the number of words. Alternatives, in the order they are tried:
//   1. Latin letters, digits, `_`, `-`, `–`, `—`, `'`, `’`, plus the
//      \u0392-\u03c9 (Greek) and \u0400-\u04FF (Cyrillic) ranges
//   2. CJK ideographs, Hiragana and Hangul syllables (and \u0400-\u04FF again)
//   3. runs of ä Ä å Å ö Ö
//   4. Armenian letters and the marks \u0559-\u055B
//   5. `\w+` as a fallback
// The regex carries the `g` flag, so `exec()`/`test()` on this shared instance
// are stateful through `lastIndex`.
export const WORD_REGEX =
  /[a-zA-Z0-9_\-–—'’\u0392-\u03c9\u0400-\u04FF\u0027]+|[\u4E00-\u9FFF\u3400-\u4dbf\uf900-\ufaff\u3040-\u309f\uac00-\ud7af\u0400-\u04FF]+|[\u00E4\u00C4\u00E5\u00C5\u00F6\u00D6]+|[\u0531-\u0556\u0561-\u0586\u0559\u055A\u055B]+|\w+/gu;

/**
 * Create a new RegExp by wrapping the source of `baseRegex` with raw pattern
 * text on either side.
 *
 * @param baseRegex - regex whose `source` is embedded verbatim
 * @param prefix - pattern text placed before the source (defaults to nothing)
 * @param postfix - pattern text placed after the source (defaults to nothing)
 * @param flags - flags for the new regex; when omitted, the flags of
 *   `baseRegex` are reused
 * @returns a newly constructed RegExp; `baseRegex` itself is not modified
 */
export function buildRegex(
  baseRegex: RegExp,
  prefix = "",
  postfix = "",
  flags?: string
) {
  const wrappedSource = prefix + baseRegex.source + postfix;
  // An explicit empty string is a valid choice ("no flags"), hence `??`.
  const effectiveFlags = flags ?? baseRegex.flags;

  return new RegExp(wrappedSource, effectiveFlags);
}

/**
 * Build a RegExp that matches either of two regexes (boolean OR).
 *
 * Only the `source` of each input is used; their flags are NOT carried over.
 * The resulting regex has exactly the `flags` passed here (none when omitted).
 * Each input is wrapped in its own capture group, and the whole alternation in
 * an outer one.
 *
 * e.g.
 * const a = /regexa/
 * const b = /regexb/
 * const c = combineRegex(a, b) // equivalent to /((regexa)|(regexb))/
 *
 * @param firstRegex - alternative that is tried first
 * @param secondRegex - alternative that is tried second
 * @param flags - flags for the combined regex
 */
export function combineRegex(
  firstRegex: RegExp,
  secondRegex: RegExp,
  flags?: string
) {
  const alternatives = [firstRegex, secondRegex].map(
    (regex) => `(${regex.source})`
  );

  return new RegExp(`(${alternatives.join("|")})`, flags);
}

/**
 * RegExp for well formed URLs.
 * Taken from https://gist.github.com/dperini/729294
 *
 * The pattern is unanchored and case-insensitive (`i`), so it can be embedded
 * in other patterns through its `source`. The fragments below are joined
 * without a separator; the nesting of the array mirrors the nesting of the
 * regex groups.
 */
export const BASE_URL_REGEX = new RegExp(
  [
    // protocol identifier (optional)
    "(?:(?:(?:https?):)?\\/\\/)?",
    // user:pass BasicAuth (optional)
    "(?:\\S+(?::\\S*)?@)?",
    "(?:",
    /* --- alternative 1: dotted IPv4 address --- */
    // IP address exclusion
    // private & local networks
    "(?!(?:10|127)(?:\\.\\d{1,3}){3})",
    "(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})",
    "(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})",
    // IP address dotted notation octets
    // excludes loopback network 0.0.0.0
    // excludes reserved space >= 224.0.0.0
    // excludes network & broadcast addresses
    // (first & last IP address of each class)
    "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])",
    "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}",
    "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))",
    "|",
    /* --- alternative 2: host name --- */
    // host & domain names, may end with dot
    // can be replaced by a shortest alternative
    // (?![-_])(?:[-\\w\\u00a1-\\uffff]{0,63}[^-_]\\.)+
    "(?:",
    "(?:",
    "[a-z0-9\\u00a1-\\uffff]",
    "[a-z0-9\\u00a1-\\uffff_-]{0,62}",
    ")?",
    "[a-z0-9\\u00a1-\\uffff]\\.",
    ")+",
    // TLD identifier name, may end with dot
    "(?:[a-z\\u00a1-\\uffff]{2,}\\.?)",
    ")",
    // port number (optional)
    "(?::\\d{2,5})?",
    // resource path (optional)
    "(?:[/?#]\\S*)?",
  ].join(""),
  "i"
);

/**
 * RegExp for a complete well formed URL: `BASE_URL_REGEX` anchored with `^…$`.
 *
 * NOTE: this instance carries the `g` flag, so `test()`/`exec()` read and
 * update `lastIndex`; after a successful match the next call on the same
 * instance starts from that index rather than from the beginning.
 */
export const URL_REGEX = buildRegex(BASE_URL_REGEX, "^", "$", "gui");
/** Same anchored pattern as `URL_REGEX`, but without the `g` flag (flags `ui`). */
export const URL_REGEX_EXACT = buildRegex(BASE_URL_REGEX, "^", "$", "ui");

/**
 * Globally matches either a URL (per `BASE_URL_REGEX`) or a word (per
 * `WORD_REGEX`); the URL alternative is tried first.
 *
 * `combineRegex` copies only the pattern sources, so flags must be restated
 * here. `i` restores the case-insensitivity `BASE_URL_REGEX` is compiled with;
 * without it, a URL containing uppercase letters (e.g. `Example.com`) fails the
 * URL alternative and is split into several word matches.
 */
export const URL_OR_WORD_REGEX = combineRegex(BASE_URL_REGEX, WORD_REGEX, "gi");

/**
 * RegEx for parsing a Markdown style fenced `CodeBlock`.
 *
 * Capture group 1 is the language tag directly after the opening fence (may be
 * empty); capture group 2 is the content between the fences. The content is
 * matched lazily, so it ends at the first closing fence that follows a newline.
 */
export const MARKDOWN_CODEBLOCK_REGEX = /```([a-z]*)\n([\s\S]*?)\n```/iu;

/**
 * RegEx to parse markdown style input for `CodeBlock` along with the language.
 *
 * Matches input that consists solely of an opening fence: three backticks, an
 * optional lowercase language name (capture group 1, `undefined` when absent)
 * and exactly one trailing whitespace character. The regex has no flags, so
 * the language name is matched case-sensitively.
 */
export const CODEBLOCK_MARKDOWN_INPUT_REGEX = /^```([a-z]+)?[\s\n]$/;

/**
 * RegEx to parse markdown style block equation `$$ <equation> $$`.
 *
 * The opening `$$` must sit at the start of the input or directly after a
 * whitespace character (which is then part of the overall match). Capture
 * group 1 is the equation text, which may be empty and cannot contain `$`.
 */
export const BLOCK_MATH_REGEX = /(?:^|\s)(?:\$\$)(((?!\$).)*)(?:\$\$)/iu;

/**
 * RegEx to parse markdown style inline equation `$ <equation> $`.
 *
 * The opening `$` must sit at the start of the input or directly after a
 * whitespace character. A leading negative lookahead rejects positions where a
 * `$$ … $$` block equation starts, and the closing `$` may not be the first of
 * a `$$` pair. Capture group 2 is the equation text (at least one character,
 * no `$`); capture group 1 lives inside the negative lookahead and is
 * therefore always `undefined`.
 */
export const INLINE_MATH_REGEX =
  /(?:^|\s)(?!\$\$(?:((?!\$).)+)\$\$)(?:\$(((?!\$).)+)((?!\$\$)\$))/iu;
