/**
 * Regular expression that finds hashtags (for example `#tag`) in free text. Capture group 1 holds
 * the tag without the leading `#`.
 *
 * The active expression is an ASCII-based approximation of the Unicode-aware expression that is
 * commented out below it: a tag starts with an ASCII letter or any non-ASCII character, followed by
 * one or more non-ASCII characters, ASCII word characters (`\w`) or dashes. The negated classes
 * start at `\x00` (not `\x20`) so that ASCII control characters such as tab, carriage return and
 * line feed end a tag instead of becoming part of it.
 *
 * Notes on the commented-out Unicode-aware expression:
 *
 * `[\p{L}\p{M}*+]` represents a single letter in any script whether or not it is encoded using one
 * or two Unicode code points (see https://www.regular-expressions.info/unicode.html for details).
 *
 * `\p{Nd}` represents a digit zero through nine in any script except ideographic scripts.
 *
 * The group at the end is how I translated `\b` to match Unicode word boundaries, since
 * JavaScript does not respect Unicode letters for word boundaries.
 *
 * TODO: This expression allows Unicode letter characters before the hash sign, which it shouldn't.
 *       This is because I couldn't figure out how to convert `\B` properly. An example of this is
 *       `ä#test` matching this expression. [twl 13.Jul.20]
 *
 * You can explore my tester at https://regex101.com/r/78GYyD/5
 *
 * TODO: Add the XRegExp library for this, since Firefox doesn't support Unicode extensions:
 *       https://javascript.info/regexp-unicode . The alternate format basically allows anything
 *       that is not a basic ASCII character, which is too broad. [twl 13.Jul.20]
 */
//const tagFormat = /\B#([\p{L}\p{M}*+][-_\p{Nd}\p{L}\p{M}*+]+([^\p{L}\p{M}*+]|[^\p{L}\p{M}*+][\p{L}\p{M}*+]|[\p{L}\p{M}*+][^\p{L}\p{M}*+]))/gui;
export const tagFormat = /\B#(([^\x00-\x7F]|[a-z])(?:[^\x00-\x7F]|[\w-])+)/gui;

/**
 * Extracts the hashtags found in a string and returns them without the leading `#`.
 *
 * A tag must begin with a letter (in any script); the remaining characters may be letters, numbers,
 * dashes or underscores (in any script). Each distinct tag is reported once, in the order of its
 * first appearance; the comparison is exact, so tags differing only in case are kept separately.
 *
 * @param value - The text to scan for hashtags
 *
 * @returns The distinct tags found, or an empty array when there are none or `value` is not a
 *          string
 */
export function parseTags(value: string): string[] {
  // Callers outside the type system may pass a non-string; treat that as "no tags".
  if (typeof value !== 'string') {
    return [];
  }

  const hashtags = value.match(tagFormat);

  if (hashtags === null) {
    return [];
  }

  // A Set drops duplicates while remembering the order in which tags were first seen.
  const uniqueTags = new Set<string>();

  hashtags.forEach(hashtag => {
    uniqueTags.add(hashtag.slice(1));                               // drop the leading #
  });

  return Array.from(uniqueTags);
}
