
export class TextUtils {

  /**
   * Removes every character whose UTF-16 code unit is outside the 0-255 range.
   *
   * @param text Value to clean. It is expected to be a string: `replace` is
   *   called on it directly, so a truthy value without that method will throw.
   * @returns The input without any character above `\xFF`, or `false` when
   *   `text` is falsy (empty string, `null`, `undefined`, ...).
   */
  static onlyLatin(text: any) {
    if (!text) {
      // Kept as-is for backward compatibility: falsy input yields `false`, not ''.
      return false;
    }

    return text.replace(/[^\x00-\xFF]/g, '');
  }

  /**
   * Extracts the plain words from any string (i.e. HTML, Markdown, etc.).
   *
   * Steps:
   * - Split the input into words on any run of whitespace
   * - Strip directives from each word via `removeDirectives` (i.e. [AAA](BBB) -> AAA)
   * - Keep only [a-zA-Z'?¿] characters
   * - Drop the words that ended up empty and join the rest with a single space
   *
   * @param text Raw text to clean.
   * @returns The surviving words separated by exactly one space, or '' when
   *   `text` is empty/falsy or nothing survives the filtering.
   */
  static onlyText(text: string): string {
    if (!text) {
      return '';
    }

    return text
      // Split on every kind of whitespace (not only ' ') so that words sitting
      // on different lines or separated by tabs are not glued together.
      .split(/\s+/)
      .map((word) => removeDirectives(word).replace(/[^a-zA-Z'?¿]/g, ''))
      // Filter AFTER stripping: a word made only of digits/punctuation becomes
      // '' at this point and would otherwise leave a double space in the output.
      .filter((word) => word.length > 0)
      .join(' ');
  }
}

/**
 * Replaces every directive/link found in a single word by its label.
 *
 * A directive is a `[label]` optionally followed by any number of `(...)`
 * and/or `{...}` argument groups. Examples:
 * - [AAA](BBB) -> AAA
 * - [AAA](BBB)CCC -> AAACCC
 * - anyText[AAA]{BBB}(CCC) -> AAA
 * - :name[AAA]{BBB} -> AAA
 * - [AAA](BBB)[CCC](DDD) -> AAACCC
 *
 * Rules:
 * - Text before the first label (the directive name, if any) is dropped
 * - The argument groups that directly follow a label are dropped
 * - Any other text after a directive is kept
 *
 * @param word A single word (no whitespace expected).
 * @returns The word with its directives reduced to their labels, the word
 *   itself when it holds no `[label]`, or '' for an empty/falsy input.
 */
function removeDirectives(word: string): string {
  if (!word) {
    return '';
  }

  // `[label]` followed by any run of `(...)` / `{...}` groups.
  // - The label is matched with `[^\]]*` (not a greedy `.*`) so that two
  //   directives in the same word are never merged into a single capture.
  // - The closing `)` / `}` is optional: a word cut in the middle of its
  //   arguments (i.e. `[AAA](url` from `[AAA](url "title")`) still loses them.
  const directive = /\[([^\]]*)\](?:\([^)]*\)?|\{[^}]*\}?)*/g;

  const start = word.search(directive);
  if (start === -1) {
    // Return the word as is
    return word;
  }

  // Drop the prefix, then swap each directive for its captured label.
  return word.slice(start).replace(directive, '$1');
}
