import deburr from 'lodash/deburr';
import kebabCase from 'lodash/kebabCase';

/**
 * Words to ignore when extracting keywords. `getKeywords` drops any
 * normalized word that exactly matches an entry in this list.
 */
const STOP_WORDS = ['and', 'by', 'it', 'of', 'or', 'the'];

/**
 * Delimiter placed between keywords when they are joined into a single
 * string (used by `getKeywordDatabase` and `getKeywordSearchQuery`).
 */
const KEYWORD_SEPARATOR = ' ';

/**
 * Extracts a bounded list of search keywords from free-form text.
 *
 * The text has its periods removed, is passed through lodash `deburr` and
 * `kebabCase`, and is then split on hyphens. From the resulting words, the
 * function keeps those that are at least 2 characters long and not listed
 * in `STOP_WORDS`, stops after 10 such words, and truncates each one to
 * its first 10 characters.
 *
 * @param text - The raw text to extract keywords from.
 * @returns Up to 10 keywords, in their order of appearance.
 */
export function getKeywords(text: string): string[] {
  const maxWords = 10;
  const maxWordLength = 10;

  // Drop every literal period before normalizing.
  const withoutPeriods = text.split('.').join('');
  const candidates = kebabCase(deburr(withoutPeriods)).split('-');

  const keywords: string[] = [];

  for (const candidate of candidates) {
    // Take a maximum of 10 words.
    if (keywords.length === maxWords) {
      break;
    }

    // Skip single-letter (or empty) words and stop-words.
    if (candidate.length < 2 || STOP_WORDS.includes(candidate)) {
      continue;
    }

    // Only the first 10 letters are kept, to save space and reduce the
    // chance for typos.
    keywords.push(candidate.slice(0, maxWordLength));
  }

  return keywords;
}

/**
 * Builds the list of cumulative keyword prefixes for a text.
 *
 * The keywords come from `getKeywords`. For keywords `[a, b, c]` the result
 * is `['a', 'a b', 'a b c']`: entry `i` is the first `i + 1` keywords joined
 * with `KEYWORD_SEPARATOR`. When fewer than two keywords are found, the
 * keyword list itself is returned unchanged.
 *
 * @param text - The raw text to derive the prefixes from.
 * @returns One joined prefix per keyword, shortest first.
 */
export function getKeywordDatabase(text: string): string[] {
  const keywords = getKeywords(text);

  // Zero or one keyword: there is nothing to accumulate.
  if (keywords.length < 2) {
    return keywords;
  }

  return keywords.map((_, index) =>
    keywords.slice(0, index + 1).join(KEYWORD_SEPARATOR),
  );
}

/**
 * Builds a single search-query string from a text.
 *
 * @param text - The raw text to derive the query from.
 * @returns The keywords produced by `getKeywords`, joined with
 *   `KEYWORD_SEPARATOR`; an empty string when there are no keywords.
 */
export function getKeywordSearchQuery(text: string): string {
  const keywords = getKeywords(text);

  return keywords.join(KEYWORD_SEPARATOR);
}
