"use strict";

// CommonJS export surface for this module.
// The `__esModule` flag is what interop helpers (such as the
// `_interopRequireDefault` defined in this file) check to decide whether a
// required module needs to be wrapped in `{ default: ... }`.
Object.defineProperty(exports, "__esModule", {
  value: true
});
// The functions assigned below are declared later in this file; function
// declarations are hoisted, so they are already defined at this point.
exports.tokenize = tokenize;
exports.searchify = searchify;
exports.generateNgrams = generateNgrams;
exports.generatePrefixNgrams = generatePrefixNgrams;

var _ngrams = _interopRequireDefault(require("./vendor/natural/ngrams/ngrams"));

var _regexp_tokenizer = require("./vendor/natural/tokenizers/regexp_tokenizer");

/**
 * Normalise a required module so that its value is reachable via `.default`.
 *
 * @param {*} obj - The value returned by `require(...)`.
 * @returns {*} `obj` itself when it is truthy and carries a truthy
 *   `__esModule` flag; otherwise a fresh `{ default: obj }` wrapper.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return {
    default: obj
  };
}

// Logger from the `debug` package, created under the 'qm:natural' namespace.
const debug = require('debug')('qm:natural');

// Single WordTokenizer instance shared by `tokenize` and `searchify`.
// NOTE(review): the exact splitting rules live in the vendored
// regexp_tokenizer module and are not visible here.
const tokenizer = new _regexp_tokenizer.WordTokenizer();

/**
 * Recursively flatten nested arrays into a single-level array.
 *
 * @param {Array} arr - Possibly nested array.
 * @returns {Array} New array holding every non-array element in order.
 */
const flatten = arr => {
  let flat = [];
  arr.forEach(item => {
    // Nested arrays are flattened first, then spliced in by `concat`.
    flat = flat.concat(Array.isArray(item) ? flatten(item) : item);
  });
  return flat;
};

/**
 * Build the inclusive list of integers from `min` to `max`.
 *
 * A reversed range (`max < min`) yields an empty array. Previously only
 * `max === min - 1` returned `[]`, while anything smaller threw
 * `RangeError: Invalid array length` from the `Array()` constructor.
 *
 * @param {number} min - First value of the range.
 * @param {number} max - Last value of the range (inclusive).
 * @returns {number[]} `[min, min + 1, ..., max]`, or `[]` when `max < min`.
 */
const range = (min, max) => Array.from({
  // Clamp at zero so an inverted range is empty instead of an invalid length.
  length: Math.max(0, max - min + 1)
}, (_, i) => i + min);
// Using a similar strategy to https://medium.com/xeneta/fuzzy-search-with-mongodb-and-python-57103928ee5d#.xdxu0stiq
// with some different tuning


/**
 * Split a phrase into tokens using the module's shared tokenizer.
 *
 * @param {string} phrase - Text to tokenize; passed through unchanged.
 * @returns {*} Whatever `tokenizer.tokenize` returns (presumably an array of
 *   word strings — confirm against the vendored tokenizer).
 */
function tokenize(phrase) {
  const tokens = tokenizer.tokenize(phrase);
  return tokens;
}

/**
 * Prepare a search phrase: detect a leading `*` and tokenize the rest.
 *
 * The phrase is trimmed, any run of `*` at its start or end is removed,
 * and the remainder is lower-cased before being tokenized.
 *
 * @param {string} phrase - Raw search input.
 * @returns {Array} Two-element array `[noAnchor, tokens]`, where `noAnchor`
 *   is true when the trimmed phrase starts with `*` (presumably meaning the
 *   match should not be anchored — confirm against callers) and `tokens` is
 *   the tokenizer output.
 */
function searchify(phrase) {
  const cleaned = phrase.trim();
  const hasLeadingStar = cleaned.startsWith('*');
  const withoutStars = cleaned.replace(/^[*]+|[*]+$/g, '');
  return [hasLeadingStar, tokenizer.tokenize(withoutStars.toLowerCase())];
}

/**
 * Generate character n-grams for every word of a phrase.
 *
 * The phrase is lower-cased and tokenized. For each size from `min` to `max`
 * (inclusive), and for each word in order, the word's character n-grams of
 * that size are appended to the result. Duplicates are not removed.
 *
 * @param {string} phrase - Text to process.
 * @param {number} [min=3] - Smallest n-gram size.
 * @param {number} [max=5] - Largest n-gram size.
 * @param {boolean} [prefix=false] - When truthy, keep only n-grams that are
 *   a prefix of at least one word of the phrase.
 * @returns {string[]} N-grams ordered by size, then by word, then by position.
 */
function generateNgrams(phrase, min = 3, max = 5, prefix = false) {
  const words = tokenize(phrase.toLowerCase());
  const grams = [];

  for (const size of range(min, max)) {
    for (const word of words) {
      for (const letters of _ngrams.default.ngrams(word.split(''), size)) {
        grams.push(letters.join(''));
      }
    }
  }

  if (!prefix) {
    return grams;
  }

  // An n-gram taken from the middle of one word still passes if it happens
  // to be the prefix of a different word in the phrase.
  return grams.filter(gram => words.some(word => word.startsWith(gram)));
}

/**
 * Generate only the n-grams that are prefixes of a word in the phrase.
 *
 * Delegates to `generateNgrams` with its `prefix` flag set to true.
 * Leaving `min` or `max` undefined lets `generateNgrams` apply its defaults.
 *
 * @param {string} phrase - Text to process.
 * @param {number} [min] - Smallest n-gram size.
 * @param {number} [max] - Largest n-gram size.
 * @returns {string[]} Prefix n-grams as produced by `generateNgrams`.
 */
function generatePrefixNgrams(phrase, min, max) {
  const prefixOnly = true;
  return generateNgrams(phrase, min, max, prefixOnly);
}

// Emit a debug message once the module's top-level code has run.
debug('loaded');