keeweb/app/scripts/util/phonetic.js

'use strict';

/*
 * Phonetic
 * Copyright 2013 Tom Frost
 */

// removed node.js deps, making it available to load in browser

/**
 * The simple phonetics that sound best before a vowel.
 * @type {Array}
 */
var PHONETIC_PRE_SIMPLE = [
    'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k',
    'l', 'm', 'n', 'p', 'qu', 'r', 's', 't'
];

/**
 * The complex phonetics that sound best before a vowel.
 * @type {Array}
 */
var PHONETIC_PRE_COMPLEX = [
    'bl', 'ch', 'cl', 'cr', 'dr', 'fl', 'fr', 'gl', 'gr', 'kl', 'kr',
    'ph', 'pr', 'pl', 'sc', 'sh', 'sl', 'sn', 'sr', 'st', 'str', 'sw',
    'th', 'tr', 'br', 'v', 'w', 'y', 'z'
];

/**
 * Phonetics that sound best before a vowel: every simple phonetic first,
 * followed by the complex ones.
 * @type {Array}
 */
var PHONETIC_PRE = PHONETIC_PRE_SIMPLE.concat(PHONETIC_PRE_COMPLEX);

/**
 * The number of simple phonetics at the start of the 'pre' set.
 * @type {number}
 */
var PHONETIC_PRE_SIMPLE_LENGTH = PHONETIC_PRE_SIMPLE.length;

/**
 * The simple vowel sound phonetics.
 * @type {Array}
 */
var PHONETIC_MID_SIMPLE = ['a', 'e', 'i', 'o', 'u'];

/**
 * The complex vowel sound phonetics.
 * @type {Array}
 */
var PHONETIC_MID_COMPLEX = ['ee', 'ie', 'oo', 'ou', 'ue'];

/**
 * Vowel sound phonetics: every simple phonetic first, followed by the
 * complex ones.
 * @type {Array}
 */
var PHONETIC_MID = PHONETIC_MID_SIMPLE.concat(PHONETIC_MID_COMPLEX);

/**
 * The number of simple phonetics at the start of the 'mid' set.
 * @type {number}
 */
var PHONETIC_MID_SIMPLE_LENGTH = PHONETIC_MID_SIMPLE.length;

/**
 * The simple phonetics that sound best after a vowel.
 * @type {Array}
 */
var PHONETIC_POST_SIMPLE = [
    'b', 'd', 'f', 'g', 'k', 'l', 'm',
    'n', 'p', 'r', 's', 't', 'y'
];

/**
 * The complex phonetics that sound best after a vowel.
 * @type {Array}
 */
var PHONETIC_POST_COMPLEX = [
    'ch', 'ck', 'ln', 'nk', 'ng', 'rn',
    'sh', 'sk', 'st', 'th', 'x', 'z'
];

/**
 * Phonetics that sound best after a vowel: every simple phonetic first,
 * followed by the complex ones.
 * @type {Array}
 */
var PHONETIC_POST = PHONETIC_POST_SIMPLE.concat(PHONETIC_POST_COMPLEX);

/**
 * The number of simple phonetics at the start of the 'post' set.
 * @type {number}
 */
var PHONETIC_POST_SIMPLE_LENGTH = PHONETIC_POST_SIMPLE.length;

/**
 * A mapping of regular expressions to replacements, which will be run on the
 * resulting word before it gets returned.  The purpose of replacements is to
 * address language subtleties that the phonetic builder is incapable of
 * understanding, such as 've' more pronounceable than just 'v' at the end of
 * a word, 'ey' more pronounceable than 'iy', etc.
 *
 * Each key is the source of a regular expression and each value is its
 * replacement string.  postProcess walks the keys in the order written here
 * and compiles each one without flags, so a rule rewrites only its first
 * match and later rules see the output of earlier ones.  Reordering the
 * entries can therefore change the generated words.
 * @type {{}}
 */
var REPLACEMENTS = {
    // 'quu' becomes 'que'.
    'quu': 'que',
    // 'qu' followed by two vowels keeps only the second vowel (the repeated
    // capture group retains its last iteration).
    'qu([aeiou]){2}': 'qu$1',
    // 'iy' / 'uy' becomes 'ey'.
    '[iu]y': 'ey',
    // 'eye' becomes 'ye'.
    'eye': 'ye',
    // A trailing 'ye' preceded by any character drops its 'e'.
    '(.)ye$': '$1y',
    // 'cie' at the start of the word or after an 'e' becomes 'cei', unless
    // it is the very end of the word.
    '(^|e)cie(?!$)': '$1cei',
    // A word ending in 'v' or 'z' gets a trailing 'e'.
    '([vz])$': '$1e',
    // 'iw' / 'uw' becomes 'ow'.
    '[iu]w': 'ow'
};

/**
 * Appends one syllable to wordObj.word.  A syllable always contains a MID
 * (vowel) phonetic and may additionally carry a PRE phonetic in front of it
 * and a POST phonetic behind it:
 *  - PRE is added when this is the first syllable and the derivative of
 *    wordObj.numeric is not a multiple of 6, when the previous syllable
 *    skipped its POST, or when the syllable is "compound".
 *  - POST is added when PRE was skipped or when the syllable is "compound".
 * A syllable is "compound" when the derivative is a multiple of
 * options.compoundSimplicity.  The vowel of a first syllable that has no PRE
 * is forced to be a simple phonetic.
 *
 * @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The
 *      word object on which to operate.  Its word, lastSkippedPre and
 *      lastSkippedPost fields are updated in place (numeric is advanced by
 *      getNextPhonetic).
 */
function addSyllable(wordObj) {
    const derivative = getDerivative(wordObj.numeric);
    const isCompound = derivative % wordObj.opts.compoundSimplicity === 0;
    const isFirstSyllable = wordObj.word === '';
    const firstMayTakePre = derivative % 6 > 0;

    // Leading consonant sound.
    const usePre = (isFirstSyllable && firstMayTakePre) ||
        wordObj.lastSkippedPost || isCompound;
    if (usePre) {
        wordObj.word += getNextPhonetic(PHONETIC_PRE,
            PHONETIC_PRE_SIMPLE_LENGTH, wordObj);
    }
    wordObj.lastSkippedPre = !usePre;

    // Vowel sound; always present.
    const forceSimpleVowel = isFirstSyllable && wordObj.lastSkippedPre;
    wordObj.word += getNextPhonetic(PHONETIC_MID, PHONETIC_MID_SIMPLE_LENGTH,
        wordObj, forceSimpleVowel);

    // Trailing consonant sound.
    const usePost = wordObj.lastSkippedPre || isCompound;
    if (usePost) {
        wordObj.word += getNextPhonetic(PHONETIC_POST,
            PHONETIC_POST_SIMPLE_LENGTH, wordObj);
    }
    wordObj.lastSkippedPost = !usePost;
}

/**
 * Computes a "derivative" of a number: one plus the sum of the remainders
 * obtained by repeatedly dividing the number by 7 until it reaches zero.
 * Decisions are based on this value rather than on the wordObj's current
 * numeric so that the same decisions are not always made around the same
 * phonetics.
 *
 * @param {number} num A number from which a derivative should be calculated
 * @returns {number} The derivative (1 when num is falsy).
 */
function getDerivative(num) {
    let total = 1;
    for (let rest = num; rest; rest = Math.floor(rest / 7)) {
        total += rest % 7;
    }
    return total;
}

/**
 * Combines the option defaults with the provided overrides.  Available
 * options are:
 *  - length: The number of characters the generated word should have.
 *          Default is 16 (also used when the override is falsy).
 *  - seed: A string or number with which to seed the generator.  Using the
 *          same seed (with the same other options) will coerce the generator
 *          into producing the same word.  A numeric seed of 0 is honoured;
 *          any other falsy value (undefined, null, '', NaN, false) selects
 *          the default, which is random.
 *  - phoneticSimplicity: The greater this number, the simpler the phonetics.
 *          For example, 1 might produce 'str' while 5 might produce 's' for
 *          the same syllable.  Minimum is 1, default is 5.
 *  - compoundSimplicity: The greater this number, the less likely the
 *          resulting word will sound "compound", such as "ripkuth" instead of
 *          "riputh".  Minimum is 1, default is 5.
 *
 * @param {{}} [overrides] A set of options and values with which to override
 *      the defaults.
 * @returns {{length, seed, phoneticSimplicity, compoundSimplicity}}
 *      An options object.
 */
function getOptions(overrides) {
    const given = overrides || {};
    // 0 is a valid numeric seed and must not be swallowed by a truthiness
    // check; every other falsy value still means "no seed supplied".
    const hasSeed = given.seed === 0 || Boolean(given.seed);
    return {
        length: given.length || 16,
        // NOTE: Math.random() is not cryptographically secure.  Callers that
        // generate secrets should pass their own seed from a secure source.
        seed: hasSeed ? given.seed : Math.random(),
        phoneticSimplicity: given.phoneticSimplicity ? Math.max(given.phoneticSimplicity, 1) : 5,
        compoundSimplicity: given.compoundSimplicity ? Math.max(given.compoundSimplicity, 1) : 5
    };
}

/**
 * Picks the next pseudo-random phonetic from a phonetic set and advances the
 * generator state.  The choice is limited to the leading "simple" entries of
 * the set when forceSimple is truthy or when
 * (wordObj.numeric + derivative) % options.phoneticSimplicity is non-zero;
 * otherwise the whole set, complex phonetics included, is eligible.
 * Afterwards wordObj.numeric is replaced by the hash of the old numeric
 * concatenated with the word built so far.
 *
 * @param {Array} phoneticSet The array of phonetics from which to choose
 * @param {number} simpleCap The number of 'simple' phonetics at the beginning
 *      of the phoneticSet
 * @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The
 *      wordObj for which the phonetic is being chosen; its numeric field is
 *      updated in place
 * @param {boolean} [forceSimple] true to force a simple phonetic to be
 *      chosen
 * @returns {string} The chosen phonetic.
 */
function getNextPhonetic(phoneticSet, simpleCap, wordObj, forceSimple) {
    const current = wordObj.numeric;
    const derivative = getDerivative(current);
    const wantsSimple = (current + derivative) % wordObj.opts.phoneticSimplicity > 0;
    const poolSize = (wantsSimple || forceSimple) ? simpleCap : phoneticSet.length;
    const chosen = phoneticSet[current % poolSize];

    // The word does not yet contain the phonetic chosen above.
    wordObj.numeric = getNumericHash(current + wordObj.word);
    return chosen;
}

/**
 * Generates a numeric hash based on the input data.  The input is converted
 * to a string, suffixed with '-Phonetic', and folded one UTF-16 code unit at
 * a time: the running value is multiplied by 31, the code unit is added, and
 * the result is wrapped to an unsigned 32-bit integer.
 *
 * @param {string|number} data The string or number to be hashed.
 * @returns {number} An integer in the range 0 to 2^32 - 1.
 */
function getNumericHash(data) {
    const text = data + '-Phonetic';
    let hash = 0;
    for (let pos = 0; pos < text.length; pos += 1) {
        // Math.imul keeps the multiplication within 32 bits; >>> 0 then
        // reinterprets the sum as unsigned.
        hash = (Math.imul(31, hash) + text.charCodeAt(pos)) >>> 0;
    }
    return hash;
}

/**
 * Applies post-processing to a word after it has already been generated.
 * Every entry of REPLACEMENTS is applied in turn: its key is compiled as a
 * regular expression (no flags, so only the first match is rewritten) and
 * its value is used as the replacement.  The result is stored back on
 * wordObj.word.
 *
 * @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The
 *      word object to be processed; its word field is updated in place.
 * @returns {string} The processed word.
 */
function postProcess(wordObj) {
    for (const pattern of Object.keys(REPLACEMENTS)) {
        const replacement = REPLACEMENTS[pattern];
        wordObj.word = wordObj.word.replace(new RegExp(pattern), replacement);
    }
    return wordObj.word;
}

/**
 * Generates a new word based on the given options.  For available options,
 * see getOptions.
 *
 * Syllables are added until the word is at least options.length characters
 * long after post-processing, and the result is then cut to exactly that
 * length.  Checking the post-processed length matters because some
 * REPLACEMENTS rules shorten the word ('qu' plus two vowels, 'eye', a
 * trailing 'ye'); stopping on the raw length alone could return fewer
 * characters than requested.
 *
 * @param {*} [options] A collection of options to control the word generator.
 * @returns {string} A generated word of options.length characters (an empty
 *      string when the length is not positive).
 */
module.exports.generate = function(options) {
    const opts = getOptions(options);
    const length = opts.length;
    const wordObj = {
        numeric: getNumericHash(opts.seed),
        lastSkippedPost: false,
        word: '',
        opts: opts
    };
    let result = '';
    while (result.length < length) {
        addSyllable(wordObj);
        if (wordObj.word.length >= length) {
            // Post-process a throwaway copy so the raw word stays intact in
            // case another syllable has to be appended.
            result = postProcess({ word: wordObj.word });
        }
    }
    return result.slice(0, length);
};
generator presets 2016-02-15 22:06:11 +01:00			`'use strict';`

			`/*`
			`* Phonetic`
			`* Copyright 2013 Tom Frost`
			`*/`

			`// removed node.js deps, making it available to load in browser`

			`/**`
			`* Phonetics that sound best before a vowel.`
			`* @type {Array}`
			`*/`
			`var PHONETIC_PRE = [`
			`// Simple phonetics`
			`'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p',`
			`'qu', 'r', 's', 't',`
			`// Complex phonetics`
			`'bl',`
			`'ch', 'cl', 'cr',`
			`'dr',`
			`'fl', 'fr',`
			`'gl', 'gr',`
			`'kl', 'kr',`
			`'ph', 'pr', 'pl',`
			`'sc', 'sh', 'sl', 'sn', 'sr', 'st', 'str', 'sw',`
			`'th', 'tr',`
			`'br',`
			`'v', 'w', 'y', 'z'`
			`];`

			`/**`
			`* The number of simple phonetics within the 'pre' set.`
			`* @type {number}`
			`*/`
			`var PHONETIC_PRE_SIMPLE_LENGTH = 16;`

			`/**`
			`* Vowel sound phonetics.`
			`* @type {Array}`
			`*/`
			`var PHONETIC_MID = [`
			`// Simple phonetics`
			`'a', 'e', 'i', 'o', 'u',`
			`// Complex phonetics`
			`'ee', 'ie', 'oo', 'ou', 'ue'`
			`];`

			`/**`
			`* The number of simple phonetics within the 'mid' set.`
			`* @type {number}`
			`*/`
			`var PHONETIC_MID_SIMPLE_LENGTH = 5;`

			`/**`
			`* Phonetics that sound best after a vowel.`
			`* @type {Array}`
			`*/`
			`var PHONETIC_POST = [`
			`// Simple phonetics`
			`'b', 'd', 'f', 'g', 'k', 'l', 'm', 'n', 'p', 'r', 's', 't', 'y',`
			`// Complex phonetics`
			`'ch', 'ck',`
			`'ln',`
			`'nk', 'ng',`
			`'rn',`
			`'sh', 'sk', 'st',`
			`'th',`
			`'x', 'z'`
			`];`

			`/**`
			`* The number of simple phonetics within the 'post' set.`
			`* @type {number}`
			`*/`
			`var PHONETIC_POST_SIMPLE_LENGTH = 13;`

			`/**`
			`* A mapping of regular expressions to replacements, which will be run on the`
			`* resulting word before it gets returned. The purpose of replacements is to`
			`* address language subtleties that the phonetic builder is incapable of`
			`* understanding, such as 've' more pronounceable than just 'v' at the end of`
			`* a word, 'ey' more pronounceable than 'iy', etc.`
			`* @type {{}}`
			`*/`
			`var REPLACEMENTS = {`
			`'quu': 'que',`
			`'qu([aeiou]){2}': 'qu$1',`
			`'[iu]y': 'ey',`
			`'eye': 'ye',`
			`'(.)ye$': '$1y',`
			`'(^\|e)cie(?!$)': '$1cei',`
			`'([vz])$': '$1e',`
			`'[iu]w': 'ow'`
			`};`

			`/**`
			`* Adds a single syllable to the word contained in the wordObj. A syllable`
			`* contains, at maximum, a phonetic from each the PRE, MID, and POST phonetic`
			`* sets. Some syllables will omit pre or post based on the`
			`* options.compoundSimplicity.`
			`*`
			`* @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The`
			`* word object on which to operate.`
			`*/`
			`function addSyllable(wordObj) {`
			`var deriv = getDerivative(wordObj.numeric),`
			`compound = deriv % wordObj.opts.compoundSimplicity === 0,`
			`first = wordObj.word === '',`
			`preOnFirst = deriv % 6 > 0;`
			`if ((first && preOnFirst) \|\| wordObj.lastSkippedPost \|\| compound) {`
			`wordObj.word += getNextPhonetic(PHONETIC_PRE,`
			`PHONETIC_PRE_SIMPLE_LENGTH, wordObj);`
			`wordObj.lastSkippedPre = false;`
			`} else {`
			`wordObj.lastSkippedPre = true;`
			`}`
			`wordObj.word += getNextPhonetic(PHONETIC_MID, PHONETIC_MID_SIMPLE_LENGTH,`
			`wordObj, first && wordObj.lastSkippedPre);`
			`if (wordObj.lastSkippedPre \|\| compound) {`
			`wordObj.word += getNextPhonetic(PHONETIC_POST,`
			`PHONETIC_POST_SIMPLE_LENGTH, wordObj);`
			`wordObj.lastSkippedPost = false;`
			`} else {`
			`wordObj.lastSkippedPost = true;`
			`}`
			`}`

			`/**`
			`* Gets a derivative of a number by repeatedly dividing it by 7 and adding the`
			`* remainders together. It's useful to base decisions on a derivative rather`
			`* than the wordObj's current numeric, as it avoids making the same decisions`
			`* around the same phonetics.`
			`*`
			`* @param {number} num A number from which a derivative should be calculated`
			`* @returns {number} The derivative.`
			`*/`
			`function getDerivative(num) {`
			`var derivative = 1;`
			`while (num) {`
			`derivative += num % 7;`
			`num = Math.floor(num / 7);`
			`}`
			`return derivative;`
			`}`

			`/**`
			`* Combines the option defaults with the provided overrides. Available`
			`* options are:`
			`* - seed: A string or number with which to seed the generator. Using the`
			`* same seed (with the same other options) will coerce the generator`
			`* into producing the same word. Default is random.`
			`* - phoneticSimplicity: The greater this number, the simpler the phonetics.`
			`* For example, 1 might produce 'str' while 5 might produce 's' for`
			`* the same syllable. Minimum is 1, default is 5.`
			`* - compoundSimplicity: The greater this number, the less likely the`
			`* resulting word will sound "compound", such as "ripkuth" instead of`
			`* "riputh". Minimum is 1, default is 5.`
			`*`
			`* @param {{}} overrides A set of options and values with which to override`
			`* the defaults.`
			`* @returns {{seed, phoneticSimplicity, compoundSimplicity}}`
			`* An options object.`
			`*/`
			`function getOptions(overrides) {`
			`var options = {};`
			`overrides = overrides \|\| {};`
			`options.length = overrides.length \|\| 16;`
			`options.seed = overrides.seed \|\| Math.random();`
eslint 2016-07-17 13:30:38 +02:00			`options.phoneticSimplicity = overrides.phoneticSimplicity ? Math.max(overrides.phoneticSimplicity, 1) : 5;`
			`options.compoundSimplicity = overrides.compoundSimplicity ? Math.max(overrides.compoundSimplicity, 1) : 5;`
generator presets 2016-02-15 22:06:11 +01:00			`return options;`
			`}`

			`/**`
			`* Gets the next pseudo-random phonetic from a given phonetic set,`
			`* intelligently determining whether to include "complex" phonetics in that`
			`* set based on the options.phoneticSimplicity.`
			`*`
			`* @param {Array} phoneticSet The array of phonetics from which to choose`
			`* @param {number} simpleCap The number of 'simple' phonetics at the beginning`
			`* of the phoneticSet`
			`* @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The`
			`* wordObj for which the phonetic is being chosen`
			`* @param {boolean} [forceSimple] true to force a simple phonetic to be`
			`* chosen; otherwise, the function will choose whether to include complex`
			`* phonetics based on the derivative of wordObj.numeric.`
			`* @returns {string} The chosen phonetic.`
			`*/`
			`function getNextPhonetic(phoneticSet, simpleCap, wordObj, forceSimple) {`
			`var deriv = getDerivative(wordObj.numeric),`
			`simple = (wordObj.numeric + deriv) % wordObj.opts.phoneticSimplicity > 0,`
			`cap = simple \|\| forceSimple ? simpleCap : phoneticSet.length,`
			`phonetic = phoneticSet[wordObj.numeric % cap];`
			`wordObj.numeric = getNumericHash(wordObj.numeric + wordObj.word);`
			`return phonetic;`
			`}`

			`/**`
			`* Generates a numeric hash based on the input data. The hash is an md5, with`
			`* each block of 32 bits converted to an integer and added together.`
			`*`
			`* @param {string\|number} data The string or number to be hashed.`
			`* @returns {number}`
			`*/`
			`function getNumericHash(data) {`
fix #458: improved phonetic.js password generator 2016-12-04 06:43:57 +01:00			`let numeric = 0;`
generator presets 2016-02-15 22:06:11 +01:00			`data += '-Phonetic';`
fix #458: improved phonetic.js password generator 2016-12-04 06:43:57 +01:00			`for (let i = 0, len = data.length; i < len; i++) {`
			`let chr = data.charCodeAt(i);`
			`numeric = ((numeric << 5) - numeric) + chr;`
			`numeric >>>= 0;`
generator presets 2016-02-15 22:06:11 +01:00			`}`
			`return numeric;`
			`}`

			`/**`
			`* Applies post-processing to a word after it has already been generated. In`
			`* this phase, the REPLACEMENTS are executed, applying language intelligence`
			`* that can make generated words more pronounceable. The first letter is`
			`* also capitalized.`
			`*`
			`* @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The`
			`* word object to be processed.`
			`* @returns {string} The processed word.`
			`*/`
			`function postProcess(wordObj) {`
			`var regex;`
			`for (var i in REPLACEMENTS) {`
			`if (REPLACEMENTS.hasOwnProperty(i)) {`
			`regex = new RegExp(i);`
			`wordObj.word = wordObj.word.replace(regex, REPLACEMENTS[i]);`
			`}`
			`}`
			`return wordObj.word;`
			`}`

			`/**`
			`* Generates a new word based on the given options. For available options,`
			`* see getOptions.`
			`*`
			`* @param {*} [options] A collection of options to control the word generator.`
			`* @returns {string} A generated word.`
			`*/`
			`module.exports.generate = function(options) {`
			`options = getOptions(options);`
			`var length = options.length,`
			`wordObj = {`
			`numeric: getNumericHash(options.seed),`
			`lastSkippedPost: false,`
			`word: '',`
			`opts: options`
			`};`
			`while (wordObj.word.length < length) {`
			`addSyllable(wordObj);`
			`}`
			`return postProcess(wordObj).substr(0, length);`
			`};`