// keeweb/app/scripts/util/phonetic.js
/*
* Phonetic
* Copyright 2013 Tom Frost
*/
// removed node.js deps, making it available to load in browser
/**
 * Phonetics that sound best before a vowel.
 *
 * Element order is significant: getNextPhonetic() indexes into this array, and
 * only the first PHONETIC_PRE_SIMPLE_LENGTH entries are treated as "simple".
 * The trailing 'v', 'w', 'y', 'z' therefore sit outside the simple prefix and
 * are only reachable when complex phonetics are allowed.
 * @type {Array}
 */
const PHONETIC_PRE = [
    // Simple phonetics
    'b',
    'c',
    'd',
    'f',
    'g',
    'h',
    'j',
    'k',
    'l',
    'm',
    'n',
    'p',
    'qu',
    'r',
    's',
    't',
    // Complex phonetics
    'bl',
    'ch',
    'cl',
    'cr',
    'dr',
    'fl',
    'fr',
    'gl',
    'gr',
    'kl',
    'kr',
    'ph',
    'pr',
    'pl',
    'sc',
    'sh',
    'sl',
    'sn',
    'sr',
    'st',
    'str',
    'sw',
    'th',
    'tr',
    'br',
    'v',
    'w',
    'y',
    'z'
];
/**
 * The number of simple phonetics within the 'pre' set, i.e. the length of the
 * leading run of PHONETIC_PRE that may be chosen when only simple phonetics
 * are allowed.
 * @type {number}
 */
const PHONETIC_PRE_SIMPLE_LENGTH = 16;
/**
 * Vowel sound phonetics.
 *
 * Element order is significant: the first PHONETIC_MID_SIMPLE_LENGTH entries
 * form the "simple" prefix used by getNextPhonetic().
 * @type {Array}
 */
const PHONETIC_MID = [
    // Simple phonetics
    'a',
    'e',
    'i',
    'o',
    'u',
    // Complex phonetics
    'ee',
    'ie',
    'oo',
    'ou',
    'ue'
];
/**
 * The number of simple phonetics within the 'mid' set, i.e. the length of the
 * leading run of PHONETIC_MID made up of single vowels.
 * @type {number}
 */
const PHONETIC_MID_SIMPLE_LENGTH = 5;
/**
 * Phonetics that sound best after a vowel.
 *
 * Element order is significant: the first PHONETIC_POST_SIMPLE_LENGTH entries
 * form the "simple" prefix used by getNextPhonetic(). The trailing 'x' and 'z'
 * sit outside that prefix.
 * @type {Array}
 */
const PHONETIC_POST = [
    // Simple phonetics
    'b',
    'd',
    'f',
    'g',
    'k',
    'l',
    'm',
    'n',
    'p',
    'r',
    's',
    't',
    'y',
    // Complex phonetics
    'ch',
    'ck',
    'ln',
    'nk',
    'ng',
    'rn',
    'sh',
    'sk',
    'st',
    'th',
    'x',
    'z'
];
/**
 * The number of simple phonetics within the 'post' set, i.e. the length of the
 * leading run of PHONETIC_POST made up of single letters.
 * @type {number}
 */
const PHONETIC_POST_SIMPLE_LENGTH = 13;
/**
 * A mapping of regular expressions to replacements, which will be run on the
 * resulting word before it gets returned. The purpose of replacements is to
 * address language subtleties that the phonetic builder is incapable of
 * understanding, such as 've' being more pronounceable than just 'v' at the
 * end of a word, 'ey' being more pronounceable than 'iy', etc.
 *
 * Keys are regular-expression sources; postProcess() compiles each one without
 * flags and applies them in the order listed here, so each rule rewrites only
 * its first match and later rules see the output of earlier ones.
 * @type {{}}
 */
const REPLACEMENTS = {
    'quu': 'que',
    'qu([aeiou]){2}': 'qu$1',
    '[iu]y': 'ey',
    'eye': 'ye',
    '(.)ye$': '$1y',
    '(^|e)cie(?!$)': '$1cei',
    '([vz])$': '$1e',
    '[iu]w': 'ow'
};
/**
 * Adds a single syllable to the word contained in the wordObj. A syllable
 * contains, at maximum, a phonetic from each of the PRE, MID, and POST
 * phonetic sets. Some syllables will omit pre or post based on
 * opts.compoundSimplicity.
 *
 * Updates wordObj.word, wordObj.lastSkippedPre and wordObj.lastSkippedPost in
 * place; wordObj.numeric is advanced by each getNextPhonetic() call.
 *
 * @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The
 *      word object on which to operate.
 */
function addSyllable(wordObj) {
    // All decisions for this syllable are taken from the numeric as it was on
    // entry, even though getNextPhonetic() re-hashes it after every pick.
    const deriv = getDerivative(wordObj.numeric);
    const compound = deriv % wordObj.opts.compoundSimplicity === 0;
    const first = wordObj.word === '';
    const preOnFirst = deriv % 6 > 0;

    // Leading consonant: usually on the first syllable, always after a
    // syllable that ended on its vowel, and always in a compound syllable.
    if ((first && preOnFirst) || wordObj.lastSkippedPost || compound) {
        wordObj.word += getNextPhonetic(PHONETIC_PRE, PHONETIC_PRE_SIMPLE_LENGTH, wordObj);
        wordObj.lastSkippedPre = false;
    } else {
        wordObj.lastSkippedPre = true;
    }

    // Vowel: forced to a simple one when the whole word starts with it.
    const forceSimpleVowel = first && wordObj.lastSkippedPre;
    wordObj.word += getNextPhonetic(
        PHONETIC_MID,
        PHONETIC_MID_SIMPLE_LENGTH,
        wordObj,
        forceSimpleVowel
    );

    // Trailing consonant: required when the leading one was skipped, and in
    // compound syllables.
    if (wordObj.lastSkippedPre || compound) {
        wordObj.word += getNextPhonetic(PHONETIC_POST, PHONETIC_POST_SIMPLE_LENGTH, wordObj);
        wordObj.lastSkippedPost = false;
    } else {
        wordObj.lastSkippedPost = true;
    }
}
/**
 * Gets a derivative of a number by repeatedly dividing it by 7 and adding the
 * remainders together (starting from 1). It's useful to base decisions on a
 * derivative rather than the wordObj's current numeric, as it avoids making
 * the same decisions around the same phonetics.
 *
 * For the non-negative integers produced by getNumericHash() this is one plus
 * the sum of the base-7 digits. Negative and non-finite inputs terminate
 * instead of looping forever: the quotient is truncated toward zero, and
 * infinities are not iterated at all.
 *
 * @param {number} num A number from which a derivative should be calculated
 * @returns {number} The derivative.
 */
function getDerivative(num) {
    let derivative = 1;
    let remaining = num;
    // Number() keeps numeric strings working as they did with the plain
    // truthiness check; the finiteness guard stops +/-Infinity from spinning.
    while (remaining && Number.isFinite(Number(remaining))) {
        derivative += remaining % 7;
        // trunc (not floor) so negative quotients reach zero: floor(-1 / 7) is -1.
        remaining = Math.trunc(remaining / 7);
    }
    return derivative;
}
/**
 * Combines the option defaults with the provided overrides. Available
 * options are:
 *  - length: The length the generated word is cut to. Default is 16.
 *  - seed: A string or number with which to seed the generator. Using the
 *      same seed (with the same other options) will coerce the generator
 *      into producing the same word. Default is random. A numeric seed of 0
 *      is honoured; other falsy values (undefined, null, '') mean "random".
 *  - phoneticSimplicity: The greater this number, the simpler the phonetics.
 *      For example, 1 might produce 'str' while 5 might produce 's' for
 *      the same syllable. Minimum is 1, default is 5.
 *  - compoundSimplicity: The greater this number, the less likely the
 *      resulting word will sound "compound", such as "ripkuth" instead of
 *      "riputh". Minimum is 1, default is 5.
 *
 * NOTE(review): the default seed comes from Math.random(), which is not a
 * cryptographically secure source; callers needing unpredictability should
 * supply their own seed.
 *
 * @param {{}} overrides A set of options and values with which to override
 *      the defaults.
 * @returns {{length, seed, phoneticSimplicity, compoundSimplicity}}
 *      An options object.
 */
function getOptions(overrides) {
    const given = overrides || {};
    // `seed || random` used to discard an explicit numeric 0, which made
    // generate({ seed: 0 }) non-deterministic despite the documented contract.
    const hasSeed = Boolean(given.seed) || given.seed === 0;
    return {
        length: given.length || 16,
        seed: hasSeed ? given.seed : Math.random(),
        phoneticSimplicity: given.phoneticSimplicity ? Math.max(given.phoneticSimplicity, 1) : 5,
        compoundSimplicity: given.compoundSimplicity ? Math.max(given.compoundSimplicity, 1) : 5
    };
}
/**
 * Gets the next pseudo-random phonetic from a given phonetic set,
 * determining whether to include "complex" phonetics in that set based on
 * opts.phoneticSimplicity.
 *
 * Side effect: wordObj.numeric is replaced by a hash of its previous value
 * concatenated with the current wordObj.word, so consecutive calls pick
 * different phonetics. The chosen phonetic is NOT appended here; the caller
 * does that.
 *
 * @param {Array} phoneticSet The array of phonetics from which to choose
 * @param {number} simpleCap The number of 'simple' phonetics at the beginning
 *      of the phoneticSet
 * @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The
 *      wordObj for which the phonetic is being chosen
 * @param {boolean} [forceSimple] true to force a simple phonetic to be
 *      chosen; otherwise, the function will choose whether to include complex
 *      phonetics based on the derivative of wordObj.numeric.
 * @returns {string} The chosen phonetic.
 */
function getNextPhonetic(phoneticSet, simpleCap, wordObj, forceSimple) {
    const current = wordObj.numeric;
    const deriv = getDerivative(current);
    // Complex phonetics are only eligible when (numeric + deriv) is an exact
    // multiple of phoneticSimplicity, so larger values make them rarer.
    const simple = (current + deriv) % wordObj.opts.phoneticSimplicity > 0;
    const cap = (simple || forceSimple) ? simpleCap : phoneticSet.length;
    const phonetic = phoneticSet[current % cap];
    // number + string: the numeric is stringified and prefixed to the word.
    wordObj.numeric = getNumericHash(current + wordObj.word);
    return phonetic;
}
/**
 * Generates a numeric hash based on the input data.
 *
 * The input is stringified, suffixed with '-Phonetic', and folded one UTF-16
 * code unit at a time with the recurrence h = 31 * h + code, kept as an
 * unsigned 32-bit integer. (This is a simple rolling hash, not md5.)
 *
 * @param {string|number} data The string or number to be hashed.
 * @returns {number} An integer in the range [0, 2^32 - 1].
 */
function getNumericHash(data) {
    const text = data + '-Phonetic';
    let numeric = 0;
    for (let i = 0, len = text.length; i < len; i++) {
        const chr = text.charCodeAt(i);
        // Math.imul gives the 32-bit product, matching (h << 5) - h modulo 2^32;
        // >>> 0 reinterprets the sum as unsigned.
        numeric = (Math.imul(numeric, 31) + chr) >>> 0;
    }
    return numeric;
}
/**
 * Applies post-processing to a word after it has already been generated. In
 * this phase, the REPLACEMENTS are executed, applying language intelligence
 * that can make generated words more pronounceable.
 *
 * Each REPLACEMENTS key is compiled as a flag-less regular expression, so a
 * rule rewrites only its first match; rules run in the order they are listed
 * and each sees the output of the previous one. The word is updated in place
 * on wordObj. No capitalization is performed.
 *
 * @param {{word, numeric, lastSkippedPre, lastSkippedPost, opts}} wordObj The
 *      word object to be processed.
 * @returns {string} The processed word.
 */
function postProcess(wordObj) {
    // Object.keys yields own enumerable keys in the same order the former
    // for...in + hasOwnProperty loop visited them.
    Object.keys(REPLACEMENTS).forEach(function(pattern) {
        wordObj.word = wordObj.word.replace(new RegExp(pattern), REPLACEMENTS[pattern]);
    });
    return wordObj.word;
}
/**
 * Generates a new word based on the given options. For available options,
 * see getOptions.
 *
 * Syllables are appended until the raw word reaches options.length, then the
 * replacements are applied and the result is cut to options.length. Because
 * some replacements shorten the word (e.g. 'eye' becomes 'ye'), the returned
 * word can be slightly shorter than requested.
 *
 * @param {*} [options] A collection of options to control the word generator.
 * @returns {string} A generated word.
 */
module.exports.generate = function(options) {
    const opts = getOptions(options);
    const wordObj = {
        numeric: getNumericHash(opts.seed),
        // addSyllable() always writes this before reading it; initialized here
        // only so the object has its full shape from the start.
        lastSkippedPre: false,
        lastSkippedPost: false,
        word: '',
        opts: opts
    };
    while (wordObj.word.length < opts.length) {
        addSyllable(wordObj);
    }
    // slice() replaces the deprecated substr(); with a start of 0 both cut the
    // word to at most opts.length characters.
    return postProcess(wordObj).slice(0, opts.length);
};