Last active
August 29, 2015 14:15
-
-
Save keithhamilton/d47435e571b28c836083 to your computer and use it in GitHub Desktop.
Deriving Real Integers From Speech
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Fun Fact: when working in the realm of real, integer numbers, the English
* language follows a curious pattern:
* | |
* For each word in a scalar as spoken, such as 'three hundred thousand | |
* two hundred forty five', a real value can be derived by considering each | |
* number successively and applying the following formula: | |
* | |
* phrase.split(' ') => array with length l | |
* for i in phrase: | |
* if the integer value of phrase[i] < integer value of phrase[i+1] =>
* reduce((a, b) => a * b)
*
* if the integer value of phrase[i] > integer value of phrase[i+1] =>
* reduce((a, b) => a + b)
* | |
* Considering this, it's trivial to convert something like | |
* 'two hundred thousand three hundred forty five' into an integer | |
* (2 * 100 * 1000) + (3 * 100) + 40 + 5 == 200345 | |
* | |
* An edge case is found when irregular forms of speech occur, such as
* 'one million million'. This can be accounted for by reducing
* as a product when the first term is less than or equal to the second
* (1 * 1000000 * 1000000)
* | |
* To deal with hyphenations, it is important to first split on a space | |
* then reduce each term to an integer before reducing the terms as a whole | |
* | |
* "twenty-six hundred" => | |
* ["twenty-six", "hundred"] => | |
* [["twenty", "six"], ["hundred"]] => | |
* [26, 100] => | |
* 2600 | |
* | |
* As it turns out, there are only so many discrete words that are used
* when discussing real integers, and for the purposes of my implementation, | |
* I've chosen to stop counting at quadrillion, since that is what I'm | |
* considering to be near or at the upper-limit of common speech in English. | |
* Obviously more terms could be added if this were to be applied in | |
* case-specific situations. | |
* | |
* Below is an ES 6 implementation I'm using for a project. | |
*/ | |
const _ = require('lodash') | |
// Values of every number word resolveScalar understands, up to quadrillion.
// A Map (rather than an object literal) keeps inherited property names such
// as "constructor" from being mistaken for number words.
const NUMBER_WORDS = new Map([
  ['zero', 0], ['one', 1], ['two', 2], ['three', 3], ['four', 4],
  ['five', 5], ['six', 6], ['seven', 7], ['eight', 8], ['nine', 9],
  ['ten', 10], ['eleven', 11], ['twelve', 12], ['thirteen', 13],
  ['fourteen', 14], ['fifteen', 15], ['sixteen', 16], ['seventeen', 17],
  ['eighteen', 18], ['nineteen', 19], ['twenty', 20], ['thirty', 30],
  ['forty', 40], ['fifty', 50], ['sixty', 60], ['seventy', 70],
  ['eighty', 80], ['ninety', 90], ['hundred', 100], ['thousand', 1000],
  ['million', 1000000], ['billion', 1000000000],
  ['trillion', 1000000000000], ['quadrillion', 1000000000000000]
])

/**
 * Combine a left-to-right sequence of number-word values into one integer.
 *
 * Each value is compared with the terms collected so far: every trailing term
 * that is less than or equal to the incoming value is summed and the sum is
 * multiplied by that value; otherwise the value starts a new additive term.
 * The collected terms are added together at the end.
 *
 * @param {number[]} values - Word values in spoken order.
 * @returns {number} The combined value (0 for an empty list).
 */
function sumScalars(values) {
  const terms = []
  for (const value of values) {
    let smaller = 0
    let absorbed = false
    // A scale word applies to everything smaller spoken just before it, so
    // "three hundred forty five thousand" scales 345, not only the 5.
    // `<=` (not `<`) makes repeated scales multiply: "million million".
    while (terms.length > 0 && terms[terms.length - 1] <= value) {
      smaller += terms.pop()
      absorbed = true
    }
    terms.push(absorbed ? smaller * value : value)
  }
  return terms.reduce((total, term) => total + term, 0)
}

/**
 * Derive an integer from a spoken-English number phrase.
 *
 * The phrase is lower-cased and split on whitespace. Each word is split again
 * on any run of non-letter characters (hyphens, underscores, periods, commas,
 * ...) and reduced to a single value first, so "twenty-six hundred" is read
 * as 26 * 100. Words that are not number words are ignored.
 *
 * @param {string} rawText - Phrase such as 'two hundred forty five'.
 * @returns {?number} The integer value, or null when rawText is not a string
 *   or contains no recognised number words.
 */
function resolveScalar(rawText) {
  if (typeof rawText !== 'string') return null

  const wordValues = []
  for (const word of rawText.toLowerCase().split(/\s+/)) {
    const parts = word
      .split(/[^a-z]+/)
      .filter(part => NUMBER_WORDS.has(part))
      .map(part => NUMBER_WORDS.get(part))
    // Collapse a compound word to one value before the phrase-level pass.
    if (parts.length > 0) wordValues.push(sumScalars(parts))
  }

  if (wordValues.length === 0) return null
  return sumScalars(wordValues)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment