cletus/node_modules/date.js/lib/util.js
2025-05-09 15:53:19 -05:00

593 lines
14 KiB
JavaScript

/**
* Module Dependencies
*/
var _ = require('./subdash')
var maps = require('./maps.json')
/**
* The T string regex, e.g. "t:=9h,dt:12h", to encode T = <t> <dt>. Is case sensitive.
* It carries the global flag, so `.test()`/`.exec()` calls on it are stateful through `lastIndex`.
*/
var reT = /t\:\S*,dt\:\S*/g
/**
* The ordering of time units, large to small,
* and the units used for carrying.
* NOTE(review): this comment used to mention a 'mer' (meridiem) unit, but no such entry is in the list below.
*/
var timeUnitOrder = ['y', 'M', 'w', 'd', 'h', 'm', 's', 'ms']
// the same units as timeUnitOrder, each replaced by lemma(unit).canon
var canonTimeUnitOrder = []
for (var i = 0; i < timeUnitOrder.length; i++) {
var unit = timeUnitOrder[i]
canonTimeUnitOrder.push(lemma(unit).canon)
}
// the units that splitT returns and that the carry helpers walk over (no 'w', no 'ms')
var tOrdering = ['y', 'M', 'd', 'h', 'm', 's']
// tFactor[i] is the multiplier the carry helpers apply between tOrdering[i] and tOrdering[i + 1]
// NOTE(review): tFactor[0] = 365 sits between 'y' and 'M' -- confirm that this is intended
var tFactor = [365, 30, 24, 60, 60]
/**
* Delimiters for stdT string: TtoStdT appends stdTdelim[i] after the i-th field.
*/
var stdTdelim = ['-', '-', ' ', ':', ':', '']
/**
* Export `util`: the helper functions and constants this module makes available.
* carry, dumpWeek, carryDown and carryUp are defined in this file but are not in this export list.
*/
module.exports = {
TtoStdT: TtoStdT,
TtoStr: TtoStr,
delimSyms: delimSyms,
hasSym: hasSym,
has_dt: has_dt,
has_pureTimeUnit: has_pureTimeUnit,
has_t: has_t,
highestOverride: highestOverride,
isSym: isSym,
largestUnit: largestUnit,
lemma: lemma,
nextLargestUnit: nextLargestUnit,
nowT: nowT,
opType: opType,
orderChunks: orderChunks,
removeTnPlus: removeTnPlus,
reT: reT,
sName: sName,
splitByArr: splitByArr,
splitSyms: splitSyms,
splitT: splitT,
stdT: stdT,
stdTdelim: stdTdelim,
stdTtoT: stdTtoT,
tOrdering: tOrdering,
timeUnitOrder: timeUnitOrder,
canonTimeUnitOrder: canonTimeUnitOrder,
tokenToStr: tokenToStr,
unparsedStr: unparsedStr,
}
/**
 * Render a T string (or T symbol) as a stdT string. Every field the input
 * leaves unspecified is taken from nowT(offset).
 * @param {string|Object} str A T string; any non-string is first converted with TtoStr.
 * @param {*} [offset] Forwarded to nowT to choose the reference time.
 * @return {string} The fields joined with the stdTdelim delimiters.
 * @example
 * TtoStdT('t:10M05d14h48m00.000s,dt:')
 * // => 2016-10-05 14:48:00
 */
function TtoStdT (str, offset) {
  var tStr = (typeof str == 'string') ? str : TtoStr(str)
  var fallback = splitT(nowT(offset))
  var given = splitT(tStr)

  // prefer the supplied field, fall back to "now" when it does not parse
  var fields = fallback.map(function (nowVal, k) {
    var parsed = parseFloat(given[k])
    return Number.isNaN(parsed) ? parseFloat(nowVal) : parsed
  })

  var out = ''
  stdTdelim.forEach(function (delim, k) {
    var text = fields[k].toString()
    var whole = /(\d+)(\.\d+)?/.exec(text)[1]
    var fraction = /(\d+)\.?(\d+)?/.exec(text)[2]
    // a single leading digit gets a zero in front, e.g. '5.123' -> '05.123'
    if (whole.length == 1) {
      text = '0' + text
    }
    // a fractional part is right-padded with zeros up to three digits
    if (fraction != null) {
      var digits = fraction.length
      while (digits < 3) {
        text += '0'
        digits++
      }
    }
    out += text + delim
  })
  return out
}
// console.log(TtoStdT('t:10M05d14h48m00.010s,dt:'))
/**
 * Serialise a T symbol into its T string, "t:<...>,dt:<...>".
 * Units are written in timeUnitOrder; a unit whose value is null or
 * undefined is left out.
 * @param {Object} T Symbol carrying `t` and `dt` maps keyed by time unit.
 * @return {string} The T string.
 */
function TtoStr (T) {
  var tParts = []
  var dtParts = []
  timeUnitOrder.forEach(function (unit) {
    var tVal = T.t[unit]
    var dtVal = T.dt[unit]
    if (tVal != undefined) {
      tParts.push(tVal + unit)
    }
    if (dtVal != undefined) {
      dtParts.push(dtVal + unit)
    }
  })
  return 't:' + tParts.join('') + ',dt:' + dtParts.join('')
}
/**
 * Collapse runs of nulls in an array of parsed symbols (time chunks).
 * A run of three or more consecutive nulls that is followed by a symbol
 * becomes a single 'trinull' delimiter; shorter runs, and any run at the
 * very end of the array, are dropped.
 * @param {Array} syms Parsed symbols, with null/undefined for unparsed slots.
 * @return {Array} The non-null symbols, delimited by 'trinull'.
 */
function delimSyms (syms) {
  var out = []
  var nullRun = 0
  for (var idx = 0; idx < syms.length; idx++) {
    var sym = syms[idx]
    if (sym == null) {
      nullRun += 1
      continue
    }
    if (nullRun >= 3) {
      out.push('trinull')
    }
    out.push(sym)
    nullRun = 0
  }
  return out
}
// console.log(delimSyms([1, 2, null, null, null, 3]))
/**
 * Check whether any symbol in syms has a name listed in symArr.
 * @param {Array} syms Array of symbols.
 * @param {Array} symArr Array of symbol names.
 * @return {Boolean} true as soon as one symbol matches, false otherwise.
 */
function hasSym (syms, symArr) {
  for (var idx = 0; idx < syms.length; idx++) {
    if (isSym(syms[idx], symArr)) {
      return true
    }
  }
  return false
}
/**
 * Check if T.dt is not empty, i.e. has at least one key.
 * @param {Object} T Symbol carrying a `dt` map.
 * @return {Boolean}
 */
function has_dt (T) {
  var dtUnits = _.keys(T.dt)
  return dtUnits.length > 0
}
/**
 * Check that every key of T.dt and of T.t is a unit from timeUnitOrder.
 * @param {Object} T Symbol carrying `t` and `dt` maps.
 * @return {Boolean} false as soon as a key outside timeUnitOrder is found.
 */
function has_pureTimeUnit (T) {
  // dt is inspected before t
  var groups = [T.dt, T.t]
  for (var g = 0; g < groups.length; g++) {
    for (var key in groups[g]) {
      if (!_.includes(timeUnitOrder, key)) {
        return false
      }
    }
  }
  return true
}
/**
 * Check if T.t is not empty, i.e. has at least one key.
 * @param {Object} T Symbol carrying a `t` map.
 * @return {Boolean}
 */
function has_t (T) {
  var tUnits = _.keys(T.t)
  return tUnits.length > 0
}
/**
 * Find the first unit, walking tOrdering from its start, whose value in t
 * begins with '=' (an override).
 * @param {Object} t Map of time unit -> value (also usable with a dt map).
 * @return {string|null} The unit, or null when no value starts with '='.
 */
function highestOverride (t) {
  for (var idx = 0; idx < tOrdering.length; idx++) {
    var candidate = tOrdering[idx]
    if (/^=/.exec(t[candidate])) {
      return candidate
    }
  }
  return null
}
/**
 * Check whether the name of symbol s (as given by sName) is listed in arr.
 * @param {symbol} s symbol object
 * @param {Array} arr Of string symbol names
 * @return {Boolean}
 */
function isSym (s, arr) {
  var symName = sName(s)
  return _.includes(arr, symName)
}
/**
 * Find the first unit in tOrdering that has a truthy value in T.t; when
 * T.t has none, look in T.dt instead.
 * @param {Object} T Symbol carrying `t` and `dt` maps.
 * @return {string|undefined} The unit, or whatever _.find yields when nothing matches.
 */
function largestUnit (T) {
  var fromT = _.find(tOrdering, function (u) {
    return T.t[u]
  })
  if (fromT != null) {
    return fromT
  }
  return _.find(tOrdering, function (u) {
    return T.dt[u]
  })
}
/**
 * Return the lemma symbol of a word string, i.e. the name and value of the symbol it belongs to in the CFG. Uses ./maps.json.
 * NLP lemmatization is described here: http://nlp.stanford.edu/IR-book/html/htmledition/stemming-and-lemmatization-1.html. Inflections = all possible alternative words of a lemma.
 * The word is lower-cased before lookup, except for the exact string 'M' (month).
 * @param {string} str To lemmatize.
 * @return {JSON} Lemma symbol {name, value, canon} for CFG. name and value are
 * null when the word is in no inflection list; canon is then the (lower-cased) word itself,
 * otherwise the first entry of the matching inflection list.
 * @example
 * lemma('zero')
 * // => { value: '0', name: 'n', ... }
 */
function lemma (str) {
  var word = (str == 'M') ? str : str.toLowerCase()
  var names = _.keys(maps)
  for (var n = 0; n < names.length; n++) {
    var table = maps[names[n]]
    var values = _.keys(table)
    for (var v = 0; v < values.length; v++) {
      var inflections = table[values[v]]
      if (_.includes(inflections, word)) {
        // first hit wins
        return {
          name: names[n],
          value: values[v],
          canon: inflections[0]
        }
      }
    }
  }
  return { name: null, value: null, canon: word }
}
// console.log(lemma('zero'))
/**
 * Return the unit that sits one position before largestUnit(T) in
 * tOrdering, i.e. the next larger unit.
 * @param {Object} T Symbol carrying `t` and `dt` maps.
 * @return {string|undefined} undefined when largestUnit(T) is the first
 * entry of tOrdering or is not in tOrdering at all.
 */
function nextLargestUnit (T) {
  var pos = tOrdering.indexOf(largestUnit(T))
  return tOrdering[pos - 1]
}
/**
 * Convenient method to get a reference time in T format.
 * @param {Date|string} [offset] Passed to stdT in place of the current time; when null or undefined, `new Date()` is used.
 * @return {string} T format string.
 */
function nowT (offset) {
  var reference = (offset == undefined) ? new Date() : offset
  return stdTtoT(stdT(reference))
}
/**
 * Determine the op type from the symbol names of the arguments:
 * name(L) + name(op) + name(R). A missing L or R contributes '', and the
 * op name is kept only when it is 'o', 'r' or 'c'.
 * @param {symbol} L Left operand.
 * @param {symbol} op Operator symbol.
 * @param {symbol} R Right operand.
 * @return {string} The concatenated type string.
 */
function opType (L, op, R) {
  var left = sName(L) || ''
  var right = sName(R) || ''
  var middle = sName(op)
  var keepOp = middle == 'o' || middle == 'r' || middle == 'c'
  return left + (keepOp ? middle : '') + right
}
/**
 * Order time chunks: first the rows that contain no T symbol, short to
 * long, then the rows that do contain one, short to long. Used for .pop()
 * to get the candidate timechunk for parsing.
 * @param {Array} matrix Array of rows (arrays of symbols).
 * @return {Array} A new array holding the same rows in the new order.
 */
function orderChunks (matrix) {
  var shortFirst = function (a, b) {
    return a.length - b.length
  }
  // partition by presence of a T symbol, keeping the original row order
  var withT = []
  var withoutT = []
  matrix.forEach(function (row) {
    if (hasSym(row, ['T'])) {
      withT.push(row)
    } else {
      withoutT.push(row)
    }
  })
  return withoutT.sort(shortFirst).concat(withT.sort(shortFirst))
}
/**
 * Remove the default <op> 'plus' that directly precedes an <n>, so that
 * <T> <n> defaults to plus. Marked in the original as a quick fix.
 * The array is edited in place and also returned.
 * @param {Array} syms Array of symbols.
 * @return {Array} The same array, with the matching <op> entries removed.
 */
function removeTnPlus (syms) {
  var idx = 0
  while (idx < syms.length) {
    var sym = syms[idx]
    var isDefaultPlus = isSym(sym, ['op']) && sym.value == 'plus' && isSym(syms[idx + 1], ['n'])
    if (isDefaultPlus) {
      syms.splice(idx, 1)
    }
    // the index advances even after a removal, so the <n> that slid into
    // this slot is not examined again
    idx++
  }
  return syms
}
/**
 * Return the name of a symbol, which is the name of its constructor
 * (the original comment lists {op,c,r,n,T,f} as the CFG symbol names).
 * @param {Symbol} symbol A CFG symbol.
 * @return {string|null} The constructor name, or null for any falsy input.
 */
function sName (symbol) {
  if (!symbol) {
    return null
  }
  return symbol.constructor.name
}
/**
 * Split a string by an array of tokens. Tokens are applied in array order
 * and each one removes its first remaining occurrence in the string.
 * @param {string} str The input string.
 * @param {Array} tokenArr Array of tokens to split the string by.
 * @return {Array} The split string array.
 * @example
 * splitByArr('lorem 1 ipsum 2 dolor 3', [1, 2, 3])
 * // => ['lorem ', ' ipsum ', ' dolor ', '']
 */
function splitByArr (str, tokenArr) {
  // Use a one-character placeholder that occurs neither in the input nor in
  // any token. A fixed multi-character marker could itself be matched by a
  // later token (e.g. 'REPLACE'), or already be present in the input, and
  // then the split would be corrupted.
  var delim = '#{REPLACE}' // legacy marker, only used if no free character is found
  for (var code = 0xE000; code <= 0xF8FF; code++) {
    var candidate = String.fromCharCode(code)
    var clash = str.indexOf(candidate) != -1
    for (var t = 0; !clash && t < tokenArr.length; t++) {
      clash = String(tokenArr[t]).indexOf(candidate) != -1
    }
    if (!clash) {
      delim = candidate
      break
    }
  }
  // swap each token for the placeholder
  for (var i = 0; i < tokenArr.length; i++) {
    str = str.replace(tokenArr[i], delim)
  }
  // split into arr
  return str.split(delim)
}
// console.log(splitByArr('lorem 1 ipsum 2 dolor 3', [1,2,3]))
/**
 * Split an array of symbols by delimiter into a matrix. An element counts
 * as a delimiter when it equals `delimiter` or when its sName does.
 * A row is emitted at every delimiter (even if it is empty) and after the
 * last element when that element is not a delimiter.
 * @param {Array} syms The input array
 * @param {string|symbol} delimiter To split the array by
 * @return {matrix} delimited arrays.
 */
function splitSyms (syms, delimiter) {
  var rows = []
  var current = []
  var lastIdx = syms.length - 1
  for (var idx = 0; idx < syms.length; idx++) {
    var sym = syms[idx]
    if (sym == delimiter || sName(sym) == delimiter) {
      // close the running row and start a fresh one
      rows.push(current)
      current = []
      continue
    }
    current.push(sym)
    if (idx == lastIdx) {
      // no trailing delimiter: flush what was gathered
      rows.push(current)
    }
  }
  return rows
}
/**
 * Split a T string into an array ordered like tOrdering: [_y, _M, _d, _h, _m, _s].
 * For every unit the first number written directly in front of the unit
 * letter is taken (searching the whole string), the values are passed
 * through carry(), and the result is read back in tOrdering order.
 * Millisecond values ("...ms") are not part of the result.
 * @param {string} str A T string, e.g. 't:2016y10M05d14h48m00.000s,dt:'.
 * @return {Array|null} The six fields as left by carry(), or null when str
 * does not match reT.
 */
function splitT (str) {
  if (!str.match(reT)) {
    return null
  }
  // first captured number in front of the given unit pattern, else undefined
  var grab = function (re) {
    return (re.exec(str) || [])[1]
  }
  // The Time Object
  var TO = {
    y: grab(/(\d+(\.\d+)?)y/),
    M: grab(/(\d+(\.\d+)?)M/),
    w: grab(/(\d+(\.\d+)?)w/),
    d: grab(/(\d+(\.\d+)?)d/),
    h: grab(/(\d+(\.\d+)?)h/),
    // 'm' must not be the start of 'ms', otherwise "500ms" reads as 500 minutes
    m: grab(/(\d+(\.\d+)?)m(?!s)/),
    s: grab(/(\d+(\.\d+)?)s/)
  }
  // do the carries
  TO = carry(TO)
  // compose results
  return tOrdering.map(function (unit) {
    return TO[unit]
  })
}
/**
 * Properly down- and up-carry a Time Object by running, in this order:
 * 1. dumpWeek, 2. carryDown, 3. carryUp.
 * @param {Object} TO Time Object keyed by time unit.
 * @return {Object} The Time Object returned by the last step.
 */
function carry (TO) {
  return carryUp(carryDown(dumpWeek(TO)))
}
/**
 * 1. dumpWeek: fold the week count into days (d += w * 7) and remove 'w'.
 * The object is edited in place; 'd' is always written, so it becomes 0
 * when neither 'w' nor 'd' was set.
 * @param {Object} TO Time Object keyed by time unit.
 * @return {Object} The same object.
 */
function dumpWeek (TO) {
  var weeks = TO.w ? parseFloat(TO.w) : 0
  var days = TO.d ? parseFloat(TO.d) : 0
  TO.d = days + weeks * 7
  delete TO.w
  return TO
}
/**
 * 2. carryDown: walk tOrdering from its first unit to its last and push
 * the fractional part of each unit into the next one, multiplied by the
 * matching tFactor entry. The object is edited in place.
 * Units that are unset are left unset unless a carry arrives for them.
 * The last unit keeps its fraction.
 * @param {Object} TO Time Object keyed by time unit.
 * @return {Object} The same object.
 */
function carryDown (TO) {
  var lastIdx = tOrdering.length - 1
  var pending = 0
  for (var idx = 0; idx <= lastIdx; idx++) {
    var unit = tOrdering[idx]
    var untouched = TO[unit] == undefined && pending == 0
    if (!untouched) {
      // absorb whatever was handed down
      TO[unit] = parseFloat(TO[unit] || '0') + pending
      // nothing below the last unit to hand a fraction to
      if (idx == lastIdx) {
        break
      }
      var fraction = parseFloat(TO[unit] || '0') - parseInt(TO[unit] || '0')
      pending = 0
      if (fraction > 0) {
        pending = fraction * tFactor[idx]
        TO[unit] = parseInt(TO[unit])
      }
    }
  }
  return TO
}
/**
 * 3. carryUp: walk tOrdering from its last unit to its first and move
 * whole multiples of the matching tFactor entry into the next unit up,
 * keeping the remainder. The object is edited in place.
 * Units that are unset are left unset unless a carry arrives for them.
 * @param {Object} TO Time Object keyed by time unit.
 * @return {Object} The same object.
 */
function carryUp (TO) {
  var unitCount = tOrdering.length
  var overflow = 0
  for (var step = 0; step < unitCount; step++) {
    // step 0 is the last entry of tOrdering / tFactor, step 1 the one before, ...
    var unit = tOrdering[unitCount - 1 - step]
    if (TO[unit] == undefined && overflow == 0) {
      continue
    }
    TO[unit] = parseFloat(TO[unit] || '0') + overflow
    // nothing above the final unit to carry into
    if (step == unitCount - 1) {
      break
    }
    var size = tFactor[tFactor.length - 1 - step]
    var wholes = parseInt(parseFloat(TO[unit] || '0') / size)
    if (wholes > 0) {
      overflow = wholes
      TO[unit] = parseFloat(TO[unit] || '0') % size
    } else {
      overflow = 0
    }
  }
  return TO
}
/**
 * Take a date or string and format it as "y-M-d hh:mm:ss" in local time.
 * Month and day are written without zero padding; the clock part is the
 * leading run of Date#toTimeString().
 * @param {Date|string} date A Date, or a string handed to `new Date()`.
 * @return {string} e.g. '2016-10-5 14:48:00'
 */
function stdT (date) {
  var when = (typeof date == 'string') ? new Date(date) : date
  var datePart = [when.getFullYear(), when.getMonth() + 1, when.getDate()].join('-')
  // first digit-led run of non-spaces, i.e. the clock in front of the zone info.
  // Kept as a local: it used to be assigned without `var` and so leaked as a
  // global (and threw in strict mode).
  var timePart = /(\d\S+)/.exec(when.toTimeString())[1]
  return datePart + ' ' + timePart
}
/**
 * Convert std time string to T string. The date and the time part may be
 * separated by a space (as produced by stdT) or by an ISO-style 'T'.
 * The date is split on '-' and the time on ':'; the pieces are copied as-is.
 * @param {string} str e.g. '2011-10-05 14:48:00.000' or '2011-10-05T14:48:00.000'
 * @return {string} T string with an empty dt part.
 * @example
 * stdTtoT('2011-10-05T14:48:00.000')
 * // => 't:2011y10M05d14h48m00.000s,dt:'
 */
function stdTtoT (str) {
  // accept both separators, so that the documented ISO-style input works too
  var pieces = str.split(/[ T]/)
  var date = pieces[0].split('-')
  var time = pieces[1].split(':')
  return 't:' + date[0] + 'y' + date[1] + 'M' + date[2] + 'd' + time[0] + 'h' + time[1] + 'm' + time[2] + 's,dt:'
}
// console.log(stdTtoT('2011-10-05T14:48:00.000Z'))
/**
 * Recombine an array of symbols back into a string by joining their
 * `token` properties with single spaces.
 * @param {Array} syms Array of symbols, each carrying a `token`.
 * @return {string}
 */
function tokenToStr (syms) {
  var tokens = Array.from(syms, function (sym) {
    return sym.token
  })
  return tokens.join(' ')
}
/**
 * Extract the unparsed tokens and join them with spaces: str is split on
 * whitespace, and the word at index i is kept whenever syms[i] is null or
 * undefined.
 * @param {string} str The input string.
 * @param {Array} syms Parsed symbols, positionally aligned with the words of str.
 * @return {string}
 */
function unparsedStr (str, syms) {
  var words = str.split(/\s+/)
  var leftovers = []
  for (var pos = 0; pos < syms.length; pos++) {
    if (syms[pos] != null) {
      continue
    }
    leftovers.push(words[pos])
  }
  return leftovers.join(' ')
}