mirror of
https://github.com/IT4Change/Ocelot-Social.git
synced 2025-12-13 07:45:56 +00:00
refactored queryString, specs for queryString
This commit is contained in:
parent
46fca229ec
commit
b2ea4df294
@ -1,5 +1,5 @@
|
||||
import log from './helpers/databaseLogger'
|
||||
import queryString from './searches/queryString'
|
||||
import { queryString } from './searches/queryString'
|
||||
|
||||
// see http://lucene.apache.org/core/8_3_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package.description
|
||||
|
||||
|
||||
@ -1,41 +1,50 @@
|
||||
/**
 * Build a Lucene query string for a free-text search term.
 *
 * The input is whitespace-normalized and its Lucene special characters are
 * escaped. The result combines up to four clauses, from most to least
 * specific: the whole text as one phrase, all words required, each word on
 * its own, and prefix matches for longer words.
 *
 * @param {string} str - raw search text
 * @returns {string} the applicable clauses separated by single spaces;
 *   clauses that come back empty (e.g. the per-word ones for a single word)
 *   are left out
 * @see http://lucene.apache.org/core/8_3_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package.description
 */
export function queryString(str) {
  const normalizedString = normalizeWhitespace(str)
  const escapedString = escapeSpecialCharacters(normalizedString)
  const clauses = [
    matchWholeText(escapedString),
    matchEachWordExactly(escapedString),
    matchSomeWordsExactly(escapedString),
    matchBeginningOfWords(escapedString),
  ]
  // No explicit operator is placed between the clauses; they are only
  // separated by whitespace, so the parser's default operator combines them.
  return clauses.filter(clause => clause !== '').join(' ')
}
|
||||
|
||||
/**
 * Clause that matches the complete search text as one quoted phrase.
 *
 * The text is inserted verbatim between double quotes; escaping is the
 * caller's responsibility.
 *
 * @param {string} str - search text
 * @param {number} [boost=8] - boost factor appended after `^`
 * @returns {string} e.g. `"a couple of words"^8`
 */
const matchWholeText = (str, boost = 8) => `"${str}"^${boost}`
|
||||
|
||||
/**
 * Clause that requires every word of the search text to match exactly.
 *
 * Words are obtained by splitting on single spaces, quoted individually and
 * combined with `AND` inside one boosted group.
 *
 * @param {string} str - search text with words separated by single spaces
 * @param {number} [boost=4] - boost factor appended after `^`
 * @returns {string} e.g. `("a" AND "couple")^4`, or `''` when `str`
 *   contains no space (i.e. there is only one word)
 */
const matchEachWordExactly = (str, boost = 4) => {
  if (!str.includes(' ')) {
    return ''
  }
  const quotedWords = str.split(' ').map(word => `"${word}"`)
  return `(${quotedWords.join(' AND ')})^${boost}`
}
|
||||
|
||||
/**
 * Clause that lists every word of the search text as its own boosted,
 * quoted term, with no operator between the terms.
 *
 * @param {string} str - search text with words separated by single spaces
 * @param {number} [boost=2] - boost factor appended to each word after `^`
 * @returns {string} e.g. `"a"^2 "couple"^2`, or `''` when `str` contains
 *   no space (i.e. there is only one word)
 */
const matchSomeWordsExactly = (str, boost = 2) => {
  if (!str.includes(' ')) {
    return ''
  }
  return str
    .split(' ')
    .map(word => `"${word}"^${boost}`)
    .join(' ')
}
|
||||
|
||||
/**
 * Clause with a trailing-wildcard (prefix) term for every longer word.
 *
 * Words are obtained by splitting on single spaces. Length is measured on
 * the string exactly as passed in, so any escape characters already present
 * count towards it.
 *
 * @param {string} str - search text with words separated by single spaces
 * @returns {string} e.g. `couple* words*`, or `''` when no word is longer
 *   than three characters
 */
const matchBeginningOfWords = str => {
  return str
    .split(' ')
    // at least 4 characters, so AND, OR and NOT are never used unquoted
    .filter(word => word.length > 3)
    .map(word => `${word}*`)
    .join(' ')
}
|
||||
|
||||
/**
 * Collapse every run of whitespace (spaces, tabs, line breaks, ...) into a
 * single space and strip leading and trailing whitespace.
 *
 * @param {string} str - text to normalize
 * @returns {string} the text with words separated by exactly one space
 */
export function normalizeWhitespace(str) {
  return str.replace(/\s+/g, ' ').trim()
}
|
||||
|
||||
/**
 * Prefix each character that has a special meaning in the query syntax with
 * a backslash.
 *
 * Escaped characters: `" [ ] & | \ { } + ! ( ) ^ ~ * ? : / -`. Every
 * occurrence is escaped individually, so `&&` becomes `\&\&`.
 *
 * @param {string} str - text to escape
 * @returns {string} the text with all listed characters backslash-escaped
 */
export function escapeSpecialCharacters(str) {
  return str.replace(/(["[\]&|\\{}+!()^~*?:/-])/g, '\\$1')
}
|
||||
|
||||
/**
 * Wrap a string in double quotes.
 *
 * The text is inserted verbatim; quotes inside `str` are not escaped here.
 *
 * @param {string} str - text to quote
 * @returns {string} `str` surrounded by `"` characters
 */
const quoteString = str => `"${str}"`
|
||||
|
||||
@ -1,10 +1,42 @@
|
||||
import queryString from './queryString'
|
||||
import { queryString, escapeSpecialCharacters, normalizeWhitespace } from './queryString'
|
||||
|
||||
// Specs for the query-string builder and its exported helpers.
describe('queryString', () => {
  describe('special characters', () => {
    it('does escaping correctly', () => {
      expect(escapeSpecialCharacters('+ - && || ! ( ) { } [ ] ^ " ~ * ? : \\ / '))
        .toEqual('\\+ \\- \\&\\& \\|\\| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ ')
    })
  })

  describe('whitespace', () => {
    it('is normalized correctly', () => {
      expect(normalizeWhitespace(' a \t \n b \n '))
        .toEqual('a b')
    })
  })

  describe('exact match', () => {
    it('boosts score by factor 8', () => {
      expect(queryString('a couple of words')).toContain('"a couple of words"^8')
    })
  })

  describe('match all words exactly', () => {
    it('boosts score by factor 4', () => {
      expect(queryString('a couple of words')).toContain('("a" AND "couple" AND "of" AND "words")^4')
    })
  })

  describe('match at least one word exactly', () => {
    it('boosts score by factor 2', () => {
      expect(queryString('a couple of words')).toContain('"a"^2 "couple"^2 "of"^2 "words"^2')
    })
  })

  describe('globbing for longer words', () => {
    it('globs words with more than three characters', () => {
      expect(queryString('a couple of words')).toContain('couple* words*')
    })
  })

})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user