Specs are running and regex in searches.js is cleaned up

Matching the whole text entered exactly is boosted by 8.
Matching all the words entered exactly is boosted by 4.
Matching some of the words entered exactly is boosted by 2.
Glob matching is applied to words with more than three characters and is not boosted.
To Do: Deal with @ and # symbols.
To Do: Find a way to match Unicode, e.g. Cyrillic letters.
This commit is contained in:
Moriz Wahl 2020-03-10 01:07:40 +01:00 committed by mattwr18
parent a6a2ac4fbe
commit 9cb489dce1
2 changed files with 107 additions and 38 deletions

View File

@ -12,7 +12,7 @@ export default {
CALL db.index.fulltext.queryNodes('post_fulltext_search', $query)
YIELD node as resource, score
MATCH (resource)<-[:WROTE]-(author:User)
WHERE score >= 0.2
WHERE score >= 0.0
AND NOT (
author.deleted = true OR author.disabled = true
OR resource.deleted = true OR resource.disabled = true
@ -35,7 +35,7 @@ export default {
CALL db.index.fulltext.queryNodes('user_fulltext_search', $query)
YIELD node as resource, score
MATCH (resource)
WHERE score >= 0.5
WHERE score >= 0.0
AND NOT (resource.deleted = true OR resource.disabled = true)
RETURN resource {.*, __typename: labels(resource)[0]}
LIMIT $limit
@ -70,32 +70,47 @@ export default {
},
}
function createUserQuery(str) {
// match the whole text
/**
 * Query builder for users: forwards the entered text unchanged to
 * `createPostQuery` and returns its result.
 *
 * @param {string} str - raw search text
 * @returns {string} whatever `createPostQuery(str)` returns
 */
const createUserQuery = str => createPostQuery(str)
/**
 * Builds a boosted full-text query string from the text a user entered.
 *
 * The result is a list of alternatives joined with ` OR `:
 *   1. the whole text as one quoted string, boosted with `^8`
 *   2. every word quoted and joined with ` AND `, boosted with `^4`
 *      (only when there is more than one word)
 *   3. every word quoted on its own, boosted with `^2`
 *      (only when there is more than one word)
 *   4. `word*` for every word longer than three characters, not boosted
 *
 * @param {string} str - raw search text
 * @returns {string} the assembled query string
 */
const createPostQuery = str => {
  // Trim after collapsing whitespace so that leading/trailing blanks cannot
  // turn into empty words (which would end up as `""` terms in the query).
  const normalizedString = normalizeWhitespace(str).trim()
  const escapedString = escapeSpecialCharacters(normalizedString)
  const words = escapedString.split(' ')

  // match the whole text exactly
  const alternatives = [`${quoteString(escapedString)}^8`]

  if (words.length > 1) {
    const quotedWords = words.map(word => quoteString(word))
    // match each word exactly
    alternatives.push(`(${quotedWords.join(' AND ')})^4`)
    // match at least one word exactly
    quotedWords.forEach(quotedWord => {
      alternatives.push(`${quotedWord}^2`)
    })
  }

  // start globbing ...
  // at least 4 letters. So AND, OR and NOT are never used unquoted.
  // The length is taken from the escaped word, so escaping backslashes count.
  words
    .filter(word => word.length > 3)
    .forEach(word => {
      alternatives.push(`${word}*`)
    })

  // now we could become fuzzy using ~
  return alternatives.join(' OR ')
}
/**
 * Turns the entered text into a query string: whitespace is normalized,
 * special characters are escaped, and the result is wrapped in double quotes
 * when the normalized text contains a space.
 *
 * NOTE(review): a `const createPostQuery` is declared further up in this view
 * as well; the two declarations cannot coexist. Presumably this one is the
 * earlier version - confirm which is meant to stay.
 *
 * @param {string} str - raw search text
 * @returns {string} the escaped text, quoted if it contains a space
 */
function createPostQuery(str) {
  const collapsed = normalizeWhitespace(str)
  const escaped = escapeSpecialCharacters(collapsed)
  if (collapsed.includes(' ')) {
    return quoteString(escaped)
  }
  return escaped
}
function normalizeWhitespace(str) {
/**
 * Replaces every run of whitespace characters (blanks, tabs, line breaks, ...)
 * with one single space. Whitespace at the start or end of the text is
 * collapsed as well, not removed.
 *
 * @param {string} str - text to normalize
 * @returns {string} `str` with each whitespace run replaced by `' '`
 */
const normalizeWhitespace = str => str.replace(/\s+/g, ' ')
function quoteString(str) {
/**
 * Wraps the given text in double quotes. Nothing inside the text is escaped
 * here.
 *
 * @param {string} str - text to quote
 * @returns {string} `str` with a `"` in front and a `"` behind it
 */
const quoteString = str => `"${str}"`
function escapeSpecialCharacters(str) {
/**
 * Puts a backslash in front of every character listed in the pattern below:
 * " [ ] & | \ { } + ! ( ) ^ ~ * ? : / -
 * (presumably the characters that carry special meaning in the full-text
 * query syntax - verify against the search backend).
 *
 * @param {string} str - text to escape
 * @returns {string} `str` with each listed character prefixed by `\`
 */
const escapeSpecialCharacters = str => {
  // One capture group around a character class; `g` so every hit is replaced.
  const specialCharacter = /(["[\]&|\\{}+!()^~*?:/-])/g
  // '\\$1' is a literal backslash followed by the captured character.
  return str.replace(specialCharacter, '\\$1')
}

View File

@ -3,6 +3,7 @@ import { gql } from '../../helpers/jest'
import { getNeode, getDriver } from '../../db/neo4j'
import createServer from '../../server'
import { createTestClient } from 'apollo-server-testing'
import cloneDeep from 'lodash/cloneDeep'
let query, authenticatedUser
@ -49,8 +50,18 @@ const searchQuery = gql`
const nothingFound = { data: { findResources: [] } }
/**
 * Returns deep copies of the given objects. In every copy whose `__typename`
 * is `'Post'`, each newline in `content` gets a `<br>` inserted in front of
 * it. The objects passed in are not modified.
 *
 * @param {Array<object>} array - objects carrying a `__typename` property
 * @returns {Array<object>} the copies, in the same order
 */
const addBrAfterNewlinw = array =>
  array.map(obj => {
    const copy = cloneDeep(obj)
    if (copy.__typename !== 'Post') {
      return copy
    }
    copy.content = copy.content.replace(/\n/g, '<br>\n')
    return copy
  })
/**
 * Wraps the given resources in a `{ data: { findResources: [...] } }` object
 * for comparison against a query result.
 *
 * The resources are run through `addBrAfterNewlinw` first. The earlier
 * `return` of the raw array had been left in front of this one and made it
 * unreachable.
 *
 * @param {Array<object>} array - the resources expected in the response
 * @returns {{data: {findResources: Array<object>}}} the expected response
 */
const createExpectedObject = array => {
  return { data: { findResources: addBrAfterNewlinw(array) } }
}
const addPostToDB = post => {
@ -69,6 +80,14 @@ const addUserToDB = user => {
})
}
/**
 * Hands every object to the matching helper: objects whose `__typename` is
 * `'Post'` go to `addPostToDB`, everything else goes to `addUserToDB`.
 *
 * @param {Array<object>} array - objects carrying a `__typename` property
 * @returns {Array} the helpers' return values in input order
 *   (presumably promises, since callers pass the result to `Promise.all`)
 */
const dumpToDB = array =>
  array.map(obj => (obj.__typename === 'Post' ? addPostToDB(obj) : addUserToDB(obj)))
/**
 * Creates a new object with `__typename` set to `type`, followed by all own
 * enumerable properties of `obj`. Because `obj` is spread last, a
 * `__typename` already present on `obj` wins over `type`.
 *
 * @param {object} obj - properties to copy
 * @param {string} type - value for `__typename`
 * @returns {object} a new object; `obj` itself is not modified
 */
const createDataObject = (obj, type) => ({ __typename: type, ...obj })
@ -97,7 +116,13 @@ describe('resolvers', () => {
describe('basic searches', () => {
it('finds the post', async () => {
variables = { query: 'Beitrag' }
variables = { query: 'beitrag' }
const expected = createExpectedObject([aPost])
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected)
})
it('finds the post searching only with capital letters', async () => {
variables = { query: 'BEITRAG' }
const expected = createExpectedObject([aPost])
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected)
})
@ -121,20 +146,22 @@ describe('resolvers', () => {
describe('more data added', () => {
beforeAll(async () => {
await Promise.all([
addPostToDB(bPost),
addPostToDB(cPost),
addPostToDB(dPost),
addPostToDB(ePost),
addPostToDB(fPost),
addPostToDB(gPost),
addUserToDB(bUser),
addUserToDB(cUser),
addUserToDB(dUser),
addUserToDB(eUser),
addUserToDB(fUser),
addUserToDB(gUser),
])
await Promise.all(
dumpToDB([
bPost,
cPost,
dPost,
ePost,
fPost,
gPost,
bUser,
cUser,
dUser,
eUser,
fUser,
gUser,
]),
)
})
it('finds the AK-47', async () => {
@ -151,15 +178,44 @@ describe('resolvers', () => {
it('finds more than one user by slug', async () => {
variables = { query: '-maria-' }
const expected = createExpectedObject([dUser, cUser])
const expected = [cUser, dUser]
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({
data: {
findResources: expect.arrayContaining(expected),
},
})
})
it('finds the binomial formula', async () => {
variables = { query: '(a - b)² = a² - 2ab + b²' }
const expected = createExpectedObject([cPost])
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected)
})
it('finds text over linebreak', async () => {
variables = { query: 'dreht, ist' }
const expected = createExpectedObject([dPost])
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected)
})
it('finds single words with lower score', async () => {
variables = { query: 'der Panther' }
const expected = createExpectedObject([dPost, ePost, fPost, bUser])
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected)
})
it('finds something that starts with the given text', async () => {
variables = { query: 'john' }
const expected = createExpectedObject([aUser, bUser])
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected)
})
/*
it('finds Russian text', async () => {
variables = { query: 'Калашникова' }
const expected = createExpectedObject([gPost])
await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected)
})
}) */
})
})
})
@ -193,12 +249,10 @@ const dPost = createPostObject({
so müd geworden, daß er nichts mehr hält.
Ihm ist, als ob es tausend Stäbe gäbe
und hinter tausend Stäben keine Welt.
Der weiche Gang geschmeidig starker Schritte,
der sich im allerkleinsten Kreise dreht,
ist wie ein Tanz von Kraft um eine Mitte,
in der betäubt ein großer Wille steht.
Nur manchmal schiebt der Vorhang der Pupille
sich lautlos auf . Dann geht ein Bild hinein,
geht durch der Glieder angespannte Stille