diff --git a/backend/src/schema/resolvers/searches.js b/backend/src/schema/resolvers/searches.js index 918f914cd..ba67bb2d0 100644 --- a/backend/src/schema/resolvers/searches.js +++ b/backend/src/schema/resolvers/searches.js @@ -1,4 +1,5 @@ import log from './helpers/databaseLogger' +import queryString from './searches/queryString' // see http://lucene.apache.org/core/8_3_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package.description @@ -44,12 +45,12 @@ export default { const session = context.driver.session() const searchResultPromise = session.readTransaction(async transaction => { const postTransactionResponse = transaction.run(postCypher, { - query: createPostQuery(query), + query: queryString(query), limit, thisUserId, }) const userTransactionResponse = transaction.run(userCypher, { - query: createUserQuery(query), + query: queryString(query), limit, thisUserId, }) @@ -69,49 +70,3 @@ export default { }, }, } - -const createUserQuery = str => { - return createPostQuery(str) -} - -const createPostQuery = str => { - // match the whole text exactly - const normalizedString = normalizeWhitespace(str) - const escapedString = escapeSpecialCharacters(normalizedString) - let result = quoteString(escapedString) + '^8' - // match each word exactly - if (escapedString.includes(' ')) { - result += ' OR (' - escapedString.split(' ').forEach((s, i) => { - result += i === 0 ? quoteString(s) : ' AND ' + quoteString(s) - }) - result += ')^4' - } - // match at least one word exactly - if (escapedString.includes(' ')) { - escapedString.split(' ').forEach(s => { - result += ' OR ' + quoteString(s) + '^2' - }) - } - // start globbing ... - escapedString.split(' ').forEach(s => { - if (s.length > 3) { - // at least 4 letters. So AND, OR and NOT are never used unquoted - result += ' OR ' + s + '*' - } - }) - // now we could become fuzzy using ~ - return result -} - -const normalizeWhitespace = str => { - return str.replace(/\s+/g, ' ') -} - -const quoteString = str => { - return '"' + str + '"' -} - -const escapeSpecialCharacters = str => { - return str.replace(/(["[\]&|\\{}+!()^~*?:/-])/g, '\\$1') -} diff --git a/backend/src/schema/resolvers/searches.spec.js b/backend/src/schema/resolvers/searches.spec.js index b955110e1..b0b2b1e4a 100644 --- a/backend/src/schema/resolvers/searches.spec.js +++ b/backend/src/schema/resolvers/searches.spec.js @@ -48,8 +48,6 @@ const searchQuery = gql` } ` -const nothingFound = { data: { findResources: [] } } - const addBrAfterNewline = array => { return array.map(obj => { const tmp = cloneDeep(obj) @@ -112,116 +110,151 @@ let user describe('resolvers', () => { describe('searches', () => { - beforeAll(async () => { - user = await addUserToDB(aUser) - await addPostToDB(aPost) - authenticatedUser = await user.toJson() - }) - let variables - describe('basic searches', () => { + describe('given one post and one user', () => { + beforeAll(async () => { + user = await addUserToDB(aUser) + await addPostToDB(aPost) + authenticatedUser = await user.toJson() + }) + it('finds the post', async () => { variables = { query: 'beitrag' } - const expected = createExpectedObject([aPost]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) - }) - - it('finds the post searching only with capital letters', async () => { - variables = { query: 'BEITRAG' } - const expected = createExpectedObject([aPost]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) - }) - - it('does not find the post', async () => { - variables = { query: 'Unfug' } - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(nothingFound) - }) - - it('finds the user', async () => { - variables = { query: 'John' } - const expected = createExpectedObject([aUser]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) - }) - - it('does not find the user', async () => { - variables = { query: 'Unfug' } - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(nothingFound) - }) - }) - - describe('more data added', () => { - beforeAll(async () => { - await Promise.all( - dumpToDB([ - bPost, - cPost, - dPost, - ePost, - fPost, - gPost, - bUser, - cUser, - dUser, - eUser, - fUser, - gUser, - ]), - ) - }) - - it('finds the AK-47', async () => { - variables = { query: 'AK-47' } - const expected = createExpectedObject([gPost]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) - }) - - it('finds more than one post', async () => { - variables = { query: 'Beitrag' } - const expected = createExpectedObject([aPost, bPost]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) - }) - - it('finds more than one user by slug', async () => { - variables = { query: '-maria-' } - const expected = [cUser, dUser] await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ data: { - findResources: expect.arrayContaining(expected), + findResources: [ + { + __typename: 'Post', + id: 'a-post', + title: 'Beitrag', + content: 'Ein erster Beitrag', + }, + ], }, }) }) - it('finds the binomial formula', async () => { - variables = { query: '(a - b)² = a² - 2ab + b²' } - const expected = createExpectedObject([cPost]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + describe('casing', () => { + it('does not matter', async () => { + variables = { query: 'BEITRAG' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'a-post', + title: 'Beitrag', + content: 'Ein erster Beitrag', + }, + ], + }, + }) + }) }) - it('finds text over linebreak', async () => { - variables = { query: 'dreht, ist' } - const expected = createExpectedObject([dPost]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + describe('query contains first name of user', () => { + it('finds the user', async () => { + variables = { query: 'John' } + const expected = createExpectedObject([aUser]) + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + }) }) - it('finds single words with lower score', async () => { - variables = { query: 'der Panther' } - const expected = createExpectedObject([dPost, ePost, fPost, bUser]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + describe('query consists of words not present in the corpus', () => { + it('returns empty search results', async () => { + await expect( + query({ query: searchQuery, variables: { query: 'Unfug' } }), + ).resolves.toMatchObject({ data: { findResources: [] } }) + }) }) - it('finds something that starts with the given text', async () => { - variables = { query: 'john' } - const expected = createExpectedObject([aUser, bUser]) - await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) - }) + describe('given more posts and users', () => { + beforeAll(async () => { + const factoryOptions = { + authorId: 'a-user', + } + await Promise.all([ + Factory.build( + 'post', + { + id: 'b-post', + title: 'Aufruf', + content: 'Jeder sollte seinen Beitrag leisten.', + }, + factoryOptions, + ), + ...dumpToDB([ + cPost, + dPost, + ePost, + fPost, + gPost, + bUser, + cUser, + dUser, + eUser, + fUser, + gUser, + ]), + ]) + }) - /* + describe('hyphens in query', () => { + it('will be treated as ordinary characters', async () => { + variables = { query: 'AK-47' } + const expected = createExpectedObject([gPost]) + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + }) + }) + + it('finds more than one post', async () => { + variables = { query: 'Beitrag' } + const expected = createExpectedObject([aPost, bPost]) + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + }) + + it('finds more than one user by slug', async () => { + variables = { query: '-maria-' } + const expected = [cUser, dUser] + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: expect.arrayContaining(expected), + }, + }) + }) + + it('finds the binomial formula', async () => { + variables = { query: '(a - b)² = a² - 2ab + b²' } + const expected = createExpectedObject([cPost]) + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + }) + + it('finds text over linebreak', async () => { + variables = { query: 'dreht, ist' } + const expected = createExpectedObject([dPost]) + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + }) + + it('finds single words with lower score', async () => { + variables = { query: 'der Panther' } + const expected = createExpectedObject([dPost, ePost, fPost, bUser]) + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + }) + + it('finds something that starts with the given text', async () => { + variables = { query: 'john' } + const expected = createExpectedObject([aUser, bUser]) + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) + }) + + /* it('finds Russian text', async () => { variables = { query: 'Калашникова' } const expected = createExpectedObject([gPost]) await expect(query({ query: searchQuery, variables })).resolves.toMatchObject(expected) }) */ + }) }) }) }) diff --git a/backend/src/schema/resolvers/searches/queryString.js b/backend/src/schema/resolvers/searches/queryString.js new file mode 100644 index 000000000..6735b54c0 --- /dev/null +++ b/backend/src/schema/resolvers/searches/queryString.js @@ -0,0 +1,41 @@ +export default function queryString(str) { + // match the whole text exactly + const normalizedString = normalizeWhitespace(str) + const escapedString = escapeSpecialCharacters(normalizedString) + let result = quoteString(escapedString) + '^8' + // match each word exactly + if (escapedString.includes(' ')) { + result += ' OR (' + escapedString.split(' ').forEach((s, i) => { + result += i === 0 ? quoteString(s) : ' AND ' + quoteString(s) + }) + result += ')^4' + } + // match at least one word exactly + if (escapedString.includes(' ')) { + escapedString.split(' ').forEach(s => { + result += ' OR ' + quoteString(s) + '^2' + }) + } + // start globbing ... + escapedString.split(' ').forEach(s => { + if (s.length > 3) { + // at least 4 letters. So AND, OR and NOT are never used unquoted + result += ' OR ' + s + '*' + } + }) + // now we could become fuzzy using ~ + return result +} + +const normalizeWhitespace = str => { + return str.replace(/\s+/g, ' ') +} + +const escapeSpecialCharacters = str => { + return str.replace(/(["[\]&|\\{}+!()^~*?:/-])/g, '\\$1') +} + +const quoteString = str => { + return '"' + str + '"' +} diff --git a/backend/src/schema/resolvers/searches/queryString.spec.js b/backend/src/schema/resolvers/searches/queryString.spec.js new file mode 100644 index 000000000..c5133b631 --- /dev/null +++ b/backend/src/schema/resolvers/searches/queryString.spec.js @@ -0,0 +1,10 @@ +import queryString from './queryString' + +describe('queryString', () => { + describe('exact match', () => { + it.skip('boosts score by factor 8', () => { + expect(queryString('a couple of words')).toContain('"a couple of words"^8') + }) + it.todo('implement more cases here') + }) +})