diff --git a/backend/src/schema/resolvers/searches.js b/backend/src/schema/resolvers/searches.js index 994d19fa2..5c1e43952 100644 --- a/backend/src/schema/resolvers/searches.js +++ b/backend/src/schema/resolvers/searches.js @@ -1,22 +1,19 @@ import log from './helpers/databaseLogger' +import { queryString } from './searches/queryString' + +// see http://lucene.apache.org/core/8_3_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package.description export default { Query: { findResources: async (_parent, args, context, _resolveInfo) => { const { query, limit } = args const { id: thisUserId } = context.user - // see http://lucene.apache.org/core/8_3_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package.description - const myQuery = query - .replace(/\s+/g, ' ') - .replace(/[[@#:*~\\$|^\]?/"'(){}+?!,.-;]/g, '') - .split(' ') - .map(s => (s.toLowerCase().match(/^(not|and|or)$/) ? '"' + s + '"' : s + '*')) - .join(' ') + const postCypher = ` CALL db.index.fulltext.queryNodes('post_fulltext_search', $query) YIELD node as resource, score MATCH (resource)<-[:WROTE]-(author:User) - WHERE score >= 0.5 + WHERE score >= 0.0 AND NOT ( author.deleted = true OR author.disabled = true OR resource.deleted = true OR resource.disabled = true @@ -39,11 +36,12 @@ export default { CALL db.index.fulltext.queryNodes('user_fulltext_search', $query) YIELD node as resource, score MATCH (resource) - WHERE score >= 0.5 + WHERE score >= 0.0 AND NOT (resource.deleted = true OR resource.disabled = true) RETURN resource {.*, __typename: labels(resource)[0]} LIMIT $limit ` + const myQuery = queryString(query) const session = context.driver.session() const searchResultPromise = session.readTransaction(async transaction => { diff --git a/backend/src/schema/resolvers/searches.spec.js b/backend/src/schema/resolvers/searches.spec.js new file mode 100644 index 000000000..c454833b8 --- /dev/null +++ b/backend/src/schema/resolvers/searches.spec.js @@ -0,0 +1,444 @@ +import Factory, { cleanDatabase } from '../../db/factories' +import { gql } from '../../helpers/jest' +import { getNeode, getDriver } from '../../db/neo4j' +import createServer from '../../server' +import { createTestClient } from 'apollo-server-testing' + +let query, authenticatedUser, user + +const driver = getDriver() +const neode = getNeode() + +beforeAll(async () => { + await cleanDatabase() + const { server } = createServer({ + context: () => { + return { + driver, + neode, + user: authenticatedUser, + } + }, + }) + query = createTestClient(server).query +}) + +afterAll(async () => { + await cleanDatabase() +}) + +const searchQuery = gql` + query($query: String!) { + findResources(query: $query, limit: 5) { + __typename + ... on Post { + id + title + content + } + ... on User { + id + slug + name + } + } + } +` +describe('resolvers/searches', () => { + let variables + + describe('given one user', () => { + beforeAll(async () => { + user = await Factory.build('user', { + id: 'a-user', + name: 'John Doe', + slug: 'john-doe', + }) + authenticatedUser = await user.toJson() + }) + + describe('query contains first name of user', () => { + it('finds the user', async () => { + variables = { query: 'John' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + id: 'a-user', + name: 'John Doe', + slug: 'john-doe', + }, + ], + }, + }) + }) + }) + + describe('adding one post', () => { + beforeAll(async () => { + await Factory.build( + 'post', + { + id: 'a-post', + title: 'Beitrag', + content: 'Ein erster Beitrag', + }, + { authorId: 'a-user' }, + ) + }) + + describe('query contains title of post', () => { + it('finds the post', async () => { + variables = { query: 'beitrag' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'a-post', + title: 'Beitrag', + content: 'Ein erster Beitrag', + }, + ], + }, + }) + }) + }) + + describe('casing', () => { + it('does not matter', async () => { + variables = { query: 'BEITRAG' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'a-post', + title: 'Beitrag', + content: 'Ein erster Beitrag', + }, + ], + }, + }) + }) + }) + + describe('query consists of words not present in the corpus', () => { + it('returns empty search results', async () => { + await expect( + query({ query: searchQuery, variables: { query: 'Unfug' } }), + ).resolves.toMatchObject({ data: { findResources: [] } }) + }) + }) + + describe('testing different post content', () => { + beforeAll(async () => { + return Promise.all([ + Factory.build( + 'post', + { + id: 'b-post', + title: 'Aufruf', + content: 'Jeder sollte seinen Beitrag leisten.', + }, + { authorId: 'a-user' }, + ), + Factory.build( + 'post', + { + id: 'g-post', + title: 'Zusammengesetzte Wörter', + content: `Ein Bindestrich kann zwischen zwei Substantiven auch dann gesetzt werden, wenn drei gleichlautende Buchstaben aufeinandertreffen. Das ist etwa bei einem „Teeei“ der Fall, das so korrekt geschrieben ist. Möglich ist hier auch die Schreibweise mit Bindestrich: Tee-Ei.`, + }, + { authorId: 'a-user' }, + ), + Factory.build( + 'post', + { + id: 'c-post', + title: 'Die binomischen Formeln', + content: `1. binomische Formel: (a + b)² = a² + 2ab + b² +2. binomische Formel: (a - b)² = a² - 2ab + b² +3. binomische Formel: (a + b)(a - b) = a² - b²`, + }, + { authorId: 'a-user' }, + ), + Factory.build( + 'post', + { + id: 'd-post', + title: 'Der Panther', + content: `Sein Blick ist vom Vorübergehn der Stäbe +so müd geworden, daß er nichts mehr hält. +Ihm ist, als ob es tausend Stäbe gäbe +und hinter tausend Stäben keine Welt.`, + }, + { authorId: 'a-user' }, + ), + ]) + }) + + describe('a post which content contains the title of the first post', () => { + describe('query contains the title of the first post', () => { + it('finds both posts', async () => { + variables = { query: 'beitrag' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: expect.arrayContaining([ + { + __typename: 'Post', + id: 'a-post', + title: 'Beitrag', + content: 'Ein erster Beitrag', + }, + { + __typename: 'Post', + id: 'b-post', + title: 'Aufruf', + content: 'Jeder sollte seinen Beitrag leisten.', + }, + ]), + }, + }) + }) + }) + }) + + describe('a post that contains a hyphen between two words and German quotation marks', () => { + describe('hyphens in query', () => { + it('will be treated as ordinary characters', async () => { + variables = { query: 'tee-ei' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'g-post', + title: 'Zusammengesetzte Wörter', + content: `Ein Bindestrich kann zwischen zwei Substantiven auch dann gesetzt werden, wenn drei gleichlautende Buchstaben aufeinandertreffen. Das ist etwa bei einem „Teeei“ der Fall, das so korrekt geschrieben ist. Möglich ist hier auch die Schreibweise mit Bindestrich: Tee-Ei.`, + }, + ], + }, + }) + }) + }) + + describe('German quotation marks in query to test unicode characters (\u201E ... \u201C)', () => { + it('will be treated as ordinary characters', async () => { + variables = { query: '„teeei“' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'g-post', + title: 'Zusammengesetzte Wörter', + content: `Ein Bindestrich kann zwischen zwei Substantiven auch dann gesetzt werden, wenn drei gleichlautende Buchstaben aufeinandertreffen. Das ist etwa bei einem „Teeei“ der Fall, das so korrekt geschrieben ist. Möglich ist hier auch die Schreibweise mit Bindestrich: Tee-Ei.`, + }, + ], + }, + }) + }) + }) + }) + + describe('a post that contains a simple mathematical exprssion and line breaks', () => { + describe('query a part of the mathematical expression', () => { + it('finds that post', async () => { + variables = { query: '(a - b)²' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'c-post', + title: 'Die binomischen Formeln', + content: `1. binomische Formel: (a + b)² = a² + 2ab + b²
+2. binomische Formel: (a - b)² = a² - 2ab + b²
+3. binomische Formel: (a + b)(a - b) = a² - b²`, + }, + ], + }, + }) + }) + }) + + describe('query the same part of the mathematical expression without spaces', () => { + it('finds that post', async () => { + variables = { query: '(a-b)²' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'c-post', + title: 'Die binomischen Formeln', + content: `1. binomische Formel: (a + b)² = a² + 2ab + b²
+2. binomische Formel: (a - b)² = a² - 2ab + b²
+3. binomische Formel: (a + b)(a - b) = a² - b²`, + }, + ], + }, + }) + }) + }) + + describe('query the mathematical expression over line break', () => { + it('finds that post', async () => { + variables = { query: '+ b² 2.' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'c-post', + title: 'Die binomischen Formeln', + content: `1. binomische Formel: (a + b)² = a² + 2ab + b²
+2. binomische Formel: (a - b)² = a² - 2ab + b²
+3. binomische Formel: (a + b)(a - b) = a² - b²`, + }, + ], + }, + }) + }) + }) + }) + + describe('a post that contains a poem', () => { + describe('query for more than one word, e.g. the title of the poem', () => { + it('finds the poem and another post that contains only one word but with lower score', async () => { + variables = { query: 'der panther' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: [ + { + __typename: 'Post', + id: 'd-post', + title: 'Der Panther', + content: `Sein Blick ist vom Vorübergehn der Stäbe
+so müd geworden, daß er nichts mehr hält.
+Ihm ist, als ob es tausend Stäbe gäbe
+und hinter tausend Stäben keine Welt.`, + }, + { + __typename: 'Post', + id: 'g-post', + title: 'Zusammengesetzte Wörter', + content: `Ein Bindestrich kann zwischen zwei Substantiven auch dann gesetzt werden, wenn drei gleichlautende Buchstaben aufeinandertreffen. Das ist etwa bei einem „Teeei“ der Fall, das so korrekt geschrieben ist. Möglich ist hier auch die Schreibweise mit Bindestrich: Tee-Ei.`, + }, + ], + }, + }) + }) + }) + + describe('query for the first four letters of two longer words', () => { + it('finds the posts that contain words starting with these four letters', async () => { + variables = { query: 'Vorü Subs' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: expect.arrayContaining([ + { + __typename: 'Post', + id: 'd-post', + title: 'Der Panther', + content: `Sein Blick ist vom Vorübergehn der Stäbe
+so müd geworden, daß er nichts mehr hält.
+Ihm ist, als ob es tausend Stäbe gäbe
+und hinter tausend Stäben keine Welt.`, + }, + { + __typename: 'Post', + id: 'g-post', + title: 'Zusammengesetzte Wörter', + content: `Ein Bindestrich kann zwischen zwei Substantiven auch dann gesetzt werden, wenn drei gleichlautende Buchstaben aufeinandertreffen. Das ist etwa bei einem „Teeei“ der Fall, das so korrekt geschrieben ist. Möglich ist hier auch die Schreibweise mit Bindestrich: Tee-Ei.`, + }, + ]), + }, + }) + }) + }) + }) + }) + + describe('adding two users that have the same word in their slugs', () => { + beforeAll(async () => { + await Promise.all([ + Factory.build('user', { + id: 'c-user', + name: 'Rainer Maria Rilke', + slug: 'rainer-maria-rilke', + }), + Factory.build('user', { + id: 'd-user', + name: 'Erich Maria Remarque', + slug: 'erich-maria-remarque', + }), + ]) + }) + + describe('query the word that both slugs contain', () => { + it('finds both users', async () => { + variables = { query: '-maria-' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: expect.arrayContaining([ + { + __typename: 'User', + id: 'c-user', + name: 'Rainer Maria Rilke', + slug: 'rainer-maria-rilke', + }, + { + __typename: 'User', + id: 'd-user', + name: 'Erich Maria Remarque', + slug: 'erich-maria-remarque', + }, + ]), + }, + }) + }) + }) + }) + + describe('adding a post, written by a user who is muted by the authenticated user', () => { + beforeAll(async () => { + const mutedUser = await Factory.build('user', { + id: 'muted-user', + name: 'Muted', + slug: 'muted', + }) + await user.relateTo(mutedUser, 'muted') + await Factory.build( + 'post', + { + id: 'muted-post', + title: 'Beleidigender Beitrag', + content: 'Dieser Beitrag stammt von einem bleidigendem Nutzer.', + }, + { authorId: 'muted-user' }, + ) + }) + + describe('query for text in a post written by a muted user', () => { + it('does not include the post of the muted user in the results', async () => { + variables = { query: 'beitrag' } + await expect(query({ query: searchQuery, variables })).resolves.toMatchObject({ + data: { + findResources: expect.not.arrayContaining([ + { + __typename: 'Post', + id: 'muted-post', + title: 'Beleidigender Beitrag', + content: 'Dieser Beitrag stammt von einem bleidigendem Nutzer.', + }, + ]), + }, + }) + }) + }) + }) + }) + }) +}) diff --git a/backend/src/schema/resolvers/searches/queryString.js b/backend/src/schema/resolvers/searches/queryString.js new file mode 100644 index 000000000..c3500188c --- /dev/null +++ b/backend/src/schema/resolvers/searches/queryString.js @@ -0,0 +1,47 @@ +export function queryString(str) { + const normalizedString = normalizeWhitespace(str) + const escapedString = escapeSpecialCharacters(normalizedString) + return ` +${matchWholeText(escapedString)} +${matchEachWordExactly(escapedString)} +${matchSomeWordsExactly(escapedString)} +${matchBeginningOfWords(escapedString)} +` +} + +const matchWholeText = (str, boost = 8) => { + return `"${str}"^${boost}` +} + +const matchEachWordExactly = (str, boost = 4) => { + if (!str.includes(' ')) return '' + const tmp = str + .split(' ') + .map((s, i) => (i === 0 ? `"${s}"` : `AND "${s}"`)) + .join(' ') + return `(${tmp})^${boost}` +} + +const matchSomeWordsExactly = (str, boost = 2) => { + if (!str.includes(' ')) return '' + return str + .split(' ') + .map(s => `"${s}"^${boost}`) + .join(' ') +} + +const matchBeginningOfWords = str => { + return str + .split(' ') + .filter(s => s.length > 3) + .map(s => s + '*') + .join(' ') +} + +export function normalizeWhitespace(str) { + return str.replace(/\s+/g, ' ').trim() +} + +export function escapeSpecialCharacters(str) { + return str.replace(/(["[\]&|\\{}+!()^~*?:/-])/g, '\\$1') +} diff --git a/backend/src/schema/resolvers/searches/queryString.spec.js b/backend/src/schema/resolvers/searches/queryString.spec.js new file mode 100644 index 000000000..23a746be1 --- /dev/null +++ b/backend/src/schema/resolvers/searches/queryString.spec.js @@ -0,0 +1,43 @@ +import { queryString, escapeSpecialCharacters, normalizeWhitespace } from './queryString' + +describe('queryString', () => { + describe('special characters', () => { + it('does escaping correctly', () => { + expect(escapeSpecialCharacters('+ - && || ! ( ) { } [ ] ^ " ~ * ? : \\ / ')).toEqual( + '\\+ \\- \\&\\& \\|\\| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ ', + ) + }) + }) + + describe('whitespace', () => { + it('normalizes correctly', () => { + expect(normalizeWhitespace(' a \t \n b \n ')).toEqual('a b') + }) + }) + + describe('exact match', () => { + it('boosts score by factor 8', () => { + expect(queryString('a couple of words')).toContain('"a couple of words"^8') + }) + }) + + describe('match all words exactly', () => { + it('boosts score by factor 4', () => { + expect(queryString('a couple of words')).toContain( + '("a" AND "couple" AND "of" AND "words")^4', + ) + }) + }) + + describe('match at least one word exactly', () => { + it('boosts score by factor 2', () => { + expect(queryString('a couple of words')).toContain('"a"^2 "couple"^2 "of"^2 "words"^2') + }) + }) + + describe('globbing for longer words', () => { + it('globs words with more than three characters', () => { + expect(queryString('a couple of words')).toContain('couple* words*') + }) + }) +}) diff --git a/cypress/integration/search/Search.feature b/cypress/integration/search/Search.feature index e83f58477..b77b45d8e 100644 --- a/cypress/integration/search/Search.feature +++ b/cypress/integration/search/Search.feature @@ -7,7 +7,7 @@ Feature: Search Given I have a user account And we have the following posts in our database: | id | title | content | - | p1 | 101 Essays that will change the way you think | 101 Essays, of course! | + | p1 | 101 Essays that will change the way you think | 101 Essays, of course (PR)! | | p2 | No searched for content | will be found in this post, I guarantee | And we have the following user accounts: | slug | name | id | @@ -24,7 +24,7 @@ Feature: Search | 101 Essays that will change the way you think | Scenario: Press enter starts search - When I type "Es" and press Enter + When I type "PR" and press Enter Then I should have one item in the select dropdown Then I should see the following posts in the select dropdown: | title |