From f3847e5c9707f5d39cd5f63d6eddfcd012d4261d Mon Sep 17 00:00:00 2001 From: Moriz Wahl Date: Tue, 19 Jan 2021 02:38:24 +0100 Subject: [PATCH 1/6] setup middleware for language detection --- backend/package.json | 1 + backend/src/middleware/index.js | 3 + backend/src/middleware/languages/languages.js | 24 ++++++ .../middleware/languages/languages.spec.js | 74 +++++++++++++++++++ backend/yarn.lock | 5 ++ 5 files changed, 107 insertions(+) create mode 100644 backend/src/middleware/languages/languages.js create mode 100644 backend/src/middleware/languages/languages.spec.js diff --git a/backend/package.json b/backend/package.json index f6b66b07e..786fe6641 100644 --- a/backend/package.json +++ b/backend/package.json @@ -69,6 +69,7 @@ "helmet": "~3.22.0", "ioredis": "^4.16.1", "jsonwebtoken": "~8.5.1", + "languagedetect": "^2.0.0", "linkifyjs": "~2.1.8", "lodash": "~4.17.14", "merge-graphql-schemas": "^1.7.7", diff --git a/backend/src/middleware/index.js b/backend/src/middleware/index.js index 83b0104ec..0ad8cb1ae 100644 --- a/backend/src/middleware/index.js +++ b/backend/src/middleware/index.js @@ -14,6 +14,7 @@ import notifications from './notifications/notificationsMiddleware' import hashtags from './hashtags/hashtagsMiddleware' import email from './email/emailMiddleware' import sentry from './sentryMiddleware' +import languages from './languages/languages' export default (schema) => { const middlewares = { @@ -30,6 +31,7 @@ export default (schema) => { softDelete, includedFields, orderBy, + languages, } let order = [ @@ -39,6 +41,7 @@ export default (schema) => { // 'activityPub', disabled temporarily 'validation', 'sluggify', + 'languages', 'excerpt', 'email', 'notifications', diff --git a/backend/src/middleware/languages/languages.js b/backend/src/middleware/languages/languages.js new file mode 100644 index 000000000..64471969e --- /dev/null +++ b/backend/src/middleware/languages/languages.js @@ -0,0 +1,24 @@ +import LanguageDetect from 'languagedetect' + +const setPostLanguage = (text) => { + console.log(text) + const lngDetector = new LanguageDetect() + lngDetector.setLanguageType('iso2') + const result = lngDetector.detect(text, 2) + console.log(result) + return result[0][0] +} + +export default { + Mutation: { + CreatePost: async (resolve, root, args, context, info) => { + console.log('CreatePost, language', args) + args.language = await setPostLanguage(args.content) + return resolve(root, args, context, info) + }, + UpdatePost: async (resolve, root, args, context, info) => { + args.language = await setPostLanguage(args.content) + return resolve(root, args, context, info) + }, + }, +} diff --git a/backend/src/middleware/languages/languages.spec.js b/backend/src/middleware/languages/languages.spec.js new file mode 100644 index 000000000..8eba5eccd --- /dev/null +++ b/backend/src/middleware/languages/languages.spec.js @@ -0,0 +1,74 @@ +import Factory, { cleanDatabase } from '../../db/factories' +import { gql } from '../../helpers/jest' +import { getNeode, getDriver } from '../../db/neo4j' +import createServer from '../../server' +import { createTestClient } from 'apollo-server-testing' + + +let mutate +let authenticatedUser +let variables + +const driver = getDriver() +const neode = getNeode() + +beforeAll(async () => { + const { server } = createServer({ + context: () => { + return { + driver, + neode, + user: authenticatedUser, + } + }, + }) + mutate = createTestClient(server).mutate + await cleanDatabase() + variables = {} + const user = await Factory.build('user') + authenticatedUser = await user.toJson() + await Factory.build('category', { + id: 'cat9', + name: 'Democracy & Politics', + icon: 'university', + }) +}) + + +afterAll(async () => { + //await cleanDatabase() +}) + + +const createPostMutation = gql` + mutation($title: String!, $content: String!, $categoryIds: [ID]) { + CreatePost(title: $title, content: $content, categoryIds: $categoryIds) { + language + } + } +` + +describe('languagesMiddleware', () => { + variables = { + title: 'Test post languages', + categoryIds: ['cat9'], + } + + it('detects German', async () => { + variables = { + ...variables, + content: 'Jeder sollte vor seiner eigenen Tür kehren.', + } + await expect(mutate({ + mutation: createPostMutation, + variables, + })).resolves.toMatchObject({ + data: { + CreatePost: { + language: 'de', + }, + }, + }) + }) + +}) diff --git a/backend/yarn.lock b/backend/yarn.lock index 0bbc62515..7d6558da0 100644 --- a/backend/yarn.lock +++ b/backend/yarn.lock @@ -6302,6 +6302,11 @@ knuth-shuffle-seeded@^1.0.6: dependencies: seed-random "~2.2.0" +languagedetect@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/languagedetect/-/languagedetect-2.0.0.tgz#4b8fa2b7593b2a3a02fb1100891041c53238936c" + integrity sha512-AZb/liiQ+6ZoTj4f1J0aE6OkzhCo8fyH+tuSaPfSo8YHCWLFJrdSixhtO2TYdIkjcDQNaR4RmGaV2A5FJklDMQ== + latest-version@^3.0.0: version "3.1.0" resolved "https://registry.yarnpkg.com/latest-version/-/latest-version-3.1.0.tgz#a205383fea322b33b5ae3b18abee0dc2f356ee15" From 3bd8a531f10ba173c59b8faeb980f655adf73f79 Mon Sep 17 00:00:00 2001 From: Moriz Wahl Date: Tue, 19 Jan 2021 02:54:14 +0100 Subject: [PATCH 2/6] basic tests for language detection --- backend/src/middleware/languages/languages.js | 3 -- .../middleware/languages/languages.spec.js | 53 +++++++++++++++---- 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/backend/src/middleware/languages/languages.js b/backend/src/middleware/languages/languages.js index 64471969e..b82470885 100644 --- a/backend/src/middleware/languages/languages.js +++ b/backend/src/middleware/languages/languages.js @@ -1,18 +1,15 @@ import LanguageDetect from 'languagedetect' const setPostLanguage = (text) => { - console.log(text) const lngDetector = new LanguageDetect() lngDetector.setLanguageType('iso2') const result = lngDetector.detect(text, 2) - console.log(result) return result[0][0] } export default { Mutation: { CreatePost: async (resolve, root, args, context, info) => { - console.log('CreatePost, language', args) args.language = await setPostLanguage(args.content) return resolve(root, args, context, info) }, diff --git a/backend/src/middleware/languages/languages.spec.js b/backend/src/middleware/languages/languages.spec.js index 8eba5eccd..432cb0508 100644 --- a/backend/src/middleware/languages/languages.spec.js +++ b/backend/src/middleware/languages/languages.spec.js @@ -23,15 +23,6 @@ beforeAll(async () => { }, }) mutate = createTestClient(server).mutate - await cleanDatabase() - variables = {} - const user = await Factory.build('user') - authenticatedUser = await user.toJson() - await Factory.build('category', { - id: 'cat9', - name: 'Democracy & Politics', - icon: 'university', - }) }) @@ -54,6 +45,17 @@ describe('languagesMiddleware', () => { categoryIds: ['cat9'], } + beforeAll(async () => { + await cleanDatabase() + const user = await Factory.build('user') + authenticatedUser = await user.toJson() + await Factory.build('category', { + id: 'cat9', + name: 'Democracy & Politics', + icon: 'university', + }) + }) + it('detects German', async () => { variables = { ...variables, @@ -71,4 +73,37 @@ describe('languagesMiddleware', () => { }) }) + it('detects English', async () => { + variables = { + ...variables, + content: 'A journey of a thousand miles begins with a single step.', + } + await expect(mutate({ + mutation: createPostMutation, + variables, + })).resolves.toMatchObject({ + data: { + CreatePost: { + language: 'en', + }, + }, + }) + }) + + it('detects Spanish', async () => { + variables = { + ...variables, + content: 'A caballo regalado, no le mires el diente.', + } + await expect(mutate({ + mutation: createPostMutation, + variables, + })).resolves.toMatchObject({ + data: { + CreatePost: { + language: 'es', + }, + }, + }) + }) }) From 5559a9bc06b8f84900a8cdfe2b2653f31ba4b516 Mon Sep 17 00:00:00 2001 From: Moriz Wahl Date: Tue, 19 Jan 2021 03:18:51 +0100 Subject: [PATCH 3/6] detect languages removes html tags before detection --- backend/src/middleware/languages/languages.js | 10 ++- .../middleware/languages/languages.spec.js | 61 +++++++++++++------ 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/backend/src/middleware/languages/languages.js b/backend/src/middleware/languages/languages.js index b82470885..7b9678bbc 100644 --- a/backend/src/middleware/languages/languages.js +++ b/backend/src/middleware/languages/languages.js @@ -1,9 +1,17 @@ import LanguageDetect from 'languagedetect' +import sanitizeHtml from 'sanitize-html' + +const removeHtmlTags = (input) => { + return sanitizeHtml(input, { + allowedTags: [], + allowedAttributes: {}, + }) +} const setPostLanguage = (text) => { const lngDetector = new LanguageDetect() lngDetector.setLanguageType('iso2') - const result = lngDetector.detect(text, 2) + const result = lngDetector.detect(removeHtmlTags(text), 2) return result[0][0] } diff --git a/backend/src/middleware/languages/languages.spec.js b/backend/src/middleware/languages/languages.spec.js index 432cb0508..a2f264d40 100644 --- a/backend/src/middleware/languages/languages.spec.js +++ b/backend/src/middleware/languages/languages.spec.js @@ -4,7 +4,6 @@ import { getNeode, getDriver } from '../../db/neo4j' import createServer from '../../server' import { createTestClient } from 'apollo-server-testing' - let mutate let authenticatedUser let variables @@ -25,12 +24,10 @@ beforeAll(async () => { mutate = createTestClient(server).mutate }) - afterAll(async () => { - //await cleanDatabase() + // await cleanDatabase() }) - const createPostMutation = gql` mutation($title: String!, $content: String!, $categoryIds: [ID]) { CreatePost(title: $title, content: $content, categoryIds: $categoryIds) { @@ -55,16 +52,18 @@ describe('languagesMiddleware', () => { icon: 'university', }) }) - + it('detects German', async () => { variables = { ...variables, content: 'Jeder sollte vor seiner eigenen Tür kehren.', } - await expect(mutate({ - mutation: createPostMutation, - variables, - })).resolves.toMatchObject({ + await expect( + mutate({ + mutation: createPostMutation, + variables, + }), + ).resolves.toMatchObject({ data: { CreatePost: { language: 'de', @@ -72,16 +71,18 @@ describe('languagesMiddleware', () => { }, }) }) - + it('detects English', async () => { variables = { ...variables, content: 'A journey of a thousand miles begins with a single step.', } - await expect(mutate({ - mutation: createPostMutation, - variables, - })).resolves.toMatchObject({ + await expect( + mutate({ + mutation: createPostMutation, + variables, + }), + ).resolves.toMatchObject({ data: { CreatePost: { language: 'en', @@ -95,15 +96,37 @@ describe('languagesMiddleware', () => { ...variables, content: 'A caballo regalado, no le mires el diente.', } - await expect(mutate({ - mutation: createPostMutation, - variables, - })).resolves.toMatchObject({ + await expect( + mutate({ + mutation: createPostMutation, + variables, + }), + ).resolves.toMatchObject({ data: { CreatePost: { language: 'es', }, }, }) - }) + }) + + it('detects German in between lots of html tags', async () => { + variables = { + ...variables, + content: + 'Jeder sollte vor seiner eigenen
Tür
kehren.', + } + await expect( + mutate({ + mutation: createPostMutation, + variables, + }), + ).resolves.toMatchObject({ + data: { + CreatePost: { + language: 'de', + }, + }, + }) + }) }) From 250d231a74793ccaf903f4bf013d2f371b3ae57f Mon Sep 17 00:00:00 2001 From: Moriz Wahl Date: Tue, 19 Jan 2021 21:52:10 +0100 Subject: [PATCH 4/6] auto detect language. Input field in contribution form removed. --- backend/src/middleware/languages/languages.js | 19 +++++- .../middleware/languages/languages.spec.js | 61 +++++++++++-------- backend/src/schema/resolvers/posts.spec.js | 13 ---- backend/src/schema/types/type/Post.gql | 3 + .../ContributionForm/ContributionForm.spec.js | 31 +--------- .../ContributionForm/ContributionForm.vue | 23 +------ webapp/graphql/PostMutations.js | 18 +----- 7 files changed, 59 insertions(+), 109 deletions(-) diff --git a/backend/src/middleware/languages/languages.js b/backend/src/middleware/languages/languages.js index 7b9678bbc..0cfe6a0e9 100644 --- a/backend/src/middleware/languages/languages.js +++ b/backend/src/middleware/languages/languages.js @@ -12,17 +12,30 @@ const setPostLanguage = (text) => { const lngDetector = new LanguageDetect() lngDetector.setLanguageType('iso2') const result = lngDetector.detect(removeHtmlTags(text), 2) - return result[0][0] + return { + language: result[0][0], + languageScore: result[0][1], + secondaryLanguage: result[1][0], + secondaryLanguageScore: result[1][1], + } } export default { Mutation: { CreatePost: async (resolve, root, args, context, info) => { - args.language = await setPostLanguage(args.content) + const languages = await setPostLanguage(args.content) + args = { + ...args, + ...languages, + } return resolve(root, args, context, info) }, UpdatePost: async (resolve, root, args, context, info) => { - args.language = await setPostLanguage(args.content) + const languages = await setPostLanguage(args.content) + args = { + ...args, + ...languages, + } return resolve(root, args, context, info) }, }, diff --git a/backend/src/middleware/languages/languages.spec.js b/backend/src/middleware/languages/languages.spec.js index a2f264d40..66f8c5f2b 100644 --- a/backend/src/middleware/languages/languages.spec.js +++ b/backend/src/middleware/languages/languages.spec.js @@ -25,13 +25,16 @@ beforeAll(async () => { }) afterAll(async () => { - // await cleanDatabase() + await cleanDatabase() }) const createPostMutation = gql` mutation($title: String!, $content: String!, $categoryIds: [ID]) { CreatePost(title: $title, content: $content, categoryIds: $categoryIds) { language + languageScore + secondaryLanguage + secondaryLanguageScore } } ` @@ -58,15 +61,17 @@ describe('languagesMiddleware', () => { ...variables, content: 'Jeder sollte vor seiner eigenen Tür kehren.', } - await expect( - mutate({ - mutation: createPostMutation, - variables, - }), - ).resolves.toMatchObject({ + const response = await mutate({ + mutation: createPostMutation, + variables, + }) + expect(response).toMatchObject({ data: { CreatePost: { language: 'de', + languageScore: 0.5134188034188034, + secondaryLanguage: 'no', + secondaryLanguageScore: 0.3655555555555555, }, }, }) @@ -77,15 +82,17 @@ describe('languagesMiddleware', () => { ...variables, content: 'A journey of a thousand miles begins with a single step.', } - await expect( - mutate({ - mutation: createPostMutation, - variables, - }), - ).resolves.toMatchObject({ + const response = await mutate({ + mutation: createPostMutation, + variables, + }) + expect(response).toMatchObject({ data: { CreatePost: { language: 'en', + languageScore: 0.3430188679245283, + secondaryLanguage: 'da', + secondaryLanguageScore: 0.19968553459119498, }, }, }) @@ -96,15 +103,17 @@ describe('languagesMiddleware', () => { ...variables, content: 'A caballo regalado, no le mires el diente.', } - await expect( - mutate({ - mutation: createPostMutation, - variables, - }), - ).resolves.toMatchObject({ + const response = await mutate({ + mutation: createPostMutation, + variables, + }) + expect(response).toMatchObject({ data: { CreatePost: { language: 'es', + languageScore: 0.46589743589743593, + secondaryLanguage: 'pt', + secondaryLanguageScore: 0.3834188034188034, }, }, }) @@ -116,15 +125,17 @@ describe('languagesMiddleware', () => { content: 'Jeder sollte vor seiner eigenen
Tür
kehren.', } - await expect( - mutate({ - mutation: createPostMutation, - variables, - }), - ).resolves.toMatchObject({ + const response = await mutate({ + mutation: createPostMutation, + variables, + }) + expect(response).toMatchObject({ data: { CreatePost: { language: 'de', + languageScore: 0.5134188034188034, + secondaryLanguage: 'no', + secondaryLanguageScore: 0.3655555555555555, }, }, }) diff --git a/backend/src/schema/resolvers/posts.spec.js b/backend/src/schema/resolvers/posts.spec.js index b24383fba..f0c57b8fb 100644 --- a/backend/src/schema/resolvers/posts.spec.js +++ b/backend/src/schema/resolvers/posts.spec.js @@ -317,19 +317,6 @@ describe('CreatePost', () => { expected, ) }) - - describe('language', () => { - beforeEach(() => { - variables = { ...variables, language: 'es' } - }) - - it('allows a user to set the language of the post', async () => { - const expected = { data: { CreatePost: { language: 'es' } } } - await expect(mutate({ mutation: createPostMutation, variables })).resolves.toMatchObject( - expected, - ) - }) - }) }) }) diff --git a/backend/src/schema/types/type/Post.gql b/backend/src/schema/types/type/Post.gql index 37f9dd176..2d8e39719 100644 --- a/backend/src/schema/types/type/Post.gql +++ b/backend/src/schema/types/type/Post.gql @@ -122,6 +122,9 @@ type Post { createdAt: String updatedAt: String language: String + languageScore: Float + secondaryLanguage: String + secondaryLanguageScore: Float pinnedAt: String @cypher( statement: "MATCH (this)<-[pinned:PINNED]-(:User) WHERE NOT this.deleted = true AND NOT this.disabled = true RETURN pinned.createdAt" ) diff --git a/webapp/components/ContributionForm/ContributionForm.spec.js b/webapp/components/ContributionForm/ContributionForm.spec.js index 0644c1321..a18dacb28 100644 --- a/webapp/components/ContributionForm/ContributionForm.spec.js +++ b/webapp/components/ContributionForm/ContributionForm.spec.js @@ -23,9 +23,7 @@ describe('ContributionForm.vue', () => { cancelBtn, mocks, propsData, - categoryIds, - englishLanguage, - deutschLanguage + categoryIds const postTitle = 'this is a title for a post' const postTitleTooShort = 'xx' let postTitleTooLong = '' @@ -52,7 +50,6 @@ describe('ContributionForm.vue', () => { slug: 'this-is-a-title-for-a-post', content: postContent, contentExcerpt: postContent, - language: 'en', categoryIds, }, }, @@ -109,10 +106,6 @@ describe('ContributionForm.vue', () => { postTitleInput = wrapper.find('.ds-input') postTitleInput.setValue(postTitle) await wrapper.vm.updateEditorContent(postContent) - englishLanguage = wrapper - .findAll('li') - .filter((language) => language.text() === 'English') - englishLanguage.trigger('click') }) it('title cannot be empty', async () => { @@ -147,7 +140,6 @@ describe('ContributionForm.vue', () => { variables: { title: postTitle, content: postContent, - language: 'en', id: null, image: null, }, @@ -155,10 +147,6 @@ describe('ContributionForm.vue', () => { postTitleInput = wrapper.find('.ds-input') postTitleInput.setValue(postTitle) await wrapper.vm.updateEditorContent(postContent) - englishLanguage = wrapper - .findAll('li') - .filter((language) => language.text() === 'English') - englishLanguage.trigger('click') await Vue.nextTick() await Vue.nextTick() }) @@ -168,16 +156,6 @@ describe('ContributionForm.vue', () => { expect(mocks.$apollo.mutate).toHaveBeenCalledWith(expect.objectContaining(expectedParams)) }) - it('supports changing the language', async () => { - expectedParams.variables.language = 'de' - deutschLanguage = wrapper - .findAll('li') - .filter((language) => language.text() === 'Deutsch') - deutschLanguage.trigger('click') - wrapper.find('form').trigger('submit') - expect(mocks.$apollo.mutate).toHaveBeenCalledWith(expect.objectContaining(expectedParams)) - }) - it('supports adding a teaser image', async () => { expectedParams.variables.image = { aspectRatio: null, @@ -236,10 +214,6 @@ describe('ContributionForm.vue', () => { postTitleInput.setValue(postTitle) await wrapper.vm.updateEditorContent(postContent) categoryIds = ['cat12'] - englishLanguage = wrapper - .findAll('li') - .filter((language) => language.text() === 'English') - englishLanguage.trigger('click') await Vue.nextTick() await Vue.nextTick() }) @@ -260,7 +234,6 @@ describe('ContributionForm.vue', () => { slug: 'dies-ist-ein-post', title: 'dies ist ein Post', content: 'auf Deutsch geschrieben', - language: 'de', image, categories: [ { @@ -290,7 +263,6 @@ describe('ContributionForm.vue', () => { slug: 'this-is-a-title-for-a-post', content: postContent, contentExcerpt: postContent, - language: 'en', categoryIds, }, }, @@ -301,7 +273,6 @@ describe('ContributionForm.vue', () => { variables: { title: propsData.contribution.title, content: propsData.contribution.content, - language: propsData.contribution.language, id: propsData.contribution.id, image: { sensitive: false, diff --git a/webapp/components/ContributionForm/ContributionForm.vue b/webapp/components/ContributionForm/ContributionForm.vue index 8db89173f..42ed2799e 100644 --- a/webapp/components/ContributionForm/ContributionForm.vue +++ b/webapp/components/ContributionForm/ContributionForm.vue @@ -50,17 +50,6 @@ {{ contentLength }} - - - -
{{ $t('actions.cancel') }} @@ -76,10 +65,8 @@