From f8f0ff3d06a51a9b1fba54d169894f734054c2d3 Mon Sep 17 00:00:00 2001
From: Grzegorz Leoniec
Date: Fri, 28 Dec 2018 16:35:36 +0100
Subject: [PATCH] added sources to results from which data was aggregated

---
 src/schema.graphql |  1 +
 src/scraper.js     | 33 ++++++++++++++++++++++-----------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/schema.graphql b/src/schema.graphql
index 229745bb6..e7108d3ef 100644
--- a/src/schema.graphql
+++ b/src/schema.graphql
@@ -12,6 +12,7 @@ type Embed {
   lang: String
   logo: String
   embed: String
+  sources: [String]
 }
 
 type Query {
diff --git a/src/scraper.js b/src/scraper.js
index 8dc80effa..9399cad6c 100644
--- a/src/scraper.js
+++ b/src/scraper.js
@@ -26,6 +26,8 @@ const request = require('request-promise-native')
 const find = require('lodash/find')
 const isEmpty = require('lodash/isEmpty')
 const each = require('lodash/each')
+const isArray = require('lodash/isArray')
+const mergeWith = require('lodash/mergeWith')
 const urlParser = require('url')
 
 // quick in memory cache
@@ -52,12 +54,15 @@ const removeEmptyAttrs = obj => {
 
 const scraper = {
   async fetch(targetUrl) {
-    if (targetUrl.indexOf('//youtu.be/')) {
       // replace youtu.be to get proper results
       targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
     }
 
 
+    if (cache[targetUrl]) {
+      return cache[targetUrl]
+    }
+
     const url = parseUrl.parse(targetUrl, true)
 
     let meta = {}
@@ -89,10 +94,15 @@
       })
     ])
 
-    const output = {
-      ...(removeEmptyAttrs(meta)),
-      ...(removeEmptyAttrs(embed))
-    }
+    const output = mergeWith(
+      meta,
+      embed,
+      (objValue, srcValue) => {
+        if (isArray(objValue)) {
+          return objValue.concat(srcValue);
+        }
+      }
+    )
 
     if (isEmpty(output)) {
       throw new ApolloError('Not found', 404)
@@ -105,6 +115,9 @@
       output.url += `&start=${YouTubeStartParam}`
     }
 
+    // write to cache
+    cache[targetUrl] = output
+
     return output
   },
   async fetchEmbed(targetUrl) {
@@ -133,6 +146,8 @@
         date: data.upload_date ? new Date(data.upload_date).toISOString() : null
       }
 
+      output.sources = ['oembed']
+
       return output
     }
     return {}
@@ -142,16 +157,12 @@
     // const parsedURL = urlParser.parse(targetUrl)
     // console.log(parsedURL)
 
-    // get from cache
-    if (cache[targetUrl]) {
-      return cache[targetUrl]
-    }
+    // get from cach
 
     const { body: html, url } = await got(targetUrl)
     const metadata = await metascraper({ html, url })
 
-    // write to cache
-    cache[targetUrl] = metadata
+    metadata.sources = ['resource']
 
     return metadata
   }
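
The core of the change is the aggregation step in `fetch`: instead of spreading `removeEmptyAttrs(meta)` and `removeEmptyAttrs(embed)` into one object, the two results are combined with lodash's `mergeWith`, whose customizer concatenates array values so the `sources` tags contributed by the oEmbed path (`['oembed']`) and the metascraper path (`['resource']`) both survive the merge. A minimal standalone sketch of that behavior, using made-up `meta`/`embed` sample objects rather than real scraper output:

```js
const isArray = require('lodash/isArray')
const mergeWith = require('lodash/mergeWith')

// Hypothetical results from the two fetchers; the field values are invented
// for illustration and are not real scraper output.
const meta = { title: 'Some page', sources: ['resource'] }
const embed = { html: '<iframe src="..."></iframe>', sources: ['oembed'] }

// Customizer: when the destination value is an array, concatenate the source
// value onto it; returning undefined for everything else falls back to
// mergeWith's default deep merge.
const output = mergeWith(meta, embed, (objValue, srcValue) => {
  if (isArray(objValue)) {
    return objValue.concat(srcValue)
  }
})

console.log(output.sources) // [ 'resource', 'oembed' ]
```

Note that, unlike the object spread it replaces, `mergeWith` mutates its first argument, and no empty-attribute filtering is applied at this point; whether that matters depends on how `meta` is reused downstream.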
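
The caching also changes scope: previously `fetchMeta` cached only the metascraper result, while after this patch `fetch` consults the cache right after normalizing the URL and stores the fully merged output (including `sources`) once aggregation succeeds. A sketch of that lookup → aggregate → store flow, assuming a plain object cache and a stubbed `aggregate` helper standing in for the real oEmbed/metascraper calls (the helper name and return shape are placeholders, not the module's API):

```js
// quick in-memory cache keyed by the normalized target URL
const cache = {}

// stand-in for the real meta/oembed aggregation; invented for this sketch
async function aggregate(targetUrl) {
  return { url: targetUrl, sources: ['oembed', 'resource'] }
}

const scraper = {
  async fetch(targetUrl) {
    // normalize youtu.be short links so equivalent URLs share one cache key
    targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')

    // serve a previously aggregated result without touching the network
    if (cache[targetUrl]) {
      return cache[targetUrl]
    }

    const output = await aggregate(targetUrl)

    // write to cache only after aggregation succeeded
    cache[targetUrl] = output

    return output
  }
}

// usage: the second call resolves to the cached object from the first
scraper.fetch('https://youtu.be/abc123')
  .then(() => scraper.fetch('https://youtube.com/abc123'))
  .then(result => console.log(result.sources))
```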
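
One detail worth noting around the normalization at the top of `fetch`: `String.prototype.indexOf` returns -1 (a truthy value) when the substring is missing, so a bare `if (url.indexOf('//youtu.be/'))` guard passes for almost every URL and only fails when the match sits at index 0, while the unconditional `replace` is a no-op on non-YouTube URLs anyway. A tiny sketch, assuming plain string handling rather than anything specific to this module:

```js
// indexOf returns -1 when absent, which is truthy, so this guard is misleading:
console.log(Boolean('https://example.com/'.indexOf('//youtu.be/')))    // true (-1)
console.log(Boolean('https://youtu.be/abc123'.indexOf('//youtu.be/'))) // true (6)

// An explicit check reads the intent; includes() is the modern equivalent:
const normalize = url =>
  url.includes('//youtu.be/')
    ? url.replace('//youtu.be/', '//youtube.com/')
    : url

console.log(normalize('https://youtu.be/abc123')) // https://youtube.com/abc123
console.log(normalize('https://example.com/'))    // https://example.com/
```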