diff --git a/backend/src/schema/resolvers/embeds.js b/backend/src/schema/resolvers/embeds.js index f1c9e53e6..6ac3284a3 100644 --- a/backend/src/schema/resolvers/embeds.js +++ b/backend/src/schema/resolvers/embeds.js @@ -1,9 +1,9 @@ -import scraper from './embeds/scraper.js' +import scrape from './embeds/scraper.js' export default { Query: { embed: async (object, {url} , context, resolveInfo) => { - return await scraper.fetch(url) + return await scrape(url) } } } diff --git a/backend/src/schema/resolvers/embeds/scraper.js b/backend/src/schema/resolvers/embeds/scraper.js index 8a86aa7b8..42a739bae 100644 --- a/backend/src/schema/resolvers/embeds/scraper.js +++ b/backend/src/schema/resolvers/embeds/scraper.js @@ -1,5 +1,5 @@ import Metascraper from 'metascraper' -import * as nodeFetch from 'node-fetch' +import fetch from 'node-fetch' import { ApolloError } from 'apollo-server' import parseUrl from 'url' @@ -56,8 +56,51 @@ const removeEmptyAttrs = obj => { return output } -const scraper = { - async fetch(targetUrl) { +const fetchEmbed = async (targetUrl) => { + const url = urlParser.parse(targetUrl) + const embedMeta = find(oEmbedProviders, provider => { + return provider.provider_url.indexOf(url.hostname) >= 0 + }) + if (!embedMeta) { + return {} + } + const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json') + + let data + try { + data = await request(`${embedUrl}?url=${targetUrl}`) + data = JSON.parse(data) + } catch (err) { + data = await request(`${embedUrl}?url=${targetUrl}&format=json`) + data = JSON.parse(data) + } + if (data) { + let output = { + type: data.type || 'link', + embed: data.html, + author: data.author_name, + date: data.upload_date ? new Date(data.upload_date).toISOString() : null + } + + output.sources = ['oembed'] + + return output + } + return {} +} +const fetchMeta = async (targetUrl) => { + + const response = await fetch(targetUrl) + const html = await response.text() + const metadata = await metascraper({ html, url: targetUrl }) + + metadata.sources = ['resource'] + metadata.type = 'link' + + return metadata +} + +export default async function scrape(targetUrl) { if (targetUrl.indexOf('//youtu.be/')) { // replace youtu.be to get proper results targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/') @@ -76,7 +119,7 @@ const scraper = { await Promise.all([ new Promise(async (resolve, reject) => { try { - meta = await scraper.fetchMeta(targetUrl) + meta = await fetchMeta(targetUrl) resolve() } catch(err) { if (process.env.DEBUG) { @@ -87,7 +130,7 @@ const scraper = { }), new Promise(async (resolve, reject) => { try { - embed = await scraper.fetchEmbed(targetUrl) + embed = await fetchEmbed(targetUrl) resolve() } catch(err) { if (process.env.DEBUG) { @@ -123,50 +166,4 @@ const scraper = { cache[targetUrl] = output return output - }, - async fetchEmbed(targetUrl) { - const url = urlParser.parse(targetUrl) - const embedMeta = find(oEmbedProviders, provider => { - return provider.provider_url.indexOf(url.hostname) >= 0 - }) - if (!embedMeta) { - return {} - } - const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json') - - let data - try { - data = await request(`${embedUrl}?url=${targetUrl}`) - data = JSON.parse(data) - } catch (err) { - data = await request(`${embedUrl}?url=${targetUrl}&format=json`) - data = JSON.parse(data) - } - if (data) { - let output = { - type: data.type || 'link', - embed: data.html, - author: data.author_name, - date: data.upload_date ? new Date(data.upload_date).toISOString() : null - } - - output.sources = ['oembed'] - - return output - } - return {} - }, - async fetchMeta(targetUrl) { - - const response = await nodeFetch(targetUrl) - const html = await response.text() - const metadata = await metascraper({ html, url: targetUrl }) - - metadata.sources = ['resource'] - metadata.type = 'link' - - return metadata - } } - -module.exports = scraper