Refactor scraper.js with ES6 imports

This commit is contained in:
Robert Schäfer 2019-07-19 21:29:07 +02:00
parent 2e6ea62144
commit f82b2c37b3
2 changed files with 50 additions and 53 deletions

View File

@ -1,9 +1,9 @@
import scraper from './embeds/scraper.js' import scrape from './embeds/scraper.js'
export default { export default {
Query: { Query: {
embed: async (object, {url} , context, resolveInfo) => { embed: async (object, {url} , context, resolveInfo) => {
return await scraper.fetch(url) return await scrape(url)
} }
} }
} }

View File

@ -1,5 +1,5 @@
import Metascraper from 'metascraper' import Metascraper from 'metascraper'
import * as nodeFetch from 'node-fetch' import fetch from 'node-fetch'
import { ApolloError } from 'apollo-server' import { ApolloError } from 'apollo-server'
import parseUrl from 'url' import parseUrl from 'url'
@ -56,8 +56,51 @@ const removeEmptyAttrs = obj => {
return output return output
} }
const scraper = { const fetchEmbed = async (targetUrl) => {
async fetch(targetUrl) { const url = urlParser.parse(targetUrl)
const embedMeta = find(oEmbedProviders, provider => {
return provider.provider_url.indexOf(url.hostname) >= 0
})
if (!embedMeta) {
return {}
}
const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')
let data
try {
data = await request(`${embedUrl}?url=${targetUrl}`)
data = JSON.parse(data)
} catch (err) {
data = await request(`${embedUrl}?url=${targetUrl}&format=json`)
data = JSON.parse(data)
}
if (data) {
let output = {
type: data.type || 'link',
embed: data.html,
author: data.author_name,
date: data.upload_date ? new Date(data.upload_date).toISOString() : null
}
output.sources = ['oembed']
return output
}
return {}
}
/**
 * Downloads the document at `targetUrl` and runs it through `metascraper`.
 *
 * @param {string} targetUrl - URL of the page to download.
 * @returns {Promise<Object>} The object produced by `metascraper`, with
 *   `sources` set to `['resource']` and `type` set to `'link'`.
 */
const fetchMeta = async (targetUrl) => {
  const page = await fetch(targetUrl)
  const markup = await page.text()
  const result = await metascraper({ html: markup, url: targetUrl })
  // Tag the result so consumers can tell it came from the page itself.
  result.sources = ['resource']
  result.type = 'link'
  return result
}
export default async function scrape(targetUrl) {
if (targetUrl.indexOf('//youtu.be/')) { if (targetUrl.indexOf('//youtu.be/')) {
// replace youtu.be to get proper results // replace youtu.be to get proper results
targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/') targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
@ -76,7 +119,7 @@ const scraper = {
await Promise.all([ await Promise.all([
new Promise(async (resolve, reject) => { new Promise(async (resolve, reject) => {
try { try {
meta = await scraper.fetchMeta(targetUrl) meta = await fetchMeta(targetUrl)
resolve() resolve()
} catch(err) { } catch(err) {
if (process.env.DEBUG) { if (process.env.DEBUG) {
@ -87,7 +130,7 @@ const scraper = {
}), }),
new Promise(async (resolve, reject) => { new Promise(async (resolve, reject) => {
try { try {
embed = await scraper.fetchEmbed(targetUrl) embed = await fetchEmbed(targetUrl)
resolve() resolve()
} catch(err) { } catch(err) {
if (process.env.DEBUG) { if (process.env.DEBUG) {
@ -123,50 +166,4 @@ const scraper = {
cache[targetUrl] = output cache[targetUrl] = output
return output return output
},
async fetchEmbed(targetUrl) {
const url = urlParser.parse(targetUrl)
const embedMeta = find(oEmbedProviders, provider => {
return provider.provider_url.indexOf(url.hostname) >= 0
})
if (!embedMeta) {
return {}
}
const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')
let data
try {
data = await request(`${embedUrl}?url=${targetUrl}`)
data = JSON.parse(data)
} catch (err) {
data = await request(`${embedUrl}?url=${targetUrl}&format=json`)
data = JSON.parse(data)
}
if (data) {
let output = {
type: data.type || 'link',
embed: data.html,
author: data.author_name,
date: data.upload_date ? new Date(data.upload_date).toISOString() : null
}
output.sources = ['oembed']
return output
}
return {}
},
async fetchMeta(targetUrl) {
const response = await nodeFetch(targetUrl)
const html = await response.text()
const metadata = await metascraper({ html, url: targetUrl })
metadata.sources = ['resource']
metadata.type = 'link'
return metadata
}
} }
module.exports = scraper