This commit is contained in:
Robert Schäfer 2019-07-19 22:25:17 +02:00
parent 17ccbcefa6
commit 73e879eb68
2 changed files with 66 additions and 81 deletions

View File

@ -2,8 +2,8 @@ import scrape from './embeds/scraper.js'
export default {
Query: {
embed: async (object, {url} , context, resolveInfo) => {
return await scrape(url)
}
}
embed: async (object, { url }, context, resolveInfo) => {
return scrape(url)
},
},
}

View File

@ -2,17 +2,15 @@ import Metascraper from 'metascraper'
import fetch from 'node-fetch'
import { ApolloError } from 'apollo-server'
import parseUrl from 'url'
import request from 'request-promise-native'
import find from 'lodash/find'
import isEmpty from 'lodash/isEmpty'
import each from 'lodash/each'
import isArray from 'lodash/isArray'
import mergeWith from 'lodash/mergeWith'
import urlParser from 'url'
const error = require('debug')('embed:error')
const metascraper = Metascraper ([
const metascraper = Metascraper([
require('metascraper-author')(),
require('metascraper-date')(),
require('metascraper-description')(),
@ -28,12 +26,11 @@ const metascraper = Metascraper ([
require('metascraper-audio')(),
require('metascraper-soundcloud')(),
require('metascraper-video')(),
require('metascraper-youtube')()
require('metascraper-youtube')(),
// require('./rules/metascraper-embed')()
])
// quick in memory cache
let cache = {}
@ -46,18 +43,8 @@ const getEmbedProviders = async () => {
}
getEmbedProviders()
const removeEmptyAttrs = obj => {
let output = {}
each(obj, (o, k) => {
if (!isEmpty(o)) {
output[k] = o
}
})
return output
}
const fetchEmbed = async (targetUrl) => {
const url = urlParser.parse(targetUrl)
const fetchEmbed = async targetUrl => {
const url = new URL(targetUrl)
const embedMeta = find(oEmbedProviders, provider => {
return provider.provider_url.indexOf(url.hostname) >= 0
})
@ -79,7 +66,7 @@ const fetchEmbed = async (targetUrl) => {
type: data.type || 'link',
embed: data.html,
author: data.author_name,
date: data.upload_date ? new Date(data.upload_date).toISOString() : null
date: data.upload_date ? new Date(data.upload_date).toISOString() : null,
}
output.sources = ['oembed']
@ -88,8 +75,7 @@ const fetchEmbed = async (targetUrl) => {
}
return {}
}
const fetchMeta = async (targetUrl) => {
const fetchMeta = async targetUrl => {
const response = await fetch(targetUrl)
const html = await response.text()
const metadata = await metascraper({ html, url: targetUrl })
@ -101,69 +87,68 @@ const fetchMeta = async (targetUrl) => {
}
export default async function scrape(targetUrl) {
if (targetUrl.indexOf('//youtu.be/')) {
// replace youtu.be to get proper results
targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
}
if (targetUrl.hostname === 'youtu.be') {
// replace youtu.be to get proper results
targetUrl.hostname = 'youtube.com'
}
if (cache[targetUrl]) {
return cache[targetUrl]
}
if (cache[targetUrl]) {
return cache[targetUrl]
}
const url = parseUrl.parse(targetUrl, true)
const url = new URL(targetUrl)
let meta = {}
let embed = {}
let meta = {}
let embed = {}
// only get data from requested services
await Promise.all([
new Promise(async (resolve, reject) => {
try {
meta = await fetchMeta(targetUrl)
resolve()
} catch(err) {
if (process.env.DEBUG) {
console.error(`ERROR at fetchMeta | ${err.message}`)
}
resolve()
}
}),
new Promise(async (resolve, reject) => {
try {
embed = await fetchEmbed(targetUrl)
resolve()
} catch(err) {
if (process.env.DEBUG) {
console.error(`ERROR at fetchEmbed | ${err.message}`)
}
resolve()
}
})
])
const output = mergeWith(
meta,
embed,
(objValue, srcValue) => {
if (isArray(objValue)) {
return objValue.concat(srcValue);
// only get data from requested services
await Promise.all([
new Promise(async (resolve, reject) => {
try {
meta = await fetchMeta(targetUrl)
resolve()
} catch (err) {
if (process.env.DEBUG) {
error(`ERROR at fetchMeta | ${err.message}`)
}
resolve()
}
}),
new Promise(async (resolve, reject) => {
try {
embed = await fetchEmbed(targetUrl)
resolve()
} catch (err) {
if (process.env.DEBUG) {
error(`ERROR at fetchEmbed | ${err.message}`)
}
resolve()
}
}),
])
const output = mergeWith(meta, embed, (objValue, srcValue) => {
if (isArray(objValue)) {
return objValue.concat(srcValue)
}
})
if (isEmpty(output)) {
throw new ApolloError('Not found', 'NOT_FOUND')
}
// fix youtube start parameter
const YouTubeStartParam = url.searchParams.t || url.searchParams.start
if (output.publisher === 'YouTube' && YouTubeStartParam) {
output.embed = output.embed.replace(
'?feature=oembed',
`?feature=oembed&start=${YouTubeStartParam}`,
)
output.url += `&start=${YouTubeStartParam}`
}
if (isEmpty(output)) {
throw new ApolloError('Not found', 'NOT_FOUND')
}
// write to cache
cache[targetUrl] = output
// fix youtube start parameter
const YouTubeStartParam = url.query.t || url.query.start
if (output.publisher === 'YouTube' && YouTubeStartParam) {
output.embed = output.embed.replace('?feature=oembed', `?feature=oembed&start=${YouTubeStartParam}`)
output.url += `&start=${YouTubeStartParam}`
}
// write to cache
cache[targetUrl] = output
return output
return output
}