Mirror of https://github.com/Ocelot-Social-Community/Ocelot-Social.git, synced 2025-12-13 07:46:06 +00:00
Refactor scraper.js with ES6 imports

This commit is contained in:
parent 2e6ea62144 · commit f82b2c37b3
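
In short: the old `scraper` object, whose `fetch`, `fetchEmbed`, and `fetchMeta` methods were exported via CommonJS (`module.exports = scraper`), is split into standalone functions, and the module now default-exports a single async `scrape` function. The GraphQL resolver and the `node-fetch` import are updated to match.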
The GraphQL embed resolver:

@@ -1,9 +1,9 @@
-import scraper from './embeds/scraper.js'
+import scrape from './embeds/scraper.js'
 
 export default {
   Query: {
     embed: async (object, {url} , context, resolveInfo) => {
-      return await scraper.fetch(url)
+      return await scrape(url)
     }
   }
 }
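
The resolver now awaits the default-exported function directly instead of a method on an imported object. A minimal sketch of what this boils down to (the test-style call and the URL are hypothetical, not part of the commit):

    import scrape from './embeds/scraper.js'

    const resolvers = {
      Query: {
        // GraphQL resolver: the second argument carries the query arguments
        embed: async (object, { url }, context, resolveInfo) => scrape(url),
      },
    }

    // e.g. from a unit test, inside an async function:
    const embed = await resolvers.Query.embed(null, { url: 'https://example.com/article' })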
embeds/scraper.js:

@@ -1,5 +1,5 @@
 import Metascraper from 'metascraper'
-import * as nodeFetch from 'node-fetch'
+import fetch from 'node-fetch'
 
 import { ApolloError } from 'apollo-server'
 import parseUrl from 'url'
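
The import fix matters because `import * as nodeFetch` binds the module namespace object, which is not callable; node-fetch exposes the fetch function as its default export. A minimal sketch of the corrected usage (the URL is hypothetical):

    import fetch from 'node-fetch' // the default export is the fetch function itself

    // inside an async function:
    const response = await fetch('https://example.com')
    const html = await response.text()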
@@ -56,8 +56,51 @@ const removeEmptyAttrs = obj => {
   return output
 }
 
-const scraper = {
-  async fetch(targetUrl) {
+const fetchEmbed = async (targetUrl) => {
+  const url = urlParser.parse(targetUrl)
+  const embedMeta = find(oEmbedProviders, provider => {
+    return provider.provider_url.indexOf(url.hostname) >= 0
+  })
+  if (!embedMeta) {
+    return {}
+  }
+  const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')
+
+  let data
+  try {
+    data = await request(`${embedUrl}?url=${targetUrl}`)
+    data = JSON.parse(data)
+  } catch (err) {
+    data = await request(`${embedUrl}?url=${targetUrl}&format=json`)
+    data = JSON.parse(data)
+  }
+  if (data) {
+    let output = {
+      type: data.type || 'link',
+      embed: data.html,
+      author: data.author_name,
+      date: data.upload_date ? new Date(data.upload_date).toISOString() : null
+    }
+
+    output.sources = ['oembed']
+
+    return output
+  }
+  return {}
+}
+const fetchMeta = async (targetUrl) => {
+
+  const response = await fetch(targetUrl)
+  const html = await response.text()
+  const metadata = await metascraper({ html, url: targetUrl })
+
+  metadata.sources = ['resource']
+  metadata.type = 'link'
+
+  return metadata
+}
+
+export default async function scrape(targetUrl) {
   if (targetUrl.indexOf('//youtu.be/')) {
     // replace youtu.be to get proper results
     targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
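
One note on the unchanged context lines at the end of this hunk: `targetUrl.indexOf('//youtu.be/')` returns -1 (truthy) when the substring is absent, and 0 (falsy) only when the URL starts with it, so the branch runs for almost every input; the bug is masked because `replace()` is a no-op when the substring is missing. A stricter check would be (a suggested fix, not part of this commit):

    if (targetUrl.indexOf('//youtu.be/') !== -1) {
      // normalize youtu.be short links so provider lookups and scraping hit youtube.com
      targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
    }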
@@ -76,7 +119,7 @@ const scraper = {
   await Promise.all([
     new Promise(async (resolve, reject) => {
       try {
-        meta = await scraper.fetchMeta(targetUrl)
+        meta = await fetchMeta(targetUrl)
         resolve()
       } catch(err) {
         if (process.env.DEBUG) {
@@ -87,7 +130,7 @@ const scraper = {
     }),
     new Promise(async (resolve, reject) => {
       try {
-        embed = await scraper.fetchEmbed(targetUrl)
+        embed = await fetchEmbed(targetUrl)
         resolve()
       } catch(err) {
         if (process.env.DEBUG) {
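
Both branches wrap async work in `new Promise(async (resolve, reject) => { … })`, which is redundant: an async function already returns a promise, and any throw that escapes the inner `catch` would leave the outer promise unsettled. An equivalent, flatter shape (a refactoring sketch, not part of the commit; the empty-object fallback is an assumption based on the visible catch blocks):

    const [meta, embed] = await Promise.all([
      fetchMeta(targetUrl).catch(err => {
        if (process.env.DEBUG) console.error(err)
        return {} // tolerate failure of one source
      }),
      fetchEmbed(targetUrl).catch(err => {
        if (process.env.DEBUG) console.error(err)
        return {}
      }),
    ])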
@@ -123,50 +166,4 @@ const scraper = {
   cache[targetUrl] = output
 
   return output
-  },
-  async fetchEmbed(targetUrl) {
-    const url = urlParser.parse(targetUrl)
-    const embedMeta = find(oEmbedProviders, provider => {
-      return provider.provider_url.indexOf(url.hostname) >= 0
-    })
-    if (!embedMeta) {
-      return {}
-    }
-    const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')
-
-    let data
-    try {
-      data = await request(`${embedUrl}?url=${targetUrl}`)
-      data = JSON.parse(data)
-    } catch (err) {
-      data = await request(`${embedUrl}?url=${targetUrl}&format=json`)
-      data = JSON.parse(data)
-    }
-    if (data) {
-      let output = {
-        type: data.type || 'link',
-        embed: data.html,
-        author: data.author_name,
-        date: data.upload_date ? new Date(data.upload_date).toISOString() : null
-      }
-
-      output.sources = ['oembed']
-
-      return output
-    }
-    return {}
-  },
-  async fetchMeta(targetUrl) {
-
-    const response = await nodeFetch(targetUrl)
-    const html = await response.text()
-    const metadata = await metascraper({ html, url: targetUrl })
-
-    metadata.sources = ['resource']
-    metadata.type = 'link'
-
-    return metadata
-  }
 }
-
-module.exports = scraper
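
With the CommonJS export gone, consumers use the ES6 default export. A hypothetical usage sketch (the URL and the exact shape of the merged output are assumptions based on the fields visible in the diff):

    import scrape from './embeds/scraper.js'

    // inside an async function:
    const output = await scrape('https://youtu.be/some-video-id')
    // output merges metascraper and oEmbed results, e.g.:
    // { type: 'video', embed: '<iframe …>', author: '…', sources: ['resource', 'oembed'], … }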