Initial minimal working version with graphql, metascraper and oembed

Grzegorz Leoniec 2018-12-27 16:06:40 +01:00
commit cc06d53a57
No known key found for this signature in database
GPG Key ID: 3AA43686D4EB1377
11 changed files with 4139 additions and 0 deletions

12
.babelrc Normal file

@ -0,0 +1,12 @@
{
  "presets": [
    [
      "@babel/preset-env",
      {
        "targets": {
          "node": "10"
        }
      }
    ]
  ]
}

18
.eslintrc.js Normal file

@ -0,0 +1,18 @@
module.exports = {
  "extends": "standard",
  "parser": "babel-eslint",
  "env": {
    "es6": true,
    "node": true,
  },
  "rules": {
    "indent": [
      "error",
      2
    ],
    "quotes": [
      "error",
      "single"
    ]
  }
};

61
.gitignore vendored Normal file

@ -0,0 +1,61 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# TypeScript v1 declaration files
typings/

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env

# next.js build output
.next

3
.graphqlconfig Normal file

@ -0,0 +1,3 @@
{
  "schemaPath": "./src/schema.graphql"
}

21
LICENSE Executable file

@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 Human-Connection

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

53
package.json Normal file

@ -0,0 +1,53 @@
{
  "name": "nitro-embed",
  "version": "0.0.1",
  "description": "API service for fetching URL information like images, icons, descriptions etc. through OpenGraph, oEmbed and other standards.",
  "main": "src/index.js",
  "scripts": {
    "dev": "nodemon src/",
    "start": "node src/",
    "test": "test"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/Human-Connection/Nitro-Embed.git"
  },
  "keywords": [
    "opengraph",
    "oembed"
  ],
  "author": "Grzegorz Leoniec <greg@app-interactive.de> (http://app-interactive.de)",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/Human-Connection/Nitro-Embed/issues"
  },
  "homepage": "https://github.com/Human-Connection/Nitro-Embed#readme",
  "dependencies": {
    "@metascraper/helpers": "^4.8.5",
    "apollo-server": "^2.3.1",
    "got": "^9.5.0",
    "graphql": "^14.0.2",
    "lodash": "^4.17.11",
    "metascraper": "^4.8.5",
    "metascraper-audio": "^4.8.5",
    "metascraper-author": "^4.8.5",
    "metascraper-clearbit-logo": "^4.8.5",
    "metascraper-date": "^4.8.5",
    "metascraper-description": "^4.8.5",
    "metascraper-image": "^4.8.5",
    "metascraper-lang": "^4.8.5",
    "metascraper-lang-detector": "^4.8.5",
    "metascraper-logo": "^4.8.5",
    "metascraper-logo-favicon": "^4.8.5",
    "metascraper-publisher": "^4.8.5",
    "metascraper-title": "^4.8.5",
    "metascraper-url": "^4.8.5",
    "metascraper-video": "^4.8.5",
    "metascraper-youtube": "^4.8.5",
    "request": "^2.88.0",
    "request-promise-native": "^1.0.5"
  },
  "devDependencies": {
    "nodemon": "^1.18.9"
  }
}

8
src/graphql-schema.js Normal file

@ -0,0 +1,8 @@
const fs = require('fs')
const path = require('path')
const { gql } = require('apollo-server')

module.exports = gql(fs
  .readFileSync(path.join(__dirname, 'schema.graphql'))
  .toString('utf-8'))
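
For orientation, a sketch of the equivalent inline definition this module stands in for: the gql tag parses the SDL read from schema.graphql into the DocumentNode that ApolloServer expects as typeDefs. The shortened type body below is illustrative only; the real fields live in src/schema.graphql.

// illustrative inline equivalent of loading schema.graphql from disk
const { gql } = require('apollo-server')

module.exports = gql`
  type Embed {
    title: String
    description: String
    # ...remaining fields as defined in schema.graphql
  }

  type Query {
    embed(url: String!): Embed
  }
`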

17
src/index.js Normal file

@ -0,0 +1,17 @@
const { ApolloServer } = require('apollo-server')
const scraper = require('./scraper.js')
const typeDefs = require('./graphql-schema.js')

// the single query resolver delegates straight to the scraper module
const resolvers = {
  Query: {
    async embed (obj, { url }, ctx, info) {
      return scraper.fetch(url)
    }
  }
}

const server = new ApolloServer({ typeDefs, resolvers })

server.listen({ port: 3050 }).then(({ url }) => {
  console.log(`🚀 Nitro Embed - Server is ready at ${url}`)
})
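
As a usage sketch, assuming the server above is running locally on port 3050 and using got (already a dependency here): a client posts a GraphQL document to the root endpoint and reads the embed field from the JSON response. The YouTube URL is only an example.

// sketch of a client call against the server started above (assumed at http://localhost:3050/)
const got = require('got')

const query = `
  query {
    embed(url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ") {
      title
      description
      image
      embed
    }
  }
`

got.post('http://localhost:3050/', {
  headers: { 'content-type': 'application/json' },
  body: JSON.stringify({ query })
}).then(response => {
  // apollo-server answers with { data: { embed: { ... } } }
  console.log(JSON.parse(response.body).data.embed)
}).catch(console.error)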

19
src/schema.graphql Normal file

@ -0,0 +1,19 @@
type Embed {
  type: String
  title: String
  author: String
  publisher: String
  date: String
  description: String
  url: String
  image: String
  audio: String
  video: String
  lang: String
  logo: String
  embed: String
}

type Query {
  embed(url: String!): Embed
}

128
src/scraper.js Normal file

@ -0,0 +1,128 @@
const metascraper = require('metascraper')([
  require('metascraper-author')(),
  require('metascraper-date')(),
  require('metascraper-description')(),
  require('metascraper-image')(),
  require('metascraper-lang')(),
  require('metascraper-lang-detector')(),
  require('metascraper-logo')(),
  require('metascraper-logo-favicon')(),
  // require('metascraper-clearbit-logo')(),
  require('metascraper-publisher')(),
  require('metascraper-title')(),
  require('metascraper-url')(),
  require('metascraper-video')(),
  require('metascraper-youtube')()
  // require('./rules/metascraper-embed')()
])

const got = require('got')
const request = require('request-promise-native')
const find = require('lodash/find')
const isEmpty = require('lodash/isEmpty')
const each = require('lodash/each')
const urlParser = require('url')

// quick in-memory cache for metascraper results
let cache = {}

// oEmbed provider registry, loaded once at startup
let oEmbedProviders = []
const getEmbedProviders = async () => {
  let providers = await request('https://oembed.com/providers.json')
  providers = JSON.parse(providers)
  oEmbedProviders = providers
  return providers
}
getEmbedProviders()

// drop empty values so merging meta and embed data does not overwrite real fields
const removeEmptyAttrs = obj => {
  let output = {}
  each(obj, (o, k) => {
    if (!isEmpty(o)) {
      output[k] = o
    }
  })
  return output
}

const scraper = {
  async fetch (targetUrl) {
    if (targetUrl.indexOf('//youtu.be/') !== -1) {
      // replace youtu.be to get proper results
      targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
    }
    let meta = {}
    let embed = {}
    // run both lookups in parallel; either one may fail silently
    await Promise.all([
      scraper.fetchMeta(targetUrl)
        .then(result => { meta = result })
        .catch(() => {}),
      scraper.fetchEmbed(targetUrl)
        .then(result => { embed = result })
        .catch(() => {})
    ])
    return {
      ...(removeEmptyAttrs(meta)),
      ...(removeEmptyAttrs(embed))
    }
  },
  async fetchEmbed (targetUrl) {
    const url = urlParser.parse(targetUrl)
    const embedMeta = find(oEmbedProviders, provider => {
      return provider.provider_url.indexOf(url.hostname) >= 0
    })
    if (!embedMeta) {
      return {}
    }
    const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')
    // encode the target URL so its own query parameters survive
    let data = await request(`${embedUrl}?url=${encodeURIComponent(targetUrl)}`)
    if (data) {
      data = JSON.parse(data)
      let output = {
        type: data.type,
        embed: data.html,
        author: data.author_name,
        date: data.upload_date ? new Date(data.upload_date).toISOString() : null
      }
      return output
    }
    return {}
  },
  async fetchMeta (targetUrl) {
    // const parsedURL = urlParser.parse(targetUrl)
    // console.log(parsedURL)
    // get from cache
    if (cache[targetUrl]) {
      return cache[targetUrl]
    }
    const { body: html, url } = await got(targetUrl)
    const metadata = await metascraper({ html, url })
    // write to cache
    cache[targetUrl] = metadata
    return metadata
  }
}

module.exports = scraper
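
And a quick sketch of exercising the scraper module on its own, e.g. from a scratch script, without going through GraphQL; the URL is again just an example.

// sketch: call the scraper directly and inspect the merged result
const scraper = require('./scraper.js')

scraper.fetch('https://www.youtube.com/watch?v=dQw4w9WgXcQ')
  .then(result => {
    // merged metascraper + oEmbed fields, with empty values stripped
    console.log(result)
  })
  .catch(console.error)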

3799
yarn.lock Normal file

File diff suppressed because it is too large