Merging embed to master

This commit is contained in:
Matt Rider 2019-07-03 08:47:45 -03:00
commit 7a3694aff4
13 changed files with 4380 additions and 0 deletions

12
embed/.babelrc Normal file
View File

@ -0,0 +1,12 @@
{
"presets": [
[
"@babel/preset-env",
{
"targets": {
"node": "10"
}
}
]
]
}

18
embed/.eslintrc.js Normal file
View File

@ -0,0 +1,18 @@
module.exports = {
"extends": "standard",
"parser": "babel-eslint",
"env": {
"es6": true,
"node": true,
},
"rules": {
"indent": [
"error",
2
],
"quotes": [
"error",
"single"
]
}
};

61
embed/.gitignore vendored Normal file
View File

@ -0,0 +1,61 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
# nyc test coverage
.nyc_output
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
# next.js build output
.next

3
embed/.graphqlconfig Normal file
View File

@ -0,0 +1,3 @@
{
"schemaPath": "./src/schema.graphql"
}

21
embed/LICENSE Executable file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Human-Connection
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

53
embed/README.md Normal file
View File

@ -0,0 +1,53 @@
# Nitro-Embed
API service for fetching URL information such as images, icons and descriptions through OpenGraph, oEmbed and other standards.
> early version of simpler embed api with Metascraper and oEmbed for better results.
![API Screenshot](screenshot.png)
---
## Todos
- [x] Metascraper
- [x] oEmbed
- [ ] Temporary API Cache in some DB?
- [ ] Scrape for meta tags
- [ ] Image Caching
---
## Install and start development server
Install dependencies
```shell
yarn install
```
Start development server
```shell
yarn dev
```
## Example Request
Post the following query to the endpoint, or open the URL printed by the `yarn dev` script and run it there to get your first result.
```graphql
{
embed(url: "https://human-connection.org") {
type
embed
title
description
author
publisher
url
date
image
audio
video
lang
logo
sources
}
}
```

54
embed/package.json Normal file
View File

@ -0,0 +1,54 @@
{
"name": "nitro-embed",
"version": "0.0.1",
"description": "API Service for fetching URL Information like images, icons, descriptions etc. through OpenGraph, oEmbed and other standards.",
"main": "src/index.js",
"scripts": {
"dev": "nodemon src/",
"start": "node src/",
"test": "test"
},
"repository": {
"type": "git",
"url": "git+https://github.com/Human-Connection/Nitro-Embed.git"
},
"keywords": [
"opengraph",
"oembed"
],
"author": "Grzegorz Leoniec <greg@app-interactive.de> (http://app-interactive.de)",
"license": "MIT",
"bugs": {
"url": "https://github.com/Human-Connection/Nitro-Embed/issues"
},
"homepage": "https://github.com/Human-Connection/Nitro-Embed#readme",
"dependencies": {
"@metascraper/helpers": "^4.8.5",
"apollo-server": "^2.6.7",
"got": "^9.6.0",
"graphql": "^14.0.2",
"lodash": "^4.17.11",
"metascraper": "^4.10.3",
"metascraper-audio": "^5.5.0",
"metascraper-author": "^4.8.5",
"metascraper-clearbit-logo": "^5.3.0",
"metascraper-date": "^4.8.5",
"metascraper-description": "^5.5.0",
"metascraper-image": "^4.8.5",
"metascraper-lang": "^4.8.5",
"metascraper-lang-detector": "^4.8.5",
"metascraper-logo": "^5.5.0",
"metascraper-logo-favicon": "^4.8.5",
"metascraper-publisher": "^4.8.5",
"metascraper-soundcloud": "^5.5.3",
"metascraper-title": "^4.8.5",
"metascraper-url": "^5.5.0",
"metascraper-video": "^4.8.5",
"metascraper-youtube": "^4.8.5",
"request": "^2.88.0",
"request-promise-native": "^1.0.5"
},
"devDependencies": {
"nodemon": "^1.18.9"
}
}

BIN
embed/screenshot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

View File

@ -0,0 +1,8 @@
const fs = require('fs')
const path = require('path')
const { gql } = require('apollo-server')
// Read the SDL file that sits next to this module as UTF-8 text and export the
// result of passing it through apollo-server's `gql`.
const schemaFile = path.join(__dirname, 'schema.graphql')
const schemaSource = fs.readFileSync(schemaFile, 'utf-8')

module.exports = gql(schemaSource)

21
embed/src/index.js Normal file
View File

@ -0,0 +1,21 @@
const { ApolloServer, gql } = require('apollo-server')
const scraper = require('./scraper.js')
const typeDefs = require('./graphql-schema.js')
// Resolver map handed to ApolloServer. The only field is Query.embed, which
// forwards the requested `url` argument to scraper.fetch and resolves with
// whatever the scraper produces (or rejects with the scraper's error).
const resolvers = {
  Query: {
    embed: async (parent, args) => {
      const result = await scraper.fetch(args.url)
      return result
    }
  }
}
// Apollo server exposing the embed schema with the resolver map above.
const server = new ApolloServer({ typeDefs, resolvers })

// Outside production, switch on the DEBUG flag that scraper.js checks before
// logging fetch errors. Environment values are always strings, so the string
// is written explicitly instead of relying on coercion of the boolean `true`.
if (process.env.NODE_ENV !== 'production') {
  process.env.DEBUG = 'true'
}

// Start listening on the fixed port 3050. A start-up failure (for example the
// port already being in use) is reported and turned into a non-zero exit code
// instead of being left as an unhandled promise rejection.
server
  .listen({ port: 3050 })
  .then(({ url }) => {
    console.log(`🚀 Nitro Embed - Server is ready at ${url}`)
  })
  .catch(err => {
    console.error(`Nitro Embed - Server failed to start | ${err.message}`)
    process.exitCode = 1
  })

20
embed/src/schema.graphql Normal file
View File

@ -0,0 +1,20 @@
# Metadata describing a single URL, as produced by scraper.fetch: the result of
# scraping the page itself merged with the provider's oEmbed response when a
# matching oEmbed provider is known. Every field is nullable.
type Embed {
# oEmbed `type` when an oEmbed response was merged in, otherwise 'link'
type: String
title: String
# oEmbed `author_name` when present, otherwise the scraped author
author: String
publisher: String
# ISO 8601 timestamp when derived from oEmbed `upload_date`;
# otherwise the scraped date (presumably also a date string — confirm)
date: String
description: String
url: String
image: String
audio: String
video: String
lang: String
logo: String
# Embeddable HTML snippet taken from the oEmbed `html` field
embed: String
# Which lookups contributed: 'resource' (scraped page) and/or 'oembed'
sources: [String]
}
# Root query type of the embed service.
type Query {
# Fetch metadata for the given URL (required argument). The resolver delegates
# to scraper.fetch, which raises a NOT_FOUND error when nothing could be gathered.
embed(url: String!): Embed
}

172
embed/src/scraper.js Normal file
View File

@ -0,0 +1,172 @@
// Shared metascraper instance used by scraper.fetchMeta. It is configured with
// the rule bundles listed below; every bundle module exports a factory that is
// called here with no options.
// NOTE(review): the order of the bundles may influence which value wins when
// several rules can supply the same field — confirm before reordering.
const metascraper = require('metascraper')([
require('metascraper-author')(),
require('metascraper-date')(),
require('metascraper-description')(),
require('metascraper-image')(),
require('metascraper-lang')(),
require('metascraper-lang-detector')(),
require('metascraper-logo')(),
require('metascraper-logo-favicon')(),
// disabled: clearbit logo bundle (still listed in package.json)
// require('metascraper-clearbit-logo')(),
require('metascraper-publisher')(),
require('metascraper-title')(),
require('metascraper-url')(),
require('metascraper-audio')(),
require('metascraper-soundcloud')(),
require('metascraper-video')(),
require('metascraper-youtube')()
// disabled: local embed rule bundle (no ./rules directory is visible in this commit)
// require('./rules/metascraper-embed')()
])
const { ApolloError } = require('apollo-server')
const parseUrl = require('url')
const got = require('got')
const request = require('request-promise-native')
const find = require('lodash/find')
const isEmpty = require('lodash/isEmpty')
const each = require('lodash/each')
const isArray = require('lodash/isArray')
const mergeWith = require('lodash/mergeWith')
const urlParser = require('url')
// quick in memory cache
let cache = {}
// oEmbed provider registry consulted by scraper.fetchEmbed. It stays empty
// until the download below succeeds.
let oEmbedProviders = []

/**
 * Downloads the public oEmbed provider list and stores it in `oEmbedProviders`.
 *
 * @returns {Promise<Array>} the parsed provider list
 * @throws rejects when the download fails or the body is not valid JSON;
 *   `oEmbedProviders` is left untouched in that case
 */
const getEmbedProviders = async () => {
  const body = await request('https://oembed.com/providers.json')
  const providers = JSON.parse(body)
  oEmbedProviders = providers
  return providers
}

// Warm the registry once at module load. A failure here must not surface as an
// unhandled promise rejection (which can take the whole process down); the
// service keeps running without oEmbed data and the problem is reported.
getEmbedProviders().catch(err => {
  console.error(`ERROR at getEmbedProviders | ${err.message}`)
})
/**
 * Builds a copy of `obj` that keeps only the entries whose value is not
 * considered empty by lodash `isEmpty`. The input object is not modified.
 *
 * NOTE(review): lodash `isEmpty` reportedly treats numbers and booleans as
 * empty, so such values would be dropped as well — confirm this is intended.
 * The helper is not referenced anywhere else in the visible part of this file.
 *
 * @param {Object} obj - attributes to filter
 * @returns {Object} new object containing the non-empty attributes
 */
const removeEmptyAttrs = obj => {
  const kept = {}
  each(obj, (value, key) => {
    if (isEmpty(value)) {
      return
    }
    kept[key] = value
  })
  return kept
}
/**
 * Runs one metadata lookup and converts a failure into an empty result so that
 * a single failing source never aborts the whole fetch. The error is logged
 * only when the DEBUG environment flag is set.
 *
 * @param {string} label - lookup name used in the log line
 * @param {Function} task - function returning the lookup promise
 * @returns {Promise<Object>} the lookup result, or `{}` on failure
 */
const fetchOrEmpty = async (label, task) => {
  try {
    return (await task()) || {}
  } catch (err) {
    if (process.env.DEBUG) {
      console.error(`ERROR at ${label} | ${err.message}`)
    }
    return {}
  }
}

/**
 * Converts a date-like value to an ISO 8601 string.
 *
 * @param {*} value - raw value, e.g. an oEmbed `upload_date`
 * @returns {string|null} ISO string, or null when missing or unparseable
 */
const toISODate = value => {
  if (!value) {
    return null
  }
  const parsed = new Date(value)
  // toISOString() throws on an invalid date; that must not discard the rest
  // of the oEmbed response
  return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString()
}

const scraper = {
  /**
   * Collects metadata for a URL from the page itself (fetchMeta) and from the
   * matching oEmbed provider (fetchEmbed), merges both and caches the result.
   *
   * Results are kept in the module-level `cache` object, keyed by the
   * normalized URL; nothing in this block ever evicts an entry.
   *
   * @param {string} targetUrl - URL to describe
   * @returns {Promise<Object>} merged metadata
   * @throws {ApolloError} code NOT_FOUND when neither source yields anything
   */
  async fetch (targetUrl) {
    // youtu.be short links give poorer results, so rewrite them to youtube.com.
    // (replace is a no-op for every other URL, so no pre-check is needed)
    targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')

    if (cache[targetUrl]) {
      return cache[targetUrl]
    }

    const url = parseUrl.parse(targetUrl, true)

    // query both sources in parallel; each one degrades to {} on failure
    const [meta, embed] = await Promise.all([
      fetchOrEmpty('fetchMeta', () => scraper.fetchMeta(targetUrl)),
      fetchOrEmpty('fetchEmbed', () => scraper.fetchEmbed(targetUrl))
    ])

    const output = mergeWith(meta, embed, (objValue, srcValue) => {
      // a missing oEmbed value (e.g. `date: null`) must not wipe out the
      // value scraped from the page
      if (srcValue === null || srcValue === undefined) {
        return objValue
      }
      // list fields such as `sources` accumulate instead of being replaced
      if (isArray(objValue)) {
        return objValue.concat(srcValue)
      }
      return undefined
    })

    if (isEmpty(output)) {
      throw new ApolloError('Not found', 'NOT_FOUND')
    }

    // carry a YouTube start offset (?t= / ?start=) over to the embed and URL
    const YouTubeStartParam = url.query.t || url.query.start
    if (output.publisher === 'YouTube' && YouTubeStartParam) {
      // the value is caller-supplied and ends up inside HTML: encode it
      const start = encodeURIComponent(YouTubeStartParam)
      if (typeof output.embed === 'string') {
        output.embed = output.embed.replace('?feature=oembed', `?feature=oembed&start=${start}`)
      }
      if (typeof output.url === 'string') {
        const separator = output.url.includes('?') ? '&' : '?'
        output.url += `${separator}start=${start}`
      }
    }

    // write to cache
    cache[targetUrl] = output
    return output
  },

  /**
   * Looks up the oEmbed provider whose `provider_url` contains the target's
   * hostname and queries the provider's first endpoint.
   *
   * @param {string} targetUrl - URL to describe
   * @returns {Promise<Object>} `{ type, embed, author, date, sources }`, or
   *   `{}` when no provider matches or the response is empty
   * @throws rejects when both request attempts fail or return invalid JSON
   */
  async fetchEmbed (targetUrl) {
    const { hostname } = urlParser.parse(targetUrl)
    if (!hostname) {
      return {}
    }

    const embedMeta = find(oEmbedProviders, provider => {
      return provider.provider_url.indexOf(hostname) >= 0
    })
    if (!embedMeta) {
      return {}
    }

    const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')
    // oEmbed requires query arguments to be URL-encoded; without this a target
    // containing `&` would be cut off at its first parameter
    const query = `url=${encodeURIComponent(targetUrl)}`

    let data
    try {
      data = JSON.parse(await request(`${embedUrl}?${query}`))
    } catch (err) {
      // second attempt asks for JSON explicitly via the format argument
      data = JSON.parse(await request(`${embedUrl}?${query}&format=json`))
    }

    if (!data) {
      return {}
    }

    return {
      type: data.type || 'link',
      embed: data.html,
      author: data.author_name,
      date: toISODate(data.upload_date),
      sources: ['oembed']
    }
  },

  /**
   * Downloads the page and extracts its metadata with the shared metascraper
   * instance.
   *
   * @param {string} targetUrl - URL to download
   * @returns {Promise<Object>} scraped metadata, tagged with
   *   `sources: ['resource']` and `type: 'link'`
   * @throws rejects when the download or the extraction fails
   */
  async fetchMeta (targetUrl) {
    const { body: html, url } = await got(targetUrl)
    const metadata = await metascraper({ html, url })
    metadata.sources = ['resource']
    metadata.type = 'link'
    return metadata
  }
}
module.exports = scraper

3937
embed/yarn.lock Normal file

File diff suppressed because it is too large Load Diff