Mirror of https://github.com/IT4Change/Ocelot-Social.git (synced 2025-12-13 07:45:56 +00:00)
Initial minimal working version with GraphQL, Metascraper and oEmbed
Commit cc06d53a57
.babelrc (new file, 12 lines)
@@ -0,0 +1,12 @@
{
  "presets": [
    [
      "@babel/preset-env",
      {
        "targets": {
          "node": "10"
        }
      }
    ]
  ]
}
.eslintrc.js (new file, 18 lines)
@@ -0,0 +1,18 @@
module.exports = {
  "extends": "standard",
  "parser": "babel-eslint",
  "env": {
    "es6": true,
    "node": true,
  },
  "rules": {
    "indent": [
      "error",
      2
    ],
    "quotes": [
      "error",
      "single"
    ]
  }
};
.gitignore (new file, vendored, 61 lines)
@@ -0,0 +1,61 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# TypeScript v1 declaration files
typings/

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env

# next.js build output
.next
.graphqlconfig (new file, 3 lines)
@@ -0,0 +1,3 @@
{
  "schemaPath": "./src/schema.graphql"
}
LICENSE (new executable file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 Human-Connection

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
package.json (new file, 53 lines)
@@ -0,0 +1,53 @@
{
  "name": "nitro-embed",
  "version": "0.0.1",
  "description": "API service for fetching URL information like images, icons, descriptions etc. through OpenGraph, oEmbed and other standards.",
  "main": "src/index.js",
  "scripts": {
    "dev": "nodemon src/",
    "start": "node src/",
    "test": "test"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/Human-Connection/Nitro-Embed.git"
  },
  "keywords": [
    "opengraph",
    "oembed"
  ],
  "author": "Grzegorz Leoniec <greg@app-interactive.de> (http://app-interactive.de)",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/Human-Connection/Nitro-Embed/issues"
  },
  "homepage": "https://github.com/Human-Connection/Nitro-Embed#readme",
  "dependencies": {
    "@metascraper/helpers": "^4.8.5",
    "apollo-server": "^2.3.1",
    "got": "^9.5.0",
    "graphql": "^14.0.2",
    "lodash": "^4.17.11",
    "metascraper": "^4.8.5",
    "metascraper-audio": "^4.8.5",
    "metascraper-author": "^4.8.5",
    "metascraper-clearbit-logo": "^4.8.5",
    "metascraper-date": "^4.8.5",
    "metascraper-description": "^4.8.5",
    "metascraper-image": "^4.8.5",
    "metascraper-lang": "^4.8.5",
    "metascraper-lang-detector": "^4.8.5",
    "metascraper-logo": "^4.8.5",
    "metascraper-logo-favicon": "^4.8.5",
    "metascraper-publisher": "^4.8.5",
    "metascraper-title": "^4.8.5",
    "metascraper-url": "^4.8.5",
    "metascraper-video": "^4.8.5",
    "metascraper-youtube": "^4.8.5",
    "request": "^2.88.0",
    "request-promise-native": "^1.0.5"
  },
  "devDependencies": {
    "nodemon": "^1.18.9"
  }
}
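With these scripts in place, `npm run dev` starts the service under nodemon for automatic reloads and `npm start` runs it directly; the `test` script is still a placeholder at this stage.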
src/graphql-schema.js (new file, 8 lines)
@@ -0,0 +1,8 @@
const fs = require('fs')
const path = require('path')

const { gql } = require('apollo-server')

// parse the SDL file into a GraphQL document for ApolloServer
module.exports = gql(fs
  .readFileSync(path.join(__dirname, 'schema.graphql'))
  .toString('utf-8'))
src/index.js (new file, 17 lines)
@@ -0,0 +1,17 @@
const { ApolloServer } = require('apollo-server')
const scraper = require('./scraper.js')
const typeDefs = require('./graphql-schema.js')

const resolvers = {
  Query: {
    async embed (obj, { url }, ctx, info) {
      return scraper.fetch(url)
    }
  }
}

const server = new ApolloServer({ typeDefs, resolvers })

server.listen({ port: 3050 }).then(({ url }) => {
  console.log(`🚀 Nitro Embed - Server is ready at ${url}`)
})
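A quick smoke test against the running server, sketched with the already-bundled request-promise-native; the port matches the hard-coded 3050 above, while the selected fields and the sample video URL are only illustrative:

const request = require('request-promise-native')

request({
  uri: 'http://localhost:3050/', // the URL printed by the startup log
  method: 'POST',
  json: true, // serializes the body and parses the JSON response
  body: {
    query: 'query ($url: String!) { embed(url: $url) { type title author embed } }',
    variables: { url: 'https://youtu.be/aqz-KE-bpKQ' } // any public video URL works
  }
})
  .then(res => console.log(res.data.embed))
  .catch(console.error)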
src/schema.graphql (new file, 19 lines)
@@ -0,0 +1,19 @@
type Embed {
  type: String
  title: String
  author: String
  publisher: String
  date: String
  description: String
  url: String
  image: String
  audio: String
  video: String
  lang: String
  logo: String
  embed: String
}

type Query {
  embed(url: String!): Embed
}
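Every field on Embed is a nullable String by design: the scraper returns only the attributes it could resolve (see removeEmptyAttrs below), so anything missing for a given URL simply comes back as null in the response.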
src/scraper.js (new file, 128 lines)
@@ -0,0 +1,128 @@
const metascraper = require('metascraper')([
  require('metascraper-author')(),
  require('metascraper-date')(),
  require('metascraper-description')(),
  require('metascraper-image')(),
  require('metascraper-lang')(),
  require('metascraper-lang-detector')(),
  require('metascraper-logo')(),
  require('metascraper-logo-favicon')(),
  // require('metascraper-clearbit-logo')(),
  require('metascraper-publisher')(),
  require('metascraper-title')(),
  require('metascraper-url')(),
  require('metascraper-video')(),
  require('metascraper-youtube')()

  // require('./rules/metascraper-embed')()
])
const got = require('got')
const request = require('request-promise-native')
const find = require('lodash/find')
const isEmpty = require('lodash/isEmpty')
const each = require('lodash/each')
const urlParser = require('url')

// quick in-memory cache
let cache = {}

// oEmbed provider registry, loaded once at startup
let oEmbedProviders = []
const getEmbedProviders = async () => {
  let providers = await request('https://oembed.com/providers.json')
  providers = JSON.parse(providers)
  oEmbedProviders = providers
  return providers
}
getEmbedProviders()

// drop attributes with empty values, so spreading meta and
// embed data together never overwrites a result with a blank
const removeEmptyAttrs = obj => {
  let output = {}
  each(obj, (o, k) => {
    if (!isEmpty(o)) {
      output[k] = o
    }
  })
  return output
}

const scraper = {
  async fetch (targetUrl) {
    if (targetUrl.indexOf('//youtu.be/') !== -1) {
      // expand youtu.be short links to get proper results
      targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
    }

    let meta = {}
    let embed = {}

    // fetch metascraper and oEmbed data in parallel; a failure in
    // either source is swallowed so the other can still resolve
    await Promise.all([
      scraper.fetchMeta(targetUrl).then(result => { meta = result }).catch(() => {}),
      scraper.fetchEmbed(targetUrl).then(result => { embed = result }).catch(() => {})
    ])
    return {
      ...(removeEmptyAttrs(meta)),
      ...(removeEmptyAttrs(embed))
    }
  },
  async fetchEmbed (targetUrl) {
    const url = urlParser.parse(targetUrl)
    const embedMeta = find(oEmbedProviders, provider => {
      return provider.provider_url.indexOf(url.hostname) >= 0
    })
    if (!embedMeta) {
      return {}
    }
    const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')

    // encode the target so URLs with query strings survive the round trip
    let data = await request(`${embedUrl}?url=${encodeURIComponent(targetUrl)}`)
    if (data) {
      data = JSON.parse(data)

      let output = {
        type: data.type,
        embed: data.html,
        author: data.author_name,
        date: data.upload_date ? new Date(data.upload_date).toISOString() : null
      }

      return output
    }
    return {}
  },
  async fetchMeta (targetUrl) {
    // get from cache
    if (cache[targetUrl]) {
      return cache[targetUrl]
    }

    const { body: html, url } = await got(targetUrl)
    const metadata = await metascraper({ html, url })

    // write to cache
    cache[targetUrl] = metadata

    return metadata
  }
}

module.exports = scraper
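The scraper can also be exercised without the GraphQL layer; a minimal sketch, assuming the oEmbed provider list has had a moment to load and using an illustrative URL:

const scraper = require('./src/scraper.js')

scraper.fetch('https://vimeo.com/22439234')
  .then(result => {
    // merged metascraper + oEmbed attributes, empty values stripped
    console.log(result.title, result.publisher)
    console.log(result.embed) // oEmbed HTML snippet, when the provider supplies one
  })
  .catch(console.error)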