Merging embed to master

2026-03-01 12:44:37 +00:00 · 2019-07-03 08:47:45 -03:00 · 2019-07-03 08:47:45 -03:00 · 7a3694aff4
commit 7a3694aff4
parent 089b07c3e0 5091fe5759
13 changed files with 4380 additions and 0 deletions
--- a/embed/.babelrc
+++ b/embed/.babelrc
@ -0,0 +1,12 @@
+{
+  "presets": [
+    [
+      "@babel/preset-env",
+      {
+        "targets": {
+          "node": "10"
+        }
+      }
+    ]
+  ]
+}
--- a/embed/.eslintrc.js
+++ b/embed/.eslintrc.js
@ -0,0 +1,18 @@
+module.exports = {
+  "extends": "standard",
+  "parser": "babel-eslint",
+  "env": {
+    "es6": true,
+    "node": true,
+  },
+  "rules": {
+    "indent": [
+      "error",
+      2
+    ],
+    "quotes": [
+      "error",
+      "single"
+    ]
+  }
+};
--- a/embed/.gitignore
+++ b/embed/.gitignore
@ -0,0 +1,61 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# TypeScript v1 declaration files
+typings/
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variables file
+.env
+
+# next.js build output
+.next
--- a/embed/.graphqlconfig
+++ b/embed/.graphqlconfig
@ -0,0 +1,3 @@
+{
+  "schemaPath": "./src/schema.graphql"
+}
--- a/embed/LICENSE
+++ b/embed/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Human-Connection
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/embed/README.md
+++ b/embed/README.md
@ -0,0 +1,53 @@
+# Nitro-Embed
+API Service for fetching URL Information like images, icons, descriptions etc. through OpenGraph, oEmbed and other standards.
+
+> early version of simpler embed api with Metascraper and oEmbed for better results.
+
+![API Screenshot](screenshot.png)
+
+---
+
+## Todos
+- [x] Metascraper
+- [x] oEmbed
+- [ ] Temporary API Cache in some DB?
+- [ ] Scrape for meta tags
+- [ ] Image Caching
+
+---
+
+## Install and start development server
+
+Install dependencies
+```shell
+yarn install
+```
+
+Start development server
+```shell
+yarn dev
+```
+
+## Example Request
+Send the following query by posting it to the endpoint, or open the URL printed by the `yarn dev` script and run it there to get your first result.
+
+```graphql
+{
+  embed(url: "https://human-connection.org") {
+    type
+    embed
+    title
+    description
+    author
+    publisher
+    url
+    date
+    image
+    audio
+    video
+    lang
+    logo
+    sources
+  }
+}
+```
--- a/embed/package.json
+++ b/embed/package.json
@ -0,0 +1,54 @@
+{
+  "name": "nitro-embed",
+  "version": "0.0.1",
+  "description": "API Service for fetching URL Information like images, icons, descriptions etc. through OpenGraph, oEmbed and other standards.",
+  "main": "src/index.js",
+  "scripts": {
+    "dev": "nodemon src/",
+    "start": "node src/",
+    "test": "test"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/Human-Connection/Nitro-Embed.git"
+  },
+  "keywords": [
+    "opengraph",
+    "oembed"
+  ],
+  "author": "Grzegorz Leoniec <greg@app-interactive.de> (http://app-interactive.de)",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/Human-Connection/Nitro-Embed/issues"
+  },
+  "homepage": "https://github.com/Human-Connection/Nitro-Embed#readme",
+  "dependencies": {
+    "@metascraper/helpers": "^4.8.5",
+    "apollo-server": "^2.6.7",
+    "got": "^9.6.0",
+    "graphql": "^14.0.2",
+    "lodash": "^4.17.11",
+    "metascraper": "^4.10.3",
+    "metascraper-audio": "^5.5.0",
+    "metascraper-author": "^4.8.5",
+    "metascraper-clearbit-logo": "^5.3.0",
+    "metascraper-date": "^4.8.5",
+    "metascraper-description": "^5.5.0",
+    "metascraper-image": "^4.8.5",
+    "metascraper-lang": "^4.8.5",
+    "metascraper-lang-detector": "^4.8.5",
+    "metascraper-logo": "^5.5.0",
+    "metascraper-logo-favicon": "^4.8.5",
+    "metascraper-publisher": "^4.8.5",
+    "metascraper-soundcloud": "^5.5.3",
+    "metascraper-title": "^4.8.5",
+    "metascraper-url": "^5.5.0",
+    "metascraper-video": "^4.8.5",
+    "metascraper-youtube": "^4.8.5",
+    "request": "^2.88.0",
+    "request-promise-native": "^1.0.5"
+  },
+  "devDependencies": {
+    "nodemon": "^1.18.9"
+  }
+}
--- a/embed/screenshot.png
+++ b/embed/screenshot.png
--- a/embed/src/graphql-schema.js
+++ b/embed/src/graphql-schema.js
@ -0,0 +1,8 @@
+const fs = require('fs')
+const path = require('path')
+
+const { gql } = require('apollo-server')
+
+module.exports = gql(fs
+  .readFileSync(path.join(__dirname, 'schema.graphql'))
+  .toString('utf-8'))
--- a/embed/src/index.js
+++ b/embed/src/index.js
@ -0,0 +1,21 @@
+const { ApolloServer, gql } = require('apollo-server')
+const scraper = require('./scraper.js')
+const typeDefs = require('./graphql-schema.js')
+
+const resolvers = {
+  Query: {
+    async embed(obj, {url}, ctx, info) {
+      return await scraper.fetch(url)
+    }
+  }
+}
+
+const server = new ApolloServer({ typeDefs, resolvers })
+
+if (process.env.NODE_ENV !== 'production') {
+  process.env.DEBUG = true
+}
+
+server.listen({ port: 3050 }).then(({ url }) => {
+  console.log(`🚀 Nitro Embed - Server is ready at ${url}`)
+})
--- a/embed/src/schema.graphql
+++ b/embed/src/schema.graphql
@ -0,0 +1,20 @@
+# Information gathered about a URL: scraped page metadata merged with the
+# oEmbed response when an oEmbed provider matches the URL's host.
+type Embed {
+  # "link" for scraped pages; replaced by the oEmbed type when one is returned
+  type: String
+  title: String
+  author: String
+  publisher: String
+  date: String
+  description: String
+  url: String
+  image: String
+  audio: String
+  video: String
+  lang: String
+  logo: String
+  # HTML snippet taken from the oEmbed response's `html` field
+  embed: String
+  # Which lookups contributed data: "resource" (page scrape) and/or "oembed"
+  sources: [String]
+}
+
+type Query {
+  # Look up embed information for the given URL
+  embed(url: String!): Embed
+}
--- a/embed/src/scraper.js
+++ b/embed/src/scraper.js
@ -0,0 +1,172 @@
+const metascraper = require('metascraper')([
+  require('metascraper-author')(),
+  require('metascraper-date')(),
+  require('metascraper-description')(),
+  require('metascraper-image')(),
+  require('metascraper-lang')(),
+  require('metascraper-lang-detector')(),
+  require('metascraper-logo')(),
+  require('metascraper-logo-favicon')(),
+  // require('metascraper-clearbit-logo')(),
+  require('metascraper-publisher')(),
+  require('metascraper-title')(),
+  require('metascraper-url')(),
+  require('metascraper-audio')(),
+  require('metascraper-soundcloud')(),
+  require('metascraper-video')(),
+  require('metascraper-youtube')()
+
+  // require('./rules/metascraper-embed')()
+])
+const { ApolloError } = require('apollo-server')
+const parseUrl = require('url')
+
+const got = require('got')
+const request = require('request-promise-native')
+const find = require('lodash/find')
+const isEmpty = require('lodash/isEmpty')
+const each = require('lodash/each')
+const isArray = require('lodash/isArray')
+const mergeWith = require('lodash/mergeWith')
+const urlParser = require('url')
+
+// quick in memory cache
+let cache = {}
+
+let oEmbedProviders = []
+const getEmbedProviders = async () => {
+  let providers = await request('https://oembed.com/providers.json')
+  providers = JSON.parse(providers)
+  oEmbedProviders = providers
+  return providers
+}
+getEmbedProviders()
+
+const removeEmptyAttrs = obj => {
+  let output = {}
+  each(obj, (o, k) => {
+    if (!isEmpty(o)) {
+      output[k] = o
+    }
+  })
+  return output
+}
+
+const scraper = {
+  async fetch(targetUrl) {
+    if (targetUrl.indexOf('//youtu.be/')) {
+      // replace youtu.be to get proper results
+      targetUrl = targetUrl.replace('//youtu.be/', '//youtube.com/')
+    }
+
+    if (cache[targetUrl]) {
+      return cache[targetUrl]
+    }
+
+    const url = parseUrl.parse(targetUrl, true)
+
+    let meta = {}
+    let embed = {}
+
+    // only get data from requested services
+    await Promise.all([
+      new Promise(async (resolve, reject) => {
+        try {
+          meta = await scraper.fetchMeta(targetUrl)
+          resolve()
+        } catch(err) {
+          if (process.env.DEBUG) {
+            console.error(`ERROR at fetchMeta | ${err.message}`)
+          }
+          resolve()
+        }
+      }),
+      new Promise(async (resolve, reject) => {
+        try {
+          embed = await scraper.fetchEmbed(targetUrl)
+          resolve()
+        } catch(err) {
+          if (process.env.DEBUG) {
+            console.error(`ERROR at fetchEmbed | ${err.message}`)
+          }
+          resolve()
+        }
+      })
+    ])
+
+    const output = mergeWith(
+      meta,
+      embed,
+      (objValue, srcValue) => {
+        if (isArray(objValue)) {
+          return objValue.concat(srcValue);
+        }
+      }
+    )
+
+    if (isEmpty(output)) {
+      throw new ApolloError('Not found', 'NOT_FOUND')
+    }
+
+    // fix youtube start parameter
+    const YouTubeStartParam = url.query.t || url.query.start
+    if (output.publisher === 'YouTube' && YouTubeStartParam) {
+      output.embed = output.embed.replace('?feature=oembed', `?feature=oembed&start=${YouTubeStartParam}`)
+      output.url += `&start=${YouTubeStartParam}`
+    }
+
+    // write to cache
+    cache[targetUrl] = output
+
+    return output
+  },
+  async fetchEmbed(targetUrl) {
+    const url = urlParser.parse(targetUrl)
+    const embedMeta = find(oEmbedProviders, provider => {
+      return provider.provider_url.indexOf(url.hostname) >= 0
+    })
+    if (!embedMeta) {
+      return {}
+    }
+    const embedUrl = embedMeta.endpoints[0].url.replace('{format}', 'json')
+
+    let data
+    try {
+      data = await request(`${embedUrl}?url=${targetUrl}`)
+      data = JSON.parse(data)
+    } catch (err) {
+      data = await request(`${embedUrl}?url=${targetUrl}&format=json`)
+      data = JSON.parse(data)
+    }
+    if (data) {
+      let output = {
+        type: data.type || 'link',
+        embed: data.html,
+        author: data.author_name,
+        date: data.upload_date ? new Date(data.upload_date).toISOString() : null
+      }
+
+      output.sources = ['oembed']
+
+      return output
+    }
+    return {}
+  },
+  async fetchMeta(targetUrl) {
+
+    // const parsedURL = urlParser.parse(targetUrl)
+    // console.log(parsedURL)
+
+    // get from cach
+
+    const { body: html, url } = await got(targetUrl)
+    const metadata = await metascraper({ html, url })
+
+    metadata.sources = ['resource']
+    metadata.type = 'link'
+
+    return metadata
+  }
+}
+
+module.exports = scraper
--- a/embed/yarn.lock
+++ b/embed/yarn.lock