mirror of
https://github.com/IT4Change/Ocelot-Social.git
synced 2025-12-13 07:45:56 +00:00
Refactor the extraction of hashtag out of a posts content
Co-Authored-By: mattwr18 <mattwr18@gmail.com>
This commit is contained in:
parent
0bc4c558ae
commit
327105da7a
@ -1,17 +1,21 @@
|
|||||||
import cheerio from 'cheerio'
|
import cheerio from 'cheerio'
|
||||||
|
import {
|
||||||
|
exec,
|
||||||
|
build
|
||||||
|
} from 'xregexp/xregexp-all.js'
|
||||||
// formats of a Hashtag:
|
// formats of a Hashtag:
|
||||||
// https://en.wikipedia.org/w/index.php?title=Hashtag&oldid=905141980#Style
|
// https://en.wikipedia.org/w/index.php?title=Hashtag&oldid=905141980#Style
|
||||||
// here:
|
// here:
|
||||||
// 0. Search for whole string.
|
// 0. Search for whole string.
|
||||||
// 1. Hashtag has only 'a-z', 'A-Z', and '0-9'.
|
// 1. Hashtag has only Unicode letters and digits '0-9'.
|
||||||
// 2. If it starts with a digit '0-9', then 'a-z' or 'A-Z' has to follow.
|
// 2. If it starts with a digit '0-9', then 'a-z' or 'A-Z' has to follow.
|
||||||
const ID_REGEX = /^\/search\/hashtag\/((\p{L}+[\p{L}0-9]*)|([0-9]+\p{L}+[\p{L}0-9]*))$/g
|
const regX = build('^\/search\/hashtag\/((\\pL+[\\pL0-9]*)|([0-9]+\\pL+[\\pL0-9]*))$')
|
||||||
|
|
||||||
export default function (content) {
|
export default function (content) {
|
||||||
if (!content) return []
|
if (!content) return []
|
||||||
const $ = cheerio.load(content)
|
const $ = cheerio.load(content)
|
||||||
// We can not search for class '.hashtag', because the classes are removed at the 'xss' middleware.
|
// We can not search for class '.hashtag', because the classes are removed at the 'xss' middleware.
|
||||||
// But we have to know, which Hashtags are removed from the content es well, so we search for the 'a' html-tag.
|
// But we have to know, which Hashtags are removed from the content as well, so we search for the 'a' html-tag.
|
||||||
const urls = $('a')
|
const urls = $('a')
|
||||||
.map((_, el) => {
|
.map((_, el) => {
|
||||||
return $(el).attr('href')
|
return $(el).attr('href')
|
||||||
@ -20,7 +24,7 @@ export default function (content) {
|
|||||||
const hashtags = []
|
const hashtags = []
|
||||||
urls.forEach(url => {
|
urls.forEach(url => {
|
||||||
let match
|
let match
|
||||||
while ((match = ID_REGEX.exec(url)) != null) {
|
if ((match = exec(url, regX)) != null) {
|
||||||
hashtags.push(match[1])
|
hashtags.push(match[1])
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user