Add Open Graph scraper module

This commit is contained in:
2023-10-09 16:08:39 -06:00
parent 1a6a4cf1de
commit eb7d0dc1f8
6 changed files with 203 additions and 0 deletions

View File

@@ -32,6 +32,7 @@
"cache-manager": "^5.2.3",
"hbs": "^4.2.0",
"minio": "^7.1.3",
"open-graph-scraper": "^6.3.0",
"ramda": "^0.29.0",
"reflect-metadata": "^0.1.13",
"rxjs": "^7.8.1",

View File

@@ -12,6 +12,7 @@ import { CacheModule } from '@nestjs/cache-manager';
import { IrcbotModule } from './ircbot/ircbot.module';
import { IrcbotService } from './ircbot/ircbot.service';
import { DinosaurwetModule } from './dinosaurwet/dinosaurwet.module';
import { OgScraperModule } from './ogscraper/ogscraper.module';
@Module({
imports: [
@@ -27,6 +28,7 @@ import { DinosaurwetModule } from './dinosaurwet/dinosaurwet.module';
DomainrproxyModule,
IrcbotModule,
DinosaurwetModule,
OgScraperModule,
],
controllers: [AppController],
providers: [AppService],

View File

@@ -0,0 +1,25 @@
import { Body, Controller, Post } from '@nestjs/common';
import { ApiProperty, ApiTags } from '@nestjs/swagger';
const ogs = require('open-graph-scraper');
import { SuccessResult } from 'open-graph-scraper';
import { OgScraperService } from './ogscraper.service';
class ScrapeOgDto {
@ApiProperty({
description: 'URL of the page to fetch Open Graph metadata of',
example:
'https://qz.com/1903322/why-pivot-tables-are-the-spreadsheets-most-powerful-tool',
})
url: string;
}
@Controller('ogscraper')
@ApiTags('open-graph-scraper')
export class OgScraperController {
constructor(private readonly ogScraperService: OgScraperService) {}
@Post('')
async scrapeOg(@Body() body: ScrapeOgDto): Promise<SuccessResult> {
return this.ogScraperService.getOg(body.url);
}
}

View File

@@ -0,0 +1,10 @@
import { Module } from '@nestjs/common';
import { OgScraperController } from './ogscraper.controller';
import { OgScraperService } from './ogscraper.service';
@Module({
controllers: [OgScraperController],
providers: [OgScraperService],
exports: [OgScraperService],
})
export class OgScraperModule {}

View File

@@ -0,0 +1,20 @@
import { Injectable } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
const ogs = require('open-graph-scraper');
import { SuccessResult } from 'open-graph-scraper';
@Injectable()
export class OgScraperService {
constructor(public readonly configService: ConfigService) {}
async getOg(url: string): Promise<SuccessResult> {
return ogs({
url,
fetchOptions: {
headers: {
'user-agent': this.configService.get<string>('userAgent') || '',
},
},
});
}
}

145
yarn.lock
View File

@@ -402,6 +402,11 @@
resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.48.0.tgz#642633964e217905436033a2bd08bf322849b7fb"
integrity sha512-ZSjtmelB7IJfWD2Fvb7+Z+ChTIKWq6kjda95fLcQKNS5aheVHn4IkfgRQE3sIIzTcSLwLcLZUD9UBt+V7+h+Pw==
"@fastify/busboy@^2.0.0":
version "2.0.0"
resolved "https://registry.yarnpkg.com/@fastify/busboy/-/busboy-2.0.0.tgz#f22824caff3ae506b18207bad4126dbc6ccdb6b8"
integrity sha512-JUFJad5lv7jxj926GPgymrWQxxjPYuJNiNjNMzqT+HiuP6Vl3dk5xzG+8sTX96np0ZAluvaMzPsjhHZ5rNuNQQ==
"@humanwhocodes/config-array@^0.11.10":
version "0.11.11"
resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.11.tgz#88a04c570dbbc7dd943e4712429c3df09bc32844"
@@ -1715,6 +1720,11 @@ body-parser@1.20.2:
type-is "~1.6.18"
unpipe "1.0.0"
boolbase@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
integrity sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==
bplist-parser@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/bplist-parser/-/bplist-parser-0.2.0.tgz#43a9d183e5bf9d545200ceac3e712f79ebbe8d0e"
@@ -1878,6 +1888,36 @@ chardet@^0.7.0:
resolved "https://registry.yarnpkg.com/chardet/-/chardet-0.7.0.tgz#90094849f0937f2eedc2425d0d28a9e5f0cbad9e"
integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==
chardet@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/chardet/-/chardet-2.0.0.tgz#24a74bd3954965ba2d30f490a915fc3cf053051f"
integrity sha512-xVgPpulCooDjY6zH4m9YW3jbkaBe3FKIAvF5sj5t7aBNsVl2ljIE+xwJ4iNgiDZHFQvNIpjdKdVOQvvk5ZfxbQ==
cheerio-select@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4"
integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==
dependencies:
boolbase "^1.0.0"
css-select "^5.1.0"
css-what "^6.1.0"
domelementtype "^2.3.0"
domhandler "^5.0.3"
domutils "^3.0.1"
cheerio@^1.0.0-rc.12:
version "1.0.0-rc.12"
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683"
integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==
dependencies:
cheerio-select "^2.1.0"
dom-serializer "^2.0.0"
domhandler "^5.0.3"
domutils "^3.0.1"
htmlparser2 "^8.0.1"
parse5 "^7.0.0"
parse5-htmlparser2-tree-adapter "^7.0.0"
chokidar@3.5.3, chokidar@^3.5.3:
version "3.5.3"
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd"
@@ -2125,6 +2165,22 @@ cross-spawn@^7.0.0, cross-spawn@^7.0.2, cross-spawn@^7.0.3:
shebang-command "^2.0.0"
which "^2.0.1"
css-select@^5.1.0:
version "5.1.0"
resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6"
integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==
dependencies:
boolbase "^1.0.0"
css-what "^6.1.0"
domhandler "^5.0.2"
domutils "^3.0.1"
nth-check "^2.0.1"
css-what@^6.1.0:
version "6.1.0"
resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4"
integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==
debug@2.6.9:
version "2.6.9"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
@@ -2251,6 +2307,36 @@ doctrine@^3.0.0:
dependencies:
esutils "^2.0.2"
dom-serializer@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53"
integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==
dependencies:
domelementtype "^2.3.0"
domhandler "^5.0.2"
entities "^4.2.0"
domelementtype@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d"
integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==
domhandler@^5.0.2, domhandler@^5.0.3:
version "5.0.3"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31"
integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==
dependencies:
domelementtype "^2.3.0"
domutils@^3.0.1:
version "3.1.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.1.0.tgz#c47f551278d3dc4b0b1ab8cbb42d751a6f0d824e"
integrity sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==
dependencies:
dom-serializer "^2.0.0"
domelementtype "^2.3.0"
domhandler "^5.0.3"
dotenv-expand@10.0.0:
version "10.0.0"
resolved "https://registry.yarnpkg.com/dotenv-expand/-/dotenv-expand-10.0.0.tgz#12605d00fb0af6d0a592e6558585784032e4ef37"
@@ -2308,6 +2394,11 @@ enhanced-resolve@^5.0.0, enhanced-resolve@^5.15.0, enhanced-resolve@^5.7.0:
graceful-fs "^4.2.4"
tapable "^2.2.0"
entities@^4.2.0, entities@^4.4.0:
version "4.5.0"
resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48"
integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==
error-ex@^1.3.1:
version "1.3.2"
resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.2.tgz#b4ac40648107fdcdcfae242f428bea8a14d4f1bf"
@@ -3016,6 +3107,16 @@ html-escaper@^2.0.0:
resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453"
integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==
htmlparser2@^8.0.1:
version "8.0.2"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21"
integrity sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==
dependencies:
domelementtype "^2.3.0"
domhandler "^5.0.3"
domutils "^3.0.1"
entities "^4.4.0"
http-errors@2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-2.0.0.tgz#b7774a1486ef73cf7667ac9ae0858c012c57b9d3"
@@ -4239,6 +4340,13 @@ npmlog@^5.0.1:
gauge "^3.0.0"
set-blocking "^2.0.0"
nth-check@^2.0.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d"
integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==
dependencies:
boolbase "^1.0.0"
object-assign@^4, object-assign@^4.1.1:
version "4.1.1"
resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863"
@@ -4277,6 +4385,16 @@ onetime@^6.0.0:
dependencies:
mimic-fn "^4.0.0"
open-graph-scraper@^6.3.0:
version "6.3.0"
resolved "https://registry.yarnpkg.com/open-graph-scraper/-/open-graph-scraper-6.3.0.tgz#aecac3dea4e206a43e1d2659bbb114cbeb25f45e"
integrity sha512-Kl8Xkigvw20ZppXt1m2RCucGo/L3ZsgtThZyE5CS4GJr5MrxDz7wQN7iaO/wkM3JuiP5XzblZ3gIpLIsC9dhQw==
dependencies:
chardet "^2.0.0"
cheerio "^1.0.0-rc.12"
undici "^5.25.4"
validator "^13.11.0"
open@^9.1.0:
version "9.1.0"
resolved "https://registry.yarnpkg.com/open/-/open-9.1.0.tgz#684934359c90ad25742f5a26151970ff8c6c80b6"
@@ -4377,6 +4495,21 @@ parse-json@^5.0.0, parse-json@^5.2.0:
json-parse-even-better-errors "^2.3.0"
lines-and-columns "^1.1.6"
parse5-htmlparser2-tree-adapter@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1"
integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==
dependencies:
domhandler "^5.0.2"
parse5 "^7.0.0"
parse5@^7.0.0:
version "7.1.2"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32"
integrity sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==
dependencies:
entities "^4.4.0"
parseurl@~1.3.3:
version "1.3.3"
resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"
@@ -5328,6 +5461,13 @@ uid@2.0.2:
dependencies:
"@lukeed/csprng" "^1.0.0"
undici@^5.25.4:
version "5.25.4"
resolved "https://registry.yarnpkg.com/undici/-/undici-5.25.4.tgz#7d8ef81d94f84cd384986271e5e5599b6dff4296"
integrity sha512-450yJxT29qKMf3aoudzFpIciqpx6Pji3hEWaXqXmanbXF58LTAGCKxcJjxMXWu3iG+Mudgo3ZUfDB6YDFd/dAw==
dependencies:
"@fastify/busboy" "^2.0.0"
universalify@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/universalify/-/universalify-2.0.0.tgz#75a4984efedc4b08975c5aeb73f530d02df25717"
@@ -5398,6 +5538,11 @@ v8-to-istanbul@^9.0.1:
"@types/istanbul-lib-coverage" "^2.0.1"
convert-source-map "^1.6.0"
validator@^13.11.0:
version "13.11.0"
resolved "https://registry.yarnpkg.com/validator/-/validator-13.11.0.tgz#23ab3fd59290c61248364eabf4067f04955fbb1b"
integrity sha512-Ii+sehpSfZy+At5nPdnyMhx78fEoPDkR2XW/zimHEL3MyGJQOCQ7WeP20jPYRz7ZCpcKLB21NxuXHF3bxjStBQ==
vary@^1, vary@~1.1.2:
version "1.1.2"
resolved "https://registry.yarnpkg.com/vary/-/vary-1.1.2.tgz#2299f02c6ded30d4a5961b0b9f74524a18f634fc"