Files
services/rss/parser/parser.go
Asim Aslam 1a4db7c673 add rss service (#117)
* add rss service

* git ignore rss

* update readme

* go fmt

* maintain map by url

* make rss service multi-tenant
2021-05-20 14:09:56 +01:00

64 lines
1.1 KiB
Go

package parser
import (
"errors"
"net/http"
"net/url"
"github.com/PuerkitoBio/goquery"
)
// parsers is the registry of site-specific content parsers, keyed by the
// host name of the page they understand.
var parsers = map[string]Parser{
	"www.cnbc.com": cnbcParser,
	"cnbc.com":     cnbcParser,
	"a16z.com":     a16zParser,
}
// Parser is a function that takes a page URL and returns the content
// extracted from that page, or an error if extraction failed.
type Parser func(string) (string, error)
// Parse hands uri to the parser registered for its host name and returns
// that parser's result.
//
// It returns an error if uri cannot be parsed as a URL, if no parser is
// registered for its host name, or if the selected parser itself fails.
func Parse(uri string) (string, error) {
	u, err := url.Parse(uri)
	if err != nil {
		return "", err
	}

	// Look up by Hostname rather than Host: Host keeps any ":port" suffix, so
	// a URL such as "https://cnbc.com:443/..." would never match "cnbc.com".
	parse, ok := parsers[u.Hostname()]
	if !ok {
		return "", errors.New("no parser for url")
	}
	return parse(uri)
}
// classParser returns a Parser that downloads a page and extracts the HTML
// contents of the first element matched by the goquery selector class
// (for example ".blog-content").
//
// The returned Parser fails if the request errors, the response status is
// not 200 OK, the body cannot be loaded as an HTML document, or nothing on
// the page matches the selector.
//
// NOTE: http.Get goes through http.DefaultClient, which sets no timeout, so
// a stalled server can block the caller indefinitely.
func classParser(class string) Parser {
	return func(url string) (string, error) {
		// Request the HTML page.
		res, err := http.Get(url)
		if err != nil {
			return "", err
		}
		defer res.Body.Close()

		if res.StatusCode != http.StatusOK {
			return "", errors.New("bad status code")
		}

		// Load the HTML document.
		doc, err := goquery.NewDocumentFromReader(res.Body)
		if err != nil {
			return "", err
		}

		// Html() on an empty selection yields "" with a nil error, which
		// would make a page lacking the element look like a successful parse.
		sel := doc.Find(class)
		if sel.Length() == 0 {
			return "", errors.New("no content found")
		}
		return sel.Html()
	}
}
// a16zParser extracts page content by delegating to a class parser built
// for the ".blog-content" selector.
func a16zParser(url string) (string, error) {
	extract := classParser(".blog-content")
	return extract(url)
}
// cnbcParser extracts page content by delegating to a class parser built
// for the ".PageBuilder-col-9" selector.
func cnbcParser(url string) (string, error) {
	extract := classParser(".PageBuilder-col-9")
	return extract(url)
}