Files
services/feeds/parser/parser.go
2021-02-27 20:47:46 +00:00

64 lines
1.1 KiB
Go

package parser
import (
	"errors"
	"net/http"
	"net/url"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// parsers is the registry of known sites: each key is a host name and each
// value is the Parser used for pages served from that host. Parse looks keys
// up verbatim, so every host spelling that should be supported needs its own
// entry.
//
// NOTE(review): cnbc.com is registered with and without the "www." prefix but
// a16z.com only without it — confirm whether that is intentional.
var parsers = map[string]Parser{
	"a16z.com":     a16zParser,
	"cnbc.com":     cnbcParser,
	"www.cnbc.com": cnbcParser,
}
// Parser is the signature shared by all site-specific extractors: it takes a
// page URL as a string and returns the extracted content as a string, or an
// error if extraction failed.
type Parser func(uri string) (string, error)
// Parse hands uri to the Parser registered for the URL's host and returns
// whatever that parser produces.
//
// The registry lookup uses the host name without any port, so a URL that
// spells out a port (for example "https://cnbc.com:443/...") resolves to the
// same parser as the port-less form.
//
// An error is returned when uri cannot be parsed as a URL, when no parser is
// registered for its host, or when the selected parser itself fails.
func Parse(uri string) (string, error) {
	u, err := url.Parse(uri)
	if err != nil {
		return "", err
	}

	// u.Host keeps a ":port" suffix when the URL has one, which would never
	// match a registry key; Hostname returns the host with the port removed.
	parse, ok := parsers[u.Hostname()]
	if !ok {
		return "", errors.New("no parser for url")
	}
	return parse(uri)
}
// fetchTimeout bounds a whole page fetch made through fetchClient.
const fetchTimeout = 30 * time.Second

// fetchClient is the HTTP client shared by the parsers built by classParser.
// http.Get goes through http.DefaultClient, which has no timeout, so a server
// that stops responding would block the caller indefinitely; this client
// gives up after fetchTimeout instead.
var fetchClient = &http.Client{Timeout: fetchTimeout}

// classParser builds a Parser that downloads a page over HTTP and extracts
// content from it using the goquery selector given in class (for example
// ".blog-content").
//
// The returned Parser fails when the request cannot be completed, when the
// response status is anything other than 200 OK, or when the body cannot be
// loaded as an HTML document. Otherwise it returns the result of calling
// Html on the selection matched by class.
func classParser(class string) Parser {
	// The parameter is named rawURL rather than url so it does not shadow the
	// imported net/url package.
	return func(rawURL string) (string, error) {
		// Request the HTML page.
		res, err := fetchClient.Get(rawURL)
		if err != nil {
			return "", err
		}
		defer res.Body.Close()

		if res.StatusCode != http.StatusOK {
			// Include the status actually received so failures can be diagnosed.
			return "", errors.New("bad status code: " + res.Status)
		}

		// Load the HTML document.
		doc, err := goquery.NewDocumentFromReader(res.Body)
		if err != nil {
			return "", err
		}

		return doc.Find(class).Html()
	}
}
// a16zParser is the Parser registered for a16z.com. It extracts content from
// the page at url using the ".blog-content" selector.
func a16zParser(url string) (string, error) {
	extract := classParser(".blog-content")
	return extract(url)
}
// cnbcParser is the Parser registered for the cnbc.com hosts. It extracts
// content from the page at url using the ".PageBuilder-col-9" selector.
func cnbcParser(url string) (string, error) {
	extract := classParser(".PageBuilder-col-9")
	return extract(url)
}