add a16z parser

This commit is contained in:
Asim Aslam
2021-02-27 20:47:46 +00:00
parent efee90ab12
commit f800662806

View File

@@ -5,12 +5,13 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
var ( var (
parsers = map[string]Parser{ parsers = map[string]Parser{
"cnbc.com": cnbcParser, "a16z.com": a16zParser,
"cnbc.com": cnbcParser,
"www.cnbc.com": cnbcParser, "www.cnbc.com": cnbcParser,
} }
) )
@@ -29,24 +30,34 @@ func Parse(uri string) (string, error) {
return "", errors.New("no parser for url") return "", errors.New("no parser for url")
} }
func cnbcParser(url string) (string, error) { func classParser(class string) Parser {
// Request the HTML page. return func(url string) (string, error) {
res, err := http.Get(url) // Request the HTML page.
if err != nil { res, err := http.Get(url)
return "", err if err != nil {
} return "", err
}
defer res.Body.Close() defer res.Body.Close()
if res.StatusCode != 200 { if res.StatusCode != 200 {
return "", errors.New("bad status code") return "", errors.New("bad status code")
} }
// Load the HTML document // Load the HTML document
doc, err := goquery.NewDocumentFromReader(res.Body) doc, err := goquery.NewDocumentFromReader(res.Body)
if err != nil { if err != nil {
return "", err return "", err
} }
return doc.Find(".PageBuilder-col-9").Html() return doc.Find(class).Html()
}
}
func a16zParser(url string) (string, error) {
return classParser(".blog-content")(url)
}
func cnbcParser(url string) (string, error) {
return classParser(".PageBuilder-col-9")(url)
} }