mirror of
https://github.com/kevin-DL/services.git
synced 2026-01-16 21:14:36 +00:00
add a16z parser
This commit is contained in:
@@ -5,12 +5,13 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
var (
|
||||
parsers = map[string]Parser{
|
||||
"cnbc.com": cnbcParser,
|
||||
"a16z.com": a16zParser,
|
||||
"cnbc.com": cnbcParser,
|
||||
"www.cnbc.com": cnbcParser,
|
||||
}
|
||||
)
|
||||
@@ -29,24 +30,34 @@ func Parse(uri string) (string, error) {
|
||||
return "", errors.New("no parser for url")
|
||||
}
|
||||
|
||||
func cnbcParser(url string) (string, error) {
|
||||
// Request the HTML page.
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
func classParser(class string) Parser {
|
||||
return func(url string) (string, error) {
|
||||
// Request the HTML page.
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
defer res.Body.Close()
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode != 200 {
|
||||
return "", errors.New("bad status code")
|
||||
}
|
||||
if res.StatusCode != 200 {
|
||||
return "", errors.New("bad status code")
|
||||
}
|
||||
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return doc.Find(".PageBuilder-col-9").Html()
|
||||
return doc.Find(class).Html()
|
||||
}
|
||||
}
|
||||
|
||||
func a16zParser(url string) (string, error) {
|
||||
return classParser(".blog-content")(url)
|
||||
}
|
||||
|
||||
func cnbcParser(url string) (string, error) {
|
||||
return classParser(".PageBuilder-col-9")(url)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user