mirror of
https://github.com/kevin-DL/services.git
synced 2026-01-23 15:51:24 +00:00
add a16z parser
This commit is contained in:
@@ -5,12 +5,13 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
parsers = map[string]Parser{
|
parsers = map[string]Parser{
|
||||||
"cnbc.com": cnbcParser,
|
"a16z.com": a16zParser,
|
||||||
|
"cnbc.com": cnbcParser,
|
||||||
"www.cnbc.com": cnbcParser,
|
"www.cnbc.com": cnbcParser,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -29,24 +30,34 @@ func Parse(uri string) (string, error) {
|
|||||||
return "", errors.New("no parser for url")
|
return "", errors.New("no parser for url")
|
||||||
}
|
}
|
||||||
|
|
||||||
func cnbcParser(url string) (string, error) {
|
func classParser(class string) Parser {
|
||||||
// Request the HTML page.
|
return func(url string) (string, error) {
|
||||||
res, err := http.Get(url)
|
// Request the HTML page.
|
||||||
if err != nil {
|
res, err := http.Get(url)
|
||||||
return "", err
|
if err != nil {
|
||||||
}
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
|
||||||
if res.StatusCode != 200 {
|
if res.StatusCode != 200 {
|
||||||
return "", errors.New("bad status code")
|
return "", errors.New("bad status code")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the HTML document
|
// Load the HTML document
|
||||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
return doc.Find(".PageBuilder-col-9").Html()
|
return doc.Find(class).Html()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func a16zParser(url string) (string, error) {
|
||||||
|
return classParser(".blog-content")(url)
|
||||||
|
}
|
||||||
|
|
||||||
|
func cnbcParser(url string) (string, error) {
|
||||||
|
return classParser(".PageBuilder-col-9")(url)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user