mirror of
https://github.com/kevin-DL/services.git
synced 2026-01-18 13:45:09 +00:00
add parser to feeds service
This commit is contained in:
@@ -8,6 +8,7 @@ import (
|
||||
"net/url"
|
||||
|
||||
"github.com/SlyMarbo/rss"
|
||||
"github.com/micro/services/feeds/parser"
|
||||
log "github.com/micro/micro/v3/service/logger"
|
||||
feeds "github.com/micro/services/feeds/proto"
|
||||
posts "github.com/micro/services/posts/proto"
|
||||
@@ -69,8 +70,15 @@ func (e *Feeds) fetch(f *feeds.Feed) error {
|
||||
content = item.Summary
|
||||
}
|
||||
|
||||
// if we have a parser which returns content use it
|
||||
// e.g cnbc
|
||||
c, err := parser.Parse(item.Link)
|
||||
if err == nil && len(c) > 0 {
|
||||
content = c
|
||||
}
|
||||
|
||||
// @todo make this optional
|
||||
_, err := e.postsService.Save(context.TODO(), &posts.SaveRequest{
|
||||
_, err = e.postsService.Save(context.TODO(), &posts.SaveRequest{
|
||||
Id: id,
|
||||
Title: item.Title,
|
||||
Content: content,
|
||||
|
||||
52
feeds/parser/parser.go
Normal file
52
feeds/parser/parser.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package parser
|
||||
|
||||
import (
	"errors"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
var (
|
||||
parsers = map[string]Parser{
|
||||
"cnbc.com": cnbcParser,
|
||||
"www.cnbc.com": cnbcParser,
|
||||
}
|
||||
)
|
||||
|
||||
// Parser is the signature of a site-specific content extractor. Parse invokes
// it with the same URL string it was given and returns the Parser's string
// result and error unchanged.
type Parser func(string) (string, error)
|
||||
|
||||
func Parse(uri string) (string, error) {
|
||||
u, err := url.Parse(uri)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if v, ok := parsers[u.Host]; ok {
|
||||
return v(uri)
|
||||
}
|
||||
return "", errors.New("no parser for url")
|
||||
}
|
||||
|
||||
func cnbcParser(url string) (string, error) {
|
||||
// Request the HTML page.
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode != 200 {
|
||||
return "", errors.New("bad status code")
|
||||
}
|
||||
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return doc.Find(".PageBuilder-col-9").Html()
|
||||
}
|
||||
Reference in New Issue
Block a user