mirror of
https://github.com/kevin-DL/services.git
synced 2026-01-11 19:04:35 +00:00
add parser to feeds service
This commit is contained in:
@@ -8,6 +8,7 @@ import (
|
||||
"net/url"
|
||||
|
||||
"github.com/SlyMarbo/rss"
|
||||
"github.com/micro/services/feeds/parser"
|
||||
log "github.com/micro/micro/v3/service/logger"
|
||||
feeds "github.com/micro/services/feeds/proto"
|
||||
posts "github.com/micro/services/posts/proto"
|
||||
@@ -69,8 +70,15 @@ func (e *Feeds) fetch(f *feeds.Feed) error {
|
||||
content = item.Summary
|
||||
}
|
||||
|
||||
// if we have a parser which returns content use it
|
||||
// e.g. cnbc
|
||||
c, err := parser.Parse(item.Link)
|
||||
if err == nil && len(c) > 0 {
|
||||
content = c
|
||||
}
|
||||
|
||||
// @todo make this optional
|
||||
_, err := e.postsService.Save(context.TODO(), &posts.SaveRequest{
|
||||
_, err = e.postsService.Save(context.TODO(), &posts.SaveRequest{
|
||||
Id: id,
|
||||
Title: item.Title,
|
||||
Content: content,
|
||||
|
||||
52
feeds/parser/parser.go
Normal file
52
feeds/parser/parser.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package parser
|
||||
|
||||
import (
	"errors"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
var (
|
||||
parsers = map[string]Parser{
|
||||
"cnbc.com": cnbcParser,
|
||||
"www.cnbc.com": cnbcParser,
|
||||
}
|
||||
)
|
||||
|
||||
type Parser func(string) (string, error)
|
||||
|
||||
func Parse(uri string) (string, error) {
|
||||
u, err := url.Parse(uri)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if v, ok := parsers[u.Host]; ok {
|
||||
return v(uri)
|
||||
}
|
||||
return "", errors.New("no parser for url")
|
||||
}
|
||||
|
||||
func cnbcParser(url string) (string, error) {
|
||||
// Request the HTML page.
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode != 200 {
|
||||
return "", errors.New("bad status code")
|
||||
}
|
||||
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return doc.Find(".PageBuilder-col-9").Html()
|
||||
}
|
||||
1
go.mod
1
go.mod
@@ -4,6 +4,7 @@ go 1.14
|
||||
|
||||
require (
|
||||
github.com/Masterminds/semver/v3 v3.1.1
|
||||
github.com/PuerkitoBio/goquery v1.6.1
|
||||
github.com/SlyMarbo/rss v1.0.1
|
||||
github.com/getkin/kin-openapi v0.26.0
|
||||
github.com/golang/protobuf v1.4.3
|
||||
|
||||
5
go.sum
5
go.sum
@@ -49,6 +49,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym
|
||||
github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc=
|
||||
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
|
||||
github.com/OpenDNS/vegadns2client v0.0.0-20180418235048-a3fa4a771d87/go.mod h1:iGLljf5n9GjT6kc0HBvyI1nOKnGQbNB66VzSNbK5iks=
|
||||
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
|
||||
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
|
||||
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
|
||||
github.com/SlyMarbo/rss v1.0.1 h1:fiaIU5UhcXauVOniHOIocWG7uj8Ej6pHNarMGPJilzA=
|
||||
@@ -58,6 +60,8 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/aliyun/alibaba-cloud-sdk-go v0.0.0-20190808125512-07798873deee/go.mod h1:myCDvQSzCW+wB1WAlocEru4wMGJxy+vlxHdhegi1CDQ=
|
||||
github.com/aliyun/aliyun-oss-go-sdk v0.0.0-20190307165228-86c17b95fcd5/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8=
|
||||
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
|
||||
github.com/aws/aws-sdk-go v1.23.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
|
||||
github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 h1:OYA+5W64v3OgClL+IrOD63t4i/RW7RqrAVl9LTZ9UqQ=
|
||||
@@ -519,6 +523,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
|
||||
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180611182652-db08ff08e862/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
|
||||
Reference in New Issue
Block a user