159 lines
3.1 KiB
Go
159 lines
3.1 KiB
Go
package goh
|
|
|
|
import (
|
|
"errors"
|
|
"io"
|
|
|
|
"github.com/andybalholm/cascadia"
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
type Parser interface {
|
|
// Find the first element that matches the selector.
|
|
Find(filter string) SingleParser
|
|
// FindAll the elements that match the selector.
|
|
FindAll(name string) MultiParser
|
|
}
|
|
|
|
type MultiParser interface {
|
|
Parser
|
|
// Attributes returns the attribute values of the elements that match
|
|
Attributes(filter, attr string) ([]string, error)
|
|
// Values returns the values of the elements that match
|
|
Values(filter string) ([]string, error)
|
|
}
|
|
|
|
type SingleParser interface {
|
|
Parser
|
|
// Attribute returns the attribute value of the first element that matches
|
|
Attribute(attr string) (string, error)
|
|
// Value returns the value of the first element that matches
|
|
Value() (string, error)
|
|
}
|
|
|
|
type parser struct {
|
|
doc *html.Node
|
|
docs []*html.Node
|
|
err error
|
|
}
|
|
|
|
func NewParser(reader io.Reader) (Parser, error) {
|
|
doc, err := html.Parse(reader)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &parser{doc: doc}, nil
|
|
}
|
|
|
|
func (p *parser) Find(filter string) SingleParser {
|
|
if p.err != nil {
|
|
return p
|
|
}
|
|
sel, err := cascadia.Parse(filter)
|
|
if err != nil {
|
|
p.err = err
|
|
return p
|
|
}
|
|
if p.doc == nil && len(p.docs) == 0 {
|
|
p.err = errors.New("no nodes found")
|
|
return p
|
|
}
|
|
if p.doc == nil && len(p.docs) > 0 {
|
|
p.doc = p.docs[0]
|
|
}
|
|
doc := cascadia.Query(p.doc, sel)
|
|
return &parser{doc: doc}
|
|
}
|
|
|
|
func (p *parser) FindAll(filter string) MultiParser {
|
|
if p.err != nil {
|
|
return p
|
|
}
|
|
sel, err := cascadia.Parse(filter)
|
|
if err != nil {
|
|
p.err = err
|
|
return p
|
|
}
|
|
docs := cascadia.QueryAll(p.doc, sel)
|
|
return &parser{docs: docs}
|
|
}
|
|
|
|
func (p *parser) Attribute(attr string) (string, error) {
|
|
if p.err != nil {
|
|
return "", p.err
|
|
}
|
|
if p.doc == nil {
|
|
return "", nil
|
|
}
|
|
for _, attribute := range p.doc.Attr {
|
|
if attribute.Key == attr {
|
|
return attribute.Val, nil
|
|
}
|
|
}
|
|
return "", errors.New("attribute not found")
|
|
}
|
|
|
|
func (p *parser) Attributes(filter, attr string) ([]string, error) {
|
|
if p.err != nil {
|
|
return nil, p.err
|
|
}
|
|
if p.docs == nil {
|
|
return nil, errors.New("no nodes found")
|
|
}
|
|
var attributes []string
|
|
sel, err := cascadia.Parse(filter)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, doc := range p.docs {
|
|
nodes := cascadia.Query(doc, sel)
|
|
if nodes == nil {
|
|
continue
|
|
}
|
|
for _, attribute := range nodes.Attr {
|
|
if attribute.Key == attr {
|
|
attributes = append(attributes, attribute.Val)
|
|
}
|
|
}
|
|
}
|
|
return attributes, nil
|
|
}
|
|
|
|
func (p *parser) Value() (string, error) {
|
|
if p.err != nil {
|
|
return "", p.err
|
|
}
|
|
if p.doc == nil {
|
|
return "", nil
|
|
}
|
|
if p.doc.FirstChild == nil {
|
|
return "", nil
|
|
}
|
|
return p.doc.FirstChild.Data, nil
|
|
}
|
|
|
|
func (p *parser) Values(filter string) ([]string, error) {
|
|
if p.err != nil {
|
|
return nil, p.err
|
|
}
|
|
if len(p.docs) == 0 {
|
|
return nil, errors.New("no nodes found")
|
|
}
|
|
var values []string
|
|
sel, err := cascadia.Parse(filter)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, doc := range p.docs {
|
|
nodes := cascadia.Query(doc, sel)
|
|
if nodes == nil {
|
|
continue
|
|
}
|
|
if nodes.FirstChild == nil {
|
|
continue
|
|
}
|
|
values = append(values, nodes.FirstChild.Data)
|
|
}
|
|
return values, nil
|
|
}
|