goh/goh.go
Young Xu 703122b00b first commit
Signed-off-by: Young Xu <xuthus5@gmail.com>
2024-09-22 18:51:12 +08:00

159 lines
3.1 KiB
Go

package goh
import (
"errors"
"io"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
// Parser is the selection API shared by every stage of a query chain.
// Both methods take a CSS selector string and return a new stage that can be
// queried further. Selector errors are not returned here; they are carried by
// the returned stage and surface from its value accessors.
type Parser interface {
	// Find selects the first element that matches the selector filter.
	Find(filter string) SingleParser
	// FindAll selects every element that matches the selector filter.
	FindAll(filter string) MultiParser
}
// MultiParser is the stage returned by FindAll: it holds a set of matched
// elements and can either be queried further (via Parser) or have values
// extracted from each held element.
type MultiParser interface {
Parser
// Attributes looks up, under each held element, the first descendant that
// matches the selector filter and collects the values of its attribute attr.
// Held elements with no matching descendant contribute nothing.
Attributes(filter, attr string) ([]string, error)
// Values looks up, under each held element, the first descendant that
// matches the selector filter and collects the Data of that descendant's
// first child node. Descendants without children contribute nothing.
Values(filter string) ([]string, error)
}
// SingleParser is the stage returned by Find: it holds at most one matched
// element and can either be queried further (via Parser) or have a value
// extracted from that element.
type SingleParser interface {
Parser
// Attribute returns the value of the attribute attr on the held element.
// It returns an error if an earlier step in the chain failed or if the
// element does not carry that attribute.
Attribute(attr string) (string, error)
// Value returns the Data of the held element's first child node, or an
// empty string when there is no element or it has no children.
Value() (string, error)
}
// parser is the single concrete type behind Parser, SingleParser and
// MultiParser. Which of doc/docs is populated depends on how the value was
// produced.
type parser struct {
// doc is the single current node: the document root after NewParser, or the
// first match after Find (nil when nothing matched).
doc *html.Node
// docs is the list of matched nodes produced by FindAll.
docs []*html.Node
// err records a failure from an earlier step of the chain (invalid selector
// or nothing to search). While it is non-nil, Find and FindAll return the
// parser as-is and the value accessors return this error.
err error
}
// NewParser reads an HTML document from reader, parses it with html.Parse and
// returns a Parser rooted at the resulting node tree. A parse failure is
// returned as-is together with a nil Parser.
func NewParser(reader io.Reader) (Parser, error) {
	var (
		root *html.Node
		err  error
	)
	if root, err = html.Parse(reader); err != nil {
		return nil, err
	}
	return &parser{doc: root}, nil
}
// Find returns a SingleParser holding the first descendant of the current
// node that matches the CSS selector filter.
//
// When the receiver holds a list of nodes (it came from FindAll) rather than
// a single node, the search starts from the first node of that list.
//
// Errors are deferred: an invalid selector, or a receiver with nothing to
// search, yields a parser whose accessors return that error. If the receiver
// already carries an error it is returned unchanged. A selector that matches
// nothing is not an error; the returned parser simply holds no element.
//
// Find never modifies the receiver, so a failed lookup does not affect later
// queries made on the same parser.
func (p *parser) Find(filter string) SingleParser {
	if p.err != nil {
		return p
	}

	sel, err := cascadia.Parse(filter)
	if err != nil {
		// Carry the error on a fresh value instead of poisoning p.
		return &parser{err: err}
	}

	// Pick the search root locally; p.doc must stay untouched so that the
	// receiver keeps behaving the same for subsequent calls.
	root := p.doc
	if root == nil && len(p.docs) > 0 {
		root = p.docs[0]
	}
	if root == nil {
		return &parser{err: errors.New("no nodes found")}
	}

	return &parser{doc: cascadia.Query(root, sel)}
}
// FindAll returns a MultiParser holding every descendant that matches the CSS
// selector filter.
//
// When the receiver holds a single node, its descendants are searched. When
// it holds a list of nodes (it came from a previous FindAll), each node's
// descendants are searched in order and the results are concatenated; an
// element reachable from several of those nodes is reported once.
//
// Errors are deferred: an invalid selector, or a receiver with nothing to
// search, yields a parser whose accessors return that error. If the receiver
// already carries an error it is returned unchanged. FindAll never modifies
// the receiver.
func (p *parser) FindAll(filter string) MultiParser {
	if p.err != nil {
		return p
	}

	sel, err := cascadia.Parse(filter)
	if err != nil {
		// Carry the error on a fresh value instead of poisoning p.
		return &parser{err: err}
	}

	if p.doc != nil {
		return &parser{docs: cascadia.QueryAll(p.doc, sel)}
	}

	// cascadia.QueryAll dereferences its node argument, so a nil p.doc must
	// never reach it; fall back to the node list or report the empty state.
	if len(p.docs) == 0 {
		return &parser{err: errors.New("no nodes found")}
	}

	seen := make(map[*html.Node]struct{})
	var matches []*html.Node
	for _, root := range p.docs {
		for _, n := range cascadia.QueryAll(root, sel) {
			// Nested roots would otherwise report shared descendants twice.
			if _, dup := seen[n]; dup {
				continue
			}
			seen[n] = struct{}{}
			matches = append(matches, n)
		}
	}
	return &parser{docs: matches}
}
// Attribute returns the value of the attribute named attr on the held element.
//
// A pending chain error is returned first. If no element is held the result
// is an empty string with a nil error; if the element exists but has no such
// attribute an "attribute not found" error is returned.
func (p *parser) Attribute(attr string) (string, error) {
	switch {
	case p.err != nil:
		return "", p.err
	case p.doc == nil:
		return "", nil
	}

	attrs := p.doc.Attr
	for i := range attrs {
		if attrs[i].Key == attr {
			return attrs[i].Val, nil
		}
	}
	return "", errors.New("attribute not found")
}
// Attributes collects attribute values across the held elements.
//
// For each held element it finds the first descendant matching the CSS
// selector filter and appends the value of every attribute on that descendant
// whose key equals attr. Held elements without a matching descendant, and
// descendants without the attribute, contribute nothing.
//
// It returns the pending chain error if there is one, a "no nodes found"
// error when no elements are held, and the selector parse error when filter
// is invalid.
func (p *parser) Attributes(filter, attr string) ([]string, error) {
	if p.err != nil {
		return nil, p.err
	}
	// Test emptiness by length, matching Values, so a nil and an empty list
	// are treated the same way.
	if len(p.docs) == 0 {
		return nil, errors.New("no nodes found")
	}

	sel, err := cascadia.Parse(filter)
	if err != nil {
		return nil, err
	}

	var attributes []string
	for _, doc := range p.docs {
		node := cascadia.Query(doc, sel)
		if node == nil {
			continue
		}
		for _, a := range node.Attr {
			if a.Key == attr {
				attributes = append(attributes, a.Val)
			}
		}
	}
	return attributes, nil
}
// Value returns the Data field of the held element's first child node.
//
// A pending chain error is returned first. When no element is held, or the
// element has no children, the result is an empty string with a nil error.
// Only the first child is inspected; deeper descendants are not traversed.
// NOTE(review): Data is presumably the text only when that first child is a
// text node — confirm against the x/net/html Node documentation.
func (p *parser) Value() (string, error) {
	if p.err != nil {
		return "", p.err
	}
	if p.doc != nil {
		if first := p.doc.FirstChild; first != nil {
			return first.Data, nil
		}
	}
	return "", nil
}
// Values collects one value per held element.
//
// For each held element it finds the first descendant matching the CSS
// selector filter and appends the Data of that descendant's first child node.
// Held elements without a matching descendant, and matches without children,
// are skipped.
//
// It returns the pending chain error if there is one, a "no nodes found"
// error when no elements are held, and the selector parse error when filter
// is invalid.
func (p *parser) Values(filter string) ([]string, error) {
	switch {
	case p.err != nil:
		return nil, p.err
	case len(p.docs) == 0:
		return nil, errors.New("no nodes found")
	}

	sel, parseErr := cascadia.Parse(filter)
	if parseErr != nil {
		return nil, parseErr
	}

	var texts []string
	for i := range p.docs {
		if hit := cascadia.Query(p.docs[i], sel); hit != nil && hit.FirstChild != nil {
			texts = append(texts, hit.FirstChild.Data)
		}
	}
	return texts, nil
}