// Package goh is a small chainable wrapper for running CSS-selector queries
// against a parsed HTML document.
package goh

import (
	"errors"
	"io"

	"github.com/andybalholm/cascadia"
	"golang.org/x/net/html"
)

var (
	// ErrNoNodes is reported when a query is run on a parser that holds no
	// nodes to search in.
	ErrNoNodes = errors.New("no nodes found")

	// ErrAttributeNotFound is reported by Attribute when the selected element
	// does not carry the requested attribute.
	ErrAttributeNotFound = errors.New("attribute not found")
)

// Parser is the query entry point shared by every step of a chain.
//
// Find and FindAll never return an error directly; a failure (for example an
// invalid selector) is carried by the returned parser and surfaces from the
// terminal call (Attribute, Value, Attributes, Values).
type Parser interface {
	// Find selects the first element that matches the CSS selector.
	Find(filter string) SingleParser
	// FindAll selects every element that matches the CSS selector.
	FindAll(filter string) MultiParser
}

// MultiParser is the result of FindAll: a set of selected elements.
type MultiParser interface {
	Parser
	// Attributes looks up, inside each selected element, the first
	// descendant matching filter and collects the value of its attr
	// attribute.
	Attributes(filter, attr string) ([]string, error)
	// Values looks up, inside each selected element, the first descendant
	// matching filter and collects the Data of that descendant's first
	// child node.
	Values(filter string) ([]string, error)
}

// SingleParser is the result of Find: at most one selected element.
type SingleParser interface {
	Parser
	// Attribute returns the value of the attr attribute of the selected
	// element.
	Attribute(attr string) (string, error)
	// Value returns the Data of the selected element's first child node.
	Value() (string, error)
}

// parser implements Parser, SingleParser and MultiParser.
type parser struct {
	// doc is the single selected node (document root or Find result); it is
	// nil when Find matched nothing or when the parser came from FindAll.
	doc *html.Node
	// docs holds the nodes selected by FindAll.
	docs []*html.Node
	// err is the first failure met while building the chain; once set,
	// every further call on this parser short-circuits on it.
	err error
}

// Compile-time checks that parser satisfies both result interfaces.
var (
	_ SingleParser = (*parser)(nil)
	_ MultiParser  = (*parser)(nil)
)

// NewParser parses the HTML read from reader and returns a Parser rooted at
// the resulting document. The error from html.Parse is returned unchanged.
func NewParser(reader io.Reader) (Parser, error) {
	doc, err := html.Parse(reader)
	if err != nil {
		return nil, err
	}
	return &parser{doc: doc}, nil
}

// roots returns the nodes a query on p should search in: the single node when
// there is one, otherwise the node set.
func (p *parser) roots() []*html.Node {
	if p.doc != nil {
		return []*html.Node{p.doc}
	}
	return p.docs
}

// Find returns a parser for the first descendant matching filter, searching
// the held nodes in order.
//
// An invalid selector, or a parser that holds no nodes, yields a parser
// carrying the error. When nothing matches, the returned parser holds no
// node: Attribute and Value then return "" with a nil error. The receiver is
// never modified, so one failed query does not affect later ones.
func (p *parser) Find(filter string) SingleParser {
	if p.err != nil {
		return p
	}

	sel, err := cascadia.Parse(filter)
	if err != nil {
		// Report the failure on a fresh parser instead of poisoning p.
		return &parser{err: err}
	}

	roots := p.roots()
	if len(roots) == 0 {
		return &parser{err: ErrNoNodes}
	}

	for _, root := range roots {
		if match := cascadia.Query(root, sel); match != nil {
			return &parser{doc: match}
		}
	}
	return &parser{}
}

// FindAll returns a parser for every descendant matching filter beneath the
// held nodes.
//
// An invalid selector, or a parser that holds no nodes, yields a parser
// carrying the error. The receiver is never modified.
func (p *parser) FindAll(filter string) MultiParser {
	if p.err != nil {
		return p
	}

	sel, err := cascadia.Parse(filter)
	if err != nil {
		return &parser{err: err}
	}

	roots := p.roots()
	switch len(roots) {
	case 0:
		// cascadia.QueryAll dereferences its node argument, so it must not
		// be called without a root.
		return &parser{err: ErrNoNodes}
	case 1:
		return &parser{docs: cascadia.QueryAll(roots[0], sel)}
	}

	// Several roots may be nested inside each other, so the same node can be
	// reached more than once; keep only the first occurrence.
	seen := make(map[*html.Node]struct{})
	var matches []*html.Node
	for _, root := range roots {
		for _, node := range cascadia.QueryAll(root, sel) {
			if _, dup := seen[node]; dup {
				continue
			}
			seen[node] = struct{}{}
			matches = append(matches, node)
		}
	}
	return &parser{docs: matches}
}

// Attribute returns the value of the attr attribute of the selected element.
//
// It returns the chain's error if there is one, "" with a nil error when no
// element is selected, and ErrAttributeNotFound when the element lacks the
// attribute.
func (p *parser) Attribute(attr string) (string, error) {
	if p.err != nil {
		return "", p.err
	}
	if p.doc == nil {
		return "", nil
	}

	for _, a := range p.doc.Attr {
		if a.Key == attr {
			return a.Val, nil
		}
	}
	return "", ErrAttributeNotFound
}

// Attributes queries each selected element for its first descendant matching
// filter and collects the value of that descendant's attr attribute.
//
// Elements without a matching descendant, and descendants without the
// attribute, are skipped. It returns the chain's error if there is one,
// ErrNoNodes when no elements are selected, and the selector parse error when
// filter is invalid.
func (p *parser) Attributes(filter, attr string) ([]string, error) {
	if p.err != nil {
		return nil, p.err
	}
	if len(p.docs) == 0 {
		return nil, ErrNoNodes
	}

	sel, err := cascadia.Parse(filter)
	if err != nil {
		return nil, err
	}

	var attributes []string
	for _, doc := range p.docs {
		node := cascadia.Query(doc, sel)
		if node == nil {
			continue
		}
		for _, a := range node.Attr {
			if a.Key == attr {
				attributes = append(attributes, a.Val)
			}
		}
	}
	return attributes, nil
}

// Value returns the Data of the selected element's first child node (the
// text, when that child is a text node).
//
// It returns the chain's error if there is one, and "" with a nil error when
// no element is selected or the element has no children.
func (p *parser) Value() (string, error) {
	if p.err != nil {
		return "", p.err
	}
	if p.doc == nil || p.doc.FirstChild == nil {
		return "", nil
	}
	return p.doc.FirstChild.Data, nil
}

// Values queries each selected element for its first descendant matching
// filter and collects the Data of that descendant's first child node.
//
// Elements without a matching descendant, and descendants without children,
// are skipped. It returns the chain's error if there is one, ErrNoNodes when
// no elements are selected, and the selector parse error when filter is
// invalid.
func (p *parser) Values(filter string) ([]string, error) {
	if p.err != nil {
		return nil, p.err
	}
	if len(p.docs) == 0 {
		return nil, ErrNoNodes
	}

	sel, err := cascadia.Parse(filter)
	if err != nil {
		return nil, err
	}

	var values []string
	for _, doc := range p.docs {
		node := cascadia.Query(doc, sel)
		if node == nil || node.FirstChild == nil {
			continue
		}
		values = append(values, node.FirstChild.Data)
	}
	return values, nil
}