Skip to content

Instantly share code, notes, and snippets.

@Pastor
Created April 23, 2025 16:25
Show Gist options
  • Save Pastor/ce616ee861146985251456ee1429633a to your computer and use it in GitHub Desktop.
Save Pastor/ce616ee861146985251456ee1429633a to your computer and use it in GitHub Desktop.
goParser
package html
import (
	"fmt"
	"net/http"
	"time"

	"golang.org/x/net/html"
)
// Filter reports whether an element with the given tag name should be passed
// to the Extractor. Import calls it only for element nodes.
type Filter func(tagName string) bool
// Extractor converts an element node accepted by the Filter into a Data value.
// Returning nil tells Import to leave the node out of its result.
type Extractor func(node *html.Node) Data
// Data is the value an Extractor produces for a node. It is an alias-free
// named form of any, so callers recover the concrete value with a type
// assertion.
type Data any
// Parser extracts Data values from the document found at a URL.
type Parser interface {
// Import loads the document at url and returns the extracted values, or a
// non-nil error if the document could not be loaded or parsed.
Import(url string) ([]Data, error)
}
// parser is the Parser implementation returned by NewParser. It pairs a
// Filter with the Extractor that is applied to the elements the Filter accepts.
type parser struct {
// filter is called with the tag name of every element node in the document.
filter Filter
// extractor is called for each element node that filter accepted.
extractor Extractor
}
// importTimeout bounds the whole HTTP exchange performed by Import, including
// reading the response body, so an unresponsive server cannot block the caller
// forever.
const importTimeout = 30 * time.Second

// importClient is shared by every Import call so connections can be reused.
// Its Transport is left nil, which means http.DefaultTransport is still used.
var importClient = &http.Client{Timeout: importTimeout}

// Import downloads the document at url with an HTTP GET, parses it as HTML and
// walks every descendant of the document root. Each element node whose tag
// name is accepted by the filter is passed to the extractor, and every non-nil
// value the extractor returns is collected, in the order the nodes are visited.
//
// The returned slice is non-nil (possibly empty) on success. An error is
// returned when the parser was built without a filter or extractor, when the
// request fails or times out, when the server answers with a non-2xx status,
// or when the body cannot be parsed.
func (p parser) Import(url string) ([]Data, error) {
	// Calling a nil func value panics; report the misconfiguration instead.
	if p.filter == nil || p.extractor == nil {
		return nil, fmt.Errorf("html: parser requires a non-nil filter and extractor")
	}

	resp, err := importClient.Get(url)
	if err != nil {
		// The client's error already names the method and URL.
		return nil, fmt.Errorf("html import: %w", err)
	}
	defer resp.Body.Close()

	// Do not parse an error page (404, 500, ...) as if it were the document.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("html import: fetching %q: unexpected status %s", url, resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("html import: parsing %q: %w", url, err)
	}

	// Kept as an allocated empty slice so a successful call never returns nil.
	result := make([]Data, 0)
	for n := range doc.Descendants() {
		if n.Type != html.ElementNode || !p.filter(n.Data) {
			continue
		}
		if d := p.extractor(n); d != nil {
			result = append(result, d)
		}
	}
	return result, nil
}
// NewParser builds a Parser whose Import offers every element's tag name to
// filter and hands the accepted elements to extractor.
func NewParser(filter Filter, extractor Extractor) Parser {
	p := new(parser)
	p.filter, p.extractor = filter, extractor
	return p
}
package html
import (
"fmt"
"golang.org/x/net/html"
"testing"
)
func TestParser(t *testing.T) {
p := NewParser(func(tagName string) bool {
return tagName == "img"
}, func(node *html.Node) Data {
for i := range node.Attr {
attr := node.Attr[i]
if attr.Key == "src" {
return attr.Val
}
}
return nil
})
ret, _ := p.Import("https://skillfactory.ru")
for i := range ret {
href := ret[i].(string)
fmt.Println(href)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment