resource-scraper/helper/parser/html.go

66 lines
1.2 KiB
Go

package parser
import (
"errors"
"io"
"log"
"log/slog"
"net/http"
"strconv"
"strings"
"golang.org/x/net/html"
)
// setClient performs an HTTP GET against url, presenting a desktop-browser
// User-Agent header, and returns whatever http.Client.Do returns.
//
// If the request cannot be built (for example because url is malformed), the
// failure is logged and returned to the caller rather than aborting the
// process, so callers decide how to react to bad input. When the returned
// error is nil, the caller owns the response and must close its Body.
//
// NOTE(review): the client is created with a zero Timeout, which net/http
// treats as "no timeout"; consider configuring one so a stalled server cannot
// block the caller indefinitely.
func setClient(url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		// Keep the diagnostic on the standard logger, but hand the error back
		// instead of exiting: this is a helper, not the program's entry point.
		log.Printf("building request for %q: %v", url, err)
		return nil, err
	}

	// Some sites reject Go's default User-Agent, so identify as a browser.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")

	client := &http.Client{}
	return client.Do(req)
}
// HTMLSourceFromURL fetches url with setClient and parses the response body
// into an HTML node tree.
//
// It returns an error when the request fails, when the response status code is
// 400 or higher ("http status code: <code>"), or when the body cannot be
// parsed. The response body is always closed before returning; a failure to
// close it is logged but does not affect the returned values.
func HTMLSourceFromURL(url string) (*html.Node, error) {
	resp, err := setClient(url)
	if err != nil {
		// Check the error before touching the response: on failure the
		// response is not meaningful and there is nothing left to close.
		return nil, err
	}
	if resp == nil {
		// Defensive guard so callers never receive a nil document together
		// with a nil error.
		return nil, errors.New("client returned nil response")
	}

	defer func(body io.ReadCloser) {
		// Use a dedicated variable so the close result never overwrites the
		// function's own err.
		if closeErr := body.Close(); closeErr != nil {
			slog.Error("closing response body", "err", closeErr)
		}
	}(resp.Body)

	if resp.StatusCode >= 400 {
		return nil, errors.New("http status code: " + strconv.Itoa(resp.StatusCode))
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return nil, err
	}
	return doc, nil
}
// HTMLSource parses the HTML text in htmlSource and returns the root node of
// the resulting tree. If parsing fails, the node is nil and the parser's error
// is returned unchanged.
func HTMLSource(htmlSource string) (*html.Node, error) {
	reader := strings.NewReader(htmlSource)

	root, parseErr := html.Parse(reader)
	if parseErr != nil {
		return nil, parseErr
	}

	return root, nil
}
// Reference: https://ahmadrosid.com/blog/how-to-query-html-dom-in-golang