66 lines
1.2 KiB
Go
66 lines
1.2 KiB
Go
package parser
|
|
|
|
import (
|
|
"errors"
|
|
"io"
|
|
"log"
|
|
"log/slog"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// setClient builds a GET request for url, sets a desktop-browser User-Agent
// header on it, and sends it with a newly created http.Client.
//
// It returns the response from client.Do, or a nil response and an error when
// the request cannot be built or sent. On success the caller is responsible
// for closing the response body.
//
// NOTE(review): the client is created without a Timeout, so a stalled server
// can block this call indefinitely — consider configuring one.
func setClient(url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		// A malformed URL is something the caller can handle. Keep the log
		// line, but return the error instead of terminating the process.
		log.Println(err)
		return nil, err
	}

	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")

	client := &http.Client{}
	return client.Do(req)
}
|
|
|
|
func HTMLSourceFromURL(url string) (*html.Node, error) {
|
|
resp, err := setClient(url)
|
|
if resp == nil {
|
|
slog.Error("client return nil response", "err", err)
|
|
return nil, err
|
|
}
|
|
|
|
defer func(Body io.ReadCloser) {
|
|
err = Body.Close()
|
|
if err != nil {
|
|
slog.Error("closing response body", "err", err)
|
|
}
|
|
}(resp.Body)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if resp.StatusCode >= 400 {
|
|
return nil, errors.New("http status code: " + strconv.Itoa(resp.StatusCode))
|
|
}
|
|
|
|
doc, err := html.Parse(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return doc, nil
|
|
}
|
|
|
|
func HTMLSource(htmlSource string) (*html.Node, error) {
|
|
doc, err := html.Parse(strings.NewReader(htmlSource))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return doc, nil
|
|
}
|
|
|
|
// https://ahmadrosid.com/blog/how-to-query-html-dom-in-golang
|