package repository

import (
	"fmt"
	"log/slog"
	"regexp"
	"slices"
	"strconv"
	"time"

	"github.com/go-shiori/dom"
	"github.com/golang-module/carbon/v2"
	"github.com/jmoiron/sqlx"
	"github.com/spf13/viper"

	"git.amok.space/yevhen/resource-scraper/helper/parser"
	"git.amok.space/yevhen/resource-scraper/pkg/repository/table"
	"git.amok.space/yevhen/resource-scraper/types/constant"
	"git.amok.space/yevhen/resource-scraper/types/model"
)

// Patterns are compiled once at package scope instead of once per scraped post.
var (
	// presceneReleaserRe captures the trailing "-<word>" token of a release title.
	presceneReleaserRe = regexp.MustCompile(`(?is)-(\w+)$`)
	// prescenePostIDRe captures the digits of a class attribute that starts
	// with "post-<digits> post".
	prescenePostIDRe = regexp.MustCompile(`(?s)^post-(\d+)\spost`)
)

// Prescene is the repository for entries scraped from the prescene source.
type Prescene struct {
	db *sqlx.DB
}

// NewPresceneRepository returns a Prescene repository that uses db.
func NewPresceneRepository(db *sqlx.DB) *Prescene {
	return &Prescene{db: db}
}

// GetPage fetches every listing page named in pageNumbers, extracts the
// matching posts from each page and hands every post to
// table.ExternalSources.InsertOnDuplicate, collecting the returned entries.
//
// Page "1" is fetched from the configured endpoint as is; any other page
// number is formatted with the configured "next page" pattern and appended to
// that same base endpoint.
//
// Pages that cannot be fetched or parsed are logged and skipped, as are posts
// whose numeric post ID cannot be read from the class attribute and posts whose
// releaser is tagged with constant.TagIgnore. The returned error is always nil
// and the returned slice is never nil.
func (s *Prescene) GetPage(pageNumbers []string) ([]model.ExternalSources, error) {
	entries := make([]model.ExternalSources, 0)
	endpoint := viper.GetString(constant.CfgKeyEndpoint)
	nextFormat := viper.GetString(constant.CfgKeyEndpointNext)
	pause := viper.GetDuration(constant.CfgKeySleepBeforeNextIteration)
	tags := viper.GetStringMapStringSlice("groups.tags")

	for _, page := range pageNumbers {
		// Build the URL from the untouched base endpoint on every iteration so
		// that suffixes of previously visited pages do not pile up.
		pageURL := endpoint
		if page != "1" {
			pageURL += fmt.Sprintf(nextFormat, page)
		}

		doc, err := parser.HTMLSourceFromURL(pageURL)
		if err != nil {
			slog.Error("Parse error", "err", err, "url", pageURL)
			continue
		}
		if doc == nil {
			slog.Warn("Document is nil", "url", pageURL)
			continue
		}

		for _, item := range dom.QuerySelectorAll(doc, ".post.type-post.category-flac.category-music") {
			var es model.ExternalSources
			columns := []string{"`type`", "type_id", "title", "eXsource", "releaser", "created"}

			if title := dom.QuerySelector(item, ".title"); title != nil {
				if anchor := dom.QuerySelector(title, "h1 > a"); anchor != nil {
					es.Type = constant.ScopePrescene
					es.Title = dom.GetAttribute(anchor, "title")
					es.ExSource = dom.GetAttribute(anchor, "href")

					// FindStringSubmatch returns nil when nothing matches, so
					// the capture group must not be indexed unconditionally.
					if m := presceneReleaserRe.FindStringSubmatch(es.Title); m != nil {
						es.Releaser = m[1]
					} else {
						slog.Warn("Releaser not found in title", "title", es.Title)
					}

					// NOTE(review): map iteration order is unspecified, so a
					// releaser listed under several flags gets an arbitrary one.
					for flag, groups := range tags {
						if slices.Contains(groups, es.Releaser) {
							es.A = flag
							es.H = flag
							columns = append(columns, "a", "h")
							break
						}
					}
				}

				if es.A == constant.TagIgnore {
					slog.Info("Skipped", "releaser", es.Releaser)
					continue
				}

				if localtime := dom.QuerySelector(title, "small > span.localtime"); localtime != nil {
					es.Created = carbon.Parse(dom.GetAttribute(localtime, "data-lttime"))
				}
			}

			cls := dom.GetAttribute(item, "class")
			idMatch := prescenePostIDRe.FindStringSubmatch(cls)
			if idMatch == nil {
				// Without a post ID the row cannot be identified; skip it
				// rather than panic or store a zero ID.
				slog.Warn("Post ID not found in class attribute", "class", cls)
				continue
			}
			postID, convErr := strconv.Atoi(idMatch[1])
			if convErr != nil {
				slog.Warn("Post ID is not a valid integer", "class", cls, "err", convErr)
				continue
			}
			es.TypeId = postID

			esModel := table.ExternalSources{Columns: columns}
			entries = append(entries, esModel.InsertOnDuplicate(es, s.db))
		}

		// Pause between pages for the configured duration.
		time.Sleep(pause)
	}

	return entries, nil
}