package repository

import (
	"fmt"
	"net/url"
	"regexp"
	"strconv"
	"strings"

	"github.com/emersion/go-imap/v2"
	"github.com/go-shiori/dom"
	"github.com/golang-module/carbon/v2"
	"github.com/jmoiron/sqlx"
	"github.com/spf13/viper"
	"golang.org/x/net/html"

	"git.amok.space/yevhen/resource-scraper/helper/parser"
	"git.amok.space/yevhen/resource-scraper/helper/sugar"
	"git.amok.space/yevhen/resource-scraper/internal/mail"
	_table "git.amok.space/yevhen/resource-scraper/pkg/repository/table"
	"git.amok.space/yevhen/resource-scraper/types/constant"
	"git.amok.space/yevhen/resource-scraper/types/model"
)

// ShareTheBrutality is the repository that turns notification e-mails of the
// ShareTheBrutality scope into model.ExternalSources rows.
type ShareTheBrutality struct {
	// scope is the configuration key prefix used for every viper lookup.
	scope string
	// EmailService holds the mail session and the messages it listed.
	EmailService mail.EmailService
	db           *sqlx.DB
}

// NewShareTheBrutalityRepository returns a repository bound to db and to the
// constant.ScopeShareTheBrutality configuration scope.
func NewShareTheBrutalityRepository(db *sqlx.DB) *ShareTheBrutality {
	return &ShareTheBrutality{db: db, scope: constant.ScopeShareTheBrutality}
}

// GetMail logs in to the mail service as email, lists the messages of the
// configured mailboxes ("<scope>.mailboxes") that match the configured text
// search criteria ("<scope>.search-criteria"), and runs Processing on them
// with the sender filter read from "<scope>.sender".
//
// It returns the entries produced by Processing together with a pointer to
// the repository's EmailService.
func (s *ShareTheBrutality) GetMail(email string) ([]model.ExternalSources, *mail.EmailService) {
	s.EmailService = mail.EmailService{
		User: email,
	}

	s.EmailService.Login()

	mailboxes := fmt.Sprintf("%s.mailboxes", s.scope)
	criteria := fmt.Sprintf("%s.search-criteria", s.scope)
	searchCriteria := &imap.SearchCriteria{
		Text: viper.GetStringSlice(criteria),
	}

	//now := carbon.Now()
	s.EmailService.ListMessages(viper.GetStringSlice(mailboxes), searchCriteria)

	//box.CreateMailbox("INBOX/Processed")
	//s.EmailService.CreateMailbox("Processed/Succeed")
	//s.EmailService.CreateMailbox("Processed/Failed")
	//s.EmailService.CreateMailbox("Processed/Suspicious")
	//s.EmailService.MailboxesList()

	entries := s.Processing(viper.GetStringMapString(fmt.Sprintf("%s.sender", s.scope)))

	return entries, &s.EmailService
}

// Processing walks the messages held by s.EmailService, keeps those whose
// first From address and subject equal sender["mailbox"], sender["host"] and
// sender["subject"], extracts one model.ExternalSources per matching table
// cell of each HTML body section, and batch-inserts them.
//
// The returned slice contains the rows reported by
// _table.BatchInsertOnDuplicate for every insert whose status is not
// constant.StatusFailed. It is empty (never nil) when nothing was processed.
// If one of the configured regular expressions does not compile, the error is
// reported through CheckErr and no message is processed.
func (s *ShareTheBrutality) Processing(sender map[string]string) []model.ExternalSources {
	columns := []string{"`type`", "type_id", "title", "type_subsection_id", "releaser", "created", "fingerprint"}
	entriesBatched := make([]model.ExternalSources, 0)

	if len(s.EmailService.Messages) == 0 {
		return entriesBatched
	}

	//tmpPath := viper.GetString(fmt.Sprintf("%s.storage.filepath", s.scope))
	dbType := viper.GetString(fmt.Sprintf("%s.db-type", s.scope))
	regexPatterns := viper.GetStringMapString(fmt.Sprintf("%s.regex", s.scope))
	topics := viper.GetStringMap(fmt.Sprintf("%s.topics", s.scope))

	// Compile the configured patterns once up front: they are loop-invariant,
	// and a broken pattern in the configuration must not panic mid-run.
	typeIDPattern, err := regexp.Compile(regexPatterns["type-id"])
	if s.EmailService.CheckErr("compiling type-id regexp", err) {
		return entriesBatched
	}

	whoGenrePattern, err := regexp.Compile(regexPatterns["who-genre"])
	if s.EmailService.CheckErr("compiling who-genre regexp", err) {
		return entriesBatched
	}

	for _, msg := range s.EmailService.Messages {
		// An envelope without any From address cannot match the sender filter.
		if len(msg.Envelope.From) == 0 {
			s.EmailService.Warn("message envelope has no From address")
			continue
		}

		from := msg.Envelope.From[0]
		if from.Mailbox != sender["mailbox"] || from.Host != sender["host"] || msg.Envelope.Subject != sender["subject"] {
			continue
		}

		entries := make([]model.ExternalSources, 0)

		// NOTE(review): entries accumulates across body sections, and the
		// insert and the mailbox move run once per section — kept as in the
		// original; confirm this is intended for multi-section messages.
		for _, section := range msg.BodySection {
			//sugar.WriteDataToTmpFile(msg.BodySection, tmpPath)
			if section.Bytes == nil {
				continue
			}

			doc, err := parser.HTMLSource(string(section.Bytes))
			if s.EmailService.CheckErr("parsing message body", err) {
				continue
			}

			table := dom.QuerySelector(doc, "body > table:nth-of-type(1n) table:nth-of-type(1n) table:nth-of-type(2n) > tbody")
			if table == nil {
				s.EmailService.Warn("dom.QuerySelector had not queried any data, returned nil")
				continue
			}

			for _, td := range dom.QuerySelectorAll(table, "tr > td:nth-child(2)") {
				es, ok := s.parseEntry(td, typeIDPattern, whoGenrePattern, topics)
				if !ok {
					continue
				}

				es.Created = carbon.Parse(msg.Envelope.Date.String())
				es.Type = dbType
				entries = append(entries, es)
			}

			result, status := _table.BatchInsertOnDuplicate(entries, s.db, columns)
			if status != constant.StatusFailed {
				entriesBatched = append(entriesBatched, result...)
			}

			s.EmailService.MoveMessageToMailbox(msg, status)
		}
	}

	return entriesBatched
}

// parseEntry extracts title, fingerprint, type id, releaser and topic id from
// a single table cell. It reports false, after emitting a warning through the
// email service, when any required piece is missing or malformed.
func (s *ShareTheBrutality) parseEntry(td *html.Node, typeIDPattern, whoGenrePattern *regexp.Regexp, topics map[string]any) (model.ExternalSources, bool) {
	anchor := dom.QuerySelector(td, "h2 > a")
	if anchor == nil {
		s.EmailService.Warn("dom.QuerySelector couldn't find title")
		return model.ExternalSources{}, false
	}

	u, err := url.Parse(dom.GetAttribute(anchor, "href"))
	if s.EmailService.CheckErr("parsing url", err) {
		return model.ExternalSources{}, false
	}

	fingerprint := u.RequestURI()

	typeIDMatch := typeIDPattern.FindStringSubmatch(fingerprint)
	if len(typeIDMatch) != 2 {
		s.EmailService.Warn("Regexp => typeIdMatch not matched")
		return model.ExternalSources{}, false
	}

	// A capture that is not a valid int would otherwise be stored as id 0.
	typeID, err := strconv.Atoi(typeIDMatch[1])
	if s.EmailService.CheckErr("converting type id", err) {
		return model.ExternalSources{}, false
	}

	sourceData := dom.QuerySelector(td, "p:first-child")
	if sourceData == nil {
		s.EmailService.Warn("dom.QuerySelector couldn't find sourceData in paragraph")
		return model.ExternalSources{}, false
	}

	sourceDataString := html.UnescapeString(sugar.SqueezeLine(dom.InnerHTML(sourceData)))

	sourceDataMatch := whoGenrePattern.FindStringSubmatch(sourceDataString)
	if len(sourceDataMatch) != 3 {
		s.EmailService.Warn("Regexp => sourceData not matched")
		return model.ExternalSources{}, false
	}

	// The topic lookup uses the lower-cased genre; an unknown genre or a
	// non-int configured value must not panic on the type assertion.
	genre := strings.ToLower(sourceDataMatch[2])
	subsectionID, ok := topics[genre].(int)
	if !ok {
		s.EmailService.Warn(fmt.Sprintf("topic %q has no integer id configured", genre))
		return model.ExternalSources{}, false
	}

	var es model.ExternalSources
	es.Title = sugar.SqueezeLine(dom.InnerHTML(anchor))
	es.Fingerprint = fingerprint
	es.TypeId = typeID
	es.Releaser = sourceDataMatch[1]
	es.TypeSubsectionId = subsectionID

	return es, true
}