155 lines
4.8 KiB
Go
155 lines
4.8 KiB
Go
package repository
|
|
|
|
import (
|
|
"fmt"
|
|
"net/url"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/emersion/go-imap/v2"
|
|
"github.com/go-shiori/dom"
|
|
"github.com/golang-module/carbon/v2"
|
|
"github.com/jmoiron/sqlx"
|
|
"github.com/spf13/viper"
|
|
"golang.org/x/net/html"
|
|
|
|
"git.amok.space/yevhen/resource-scraper/helper/parser"
|
|
"git.amok.space/yevhen/resource-scraper/helper/sugar"
|
|
"git.amok.space/yevhen/resource-scraper/internal/mail"
|
|
_table "git.amok.space/yevhen/resource-scraper/pkg/repository/table"
|
|
"git.amok.space/yevhen/resource-scraper/types/constant"
|
|
"git.amok.space/yevhen/resource-scraper/types/model"
|
|
)
|
|
|
|
type ShareTheBrutality struct {
|
|
scope string
|
|
EmailService mail.EmailService
|
|
db *sqlx.DB
|
|
}
|
|
|
|
func NewShareTheBrutalityRepository(db *sqlx.DB) *ShareTheBrutality {
|
|
return &ShareTheBrutality{db: db, scope: constant.ScopeShareTheBrutality}
|
|
}
|
|
|
|
func (s *ShareTheBrutality) GetMail(email string) ([]model.ExternalSources, *mail.EmailService) {
|
|
s.EmailService = mail.EmailService{
|
|
User: email,
|
|
}
|
|
|
|
s.EmailService.Login()
|
|
|
|
mailboxes := fmt.Sprintf("%s.mailboxes", s.scope)
|
|
criteria := fmt.Sprintf("%s.search-criteria", s.scope)
|
|
searchCriteria := &imap.SearchCriteria{
|
|
Text: viper.GetStringSlice(criteria),
|
|
}
|
|
//now := carbon.Now()
|
|
s.EmailService.ListMessages(viper.GetStringSlice(mailboxes), searchCriteria)
|
|
//box.CreateMailbox("INBOX/Processed")
|
|
//s.EmailService.CreateMailbox("Processed/Succeed")
|
|
//s.EmailService.CreateMailbox("Processed/Failed")
|
|
//s.EmailService.CreateMailbox("Processed/Suspicious")
|
|
//s.EmailService.MailboxesList()
|
|
|
|
entries := s.Processing(viper.GetStringMapString(fmt.Sprintf("%s.sender", s.scope)))
|
|
|
|
return entries, &s.EmailService
|
|
}
|
|
|
|
func (s *ShareTheBrutality) Processing(sender map[string]string) []model.ExternalSources {
|
|
columns := []string{"`type`", "type_id", "title", "type_subsection_id", "releaser", "created", "fingerprint"}
|
|
entriesBatched := make([]model.ExternalSources, 0)
|
|
if len(s.EmailService.Messages) == 0 {
|
|
return entriesBatched
|
|
}
|
|
|
|
//tmpPath := viper.GetString(fmt.Sprintf("%s.storage.filepath", s.scope))
|
|
dbType := viper.GetString(fmt.Sprintf("%s.db-type", s.scope))
|
|
regexPatterns := viper.GetStringMapString(fmt.Sprintf("%s.regex", s.scope))
|
|
topics := viper.GetStringMap(fmt.Sprintf("%s.topics", s.scope))
|
|
|
|
for _, msg := range s.EmailService.Messages {
|
|
entries := make([]model.ExternalSources, 0)
|
|
from := msg.Envelope.From[0]
|
|
subject := msg.Envelope.Subject
|
|
|
|
if !(from.Mailbox == sender["mailbox"] && from.Host == sender["host"] && subject == sender["subject"]) {
|
|
continue
|
|
}
|
|
|
|
for _, section := range msg.BodySection {
|
|
//sugar.WriteDataToTmpFile(msg.BodySection, tmpPath)
|
|
|
|
if section.Bytes != nil {
|
|
doc, err := parser.HTMLSource(string(section.Bytes))
|
|
if s.EmailService.CheckErr("parsing message body", err) {
|
|
continue
|
|
}
|
|
|
|
table := dom.QuerySelector(doc, "body > table:nth-of-type(1n) table:nth-of-type(1n) table:nth-of-type(2n) > tbody")
|
|
if table == nil {
|
|
s.EmailService.Warn("dom.QuerySelector had not queried any data, returned nil")
|
|
continue
|
|
}
|
|
|
|
var es model.ExternalSources
|
|
for _, td := range dom.QuerySelectorAll(table, "tr > td:nth-child(2)") {
|
|
anchor := dom.QuerySelector(td, "h2 > a")
|
|
if anchor == nil {
|
|
s.EmailService.Warn("dom.QuerySelector couldn't find title")
|
|
continue
|
|
}
|
|
es.Title = sugar.SqueezeLine(dom.InnerHTML(anchor))
|
|
|
|
u, err := url.Parse(dom.GetAttribute(anchor, "href"))
|
|
if s.EmailService.CheckErr("parsing url", err) {
|
|
continue
|
|
}
|
|
|
|
es.Fingerprint = u.RequestURI()
|
|
pattern := regexp.MustCompile(regexPatterns["type-id"])
|
|
typeIdMatch := pattern.FindStringSubmatch(es.Fingerprint)
|
|
if len(typeIdMatch) != 2 {
|
|
s.EmailService.Warn("Regexp => typeIdMatch not matched")
|
|
continue
|
|
}
|
|
es.TypeId, _ = strconv.Atoi(typeIdMatch[1])
|
|
|
|
sourceData := dom.QuerySelector(td, "p:first-child")
|
|
if sourceData == nil {
|
|
s.EmailService.Warn("dom.QuerySelector couldn't find sourceData in paragraph")
|
|
continue
|
|
}
|
|
sourceDataString := html.UnescapeString(sugar.SqueezeLine(dom.InnerHTML(sourceData)))
|
|
pattern = regexp.MustCompile(regexPatterns["who-genre"])
|
|
sourceDataMatch := pattern.FindStringSubmatch(sourceDataString)
|
|
|
|
if len(sourceDataMatch) != 3 {
|
|
s.EmailService.Warn("Regexp => sourceData not matched")
|
|
continue
|
|
}
|
|
|
|
es.Releaser = sourceDataMatch[1]
|
|
es.Created = carbon.Parse(msg.Envelope.Date.String())
|
|
es.Type = dbType
|
|
|
|
genre := strings.ToLower(sourceDataMatch[2])
|
|
es.TypeSubsectionId = topics[genre].(int)
|
|
|
|
entries = append(entries, es)
|
|
}
|
|
|
|
result, status := _table.BatchInsertOnDuplicate(entries, s.db, columns)
|
|
if status != constant.StatusFailed {
|
|
entriesBatched = append(entriesBatched, result...)
|
|
}
|
|
|
|
s.EmailService.MoveMessageToMailbox(msg, status)
|
|
}
|
|
}
|
|
}
|
|
|
|
return entriesBatched
|
|
}
|