From 2575cfaa83fd56a88333f7e3654005be419a949b Mon Sep 17 00:00:00 2001
From: Dave Gallant
Date: Sun, 14 Jan 2024 23:59:20 -0500
Subject: [PATCH] Strip redirects using regexp2

---
Review revisions folded into this patch:
- stripRedirects: compile patterns once per refresh with regexp2.Compile and
  skip invalid ones; MustCompile on remotely supplied patterns could panic.
- stripRedirects: guard against a nil or capture-less "baseUrl" group before
  using it, and log FindStringMatch errors instead of discarding them.
- getRedirects: return early when the request fails (res was dereferenced
  while nil), close the response body, check the status code, bound the
  request with a timeout, and keep the previous redirects on failure.
- go.mod: regexp2 is imported directly, so it is not marked "// indirect".

 backend/app.go | 103 ++++++++++++++++++++++++++++++++++++++++++++++++--
 backend/go.mod |   1 +
 backend/go.sum |   2 ++
 3 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/backend/app.go b/backend/app.go
index d8debb2..f214474 100644
--- a/backend/app.go
+++ b/backend/app.go
@@ -6,9 +6,11 @@ import (
 	"io/ioutil"
 	"math/rand"
 	"net/http"
+	"net/url"
 	"strings"
 	"time"
 
+	"github.com/dlclark/regexp2"
 	_ "github.com/joho/godotenv/autoload"
 	"github.com/rs/zerolog/log"
 
@@ -35,6 +37,15 @@ type App struct {
 	BasePath      string
 	CurrentTopics []Topic
 	LastRefresh   time.Time
+	Redirects     []Redirect
+}
+
+// Redirect describes one known redirect wrapper. Pattern is a regexp2
+// expression whose named group "baseUrl" captures the URL-encoded destination
+// that stripRedirects decodes and substitutes for the offer URL.
+type Redirect struct {
+	Name    string `json:"name"`
+	Pattern string `json:"pattern"`
 }
 
 func (a *App) Initialize() {
@@ -81,10 +92,15 @@ func (a *App) listTopics(w http.ResponseWriter, r *http.Request) {
 
 func (a *App) refreshTopics() {
 	for {
+		log.Info().Msg("Refreshing topics")
 		latestTopics := a.getDeals(9, 1, 6)
 		latestTopics = a.updateScores(latestTopics)
-		log.Debug().Msg("Refreshing topics")
-		a.CurrentTopics = latestTopics
+
+		log.Info().Msg("Refreshing redirects")
+		latestRedirects := a.getRedirects()
+		a.Redirects = latestRedirects
+		a.CurrentTopics = a.stripRedirects(latestTopics)
+
 		a.LastRefresh = time.Now()
 		rand.Seed(time.Now().UnixNano())
 		time.Sleep(time.Duration(rand.Intn(90-60+1)+60) * time.Second)
@@ -98,6 +114,59 @@ func (a *App) updateScores(t []Topic) []Topic {
 	return t
 }
 
+// stripRedirects rewrites each topic's offer URL to the destination captured
+// by the first matching redirect pattern and returns the same slice. Patterns
+// are downloaded at runtime, so one that fails to compile or has no usable
+// "baseUrl" group is skipped rather than allowed to panic the refresh loop.
+func (a *App) stripRedirects(t []Topic) []Topic {
+	// Compile each pattern once per refresh rather than once per topic.
+	patterns := make([]*regexp2.Regexp, 0, len(a.Redirects))
+	for _, r := range a.Redirects {
+		re, err := regexp2.Compile(r.Pattern, 0)
+		if err != nil {
+			log.Warn().Msgf("skipping redirect %q: invalid pattern: %s", r.Name, err)
+			continue
+		}
+		patterns = append(patterns, re)
+	}
+
+	for i := range t {
+		offerURL := t[i].Offer.Url
+		if offerURL == "" {
+			continue
+		}
+		log.Debug().Msgf("Offer url is : %s", offerURL)
+
+		for _, re := range patterns {
+			m, err := re.FindStringMatch(offerURL)
+			if err != nil {
+				log.Warn().Msgf("could not match offer url %q: %s", offerURL, err)
+				continue
+			}
+			if m == nil {
+				continue
+			}
+
+			// GroupByName returns nil when the pattern defines no such group;
+			// a group with no captures did not take part in the match.
+			g := m.GroupByName("baseUrl")
+			if g == nil || len(g.Captures) == 0 {
+				continue
+			}
+
+			decodedValue, err := url.QueryUnescape(g.String())
+			if err != nil {
+				log.Error().Msgf("could not decode redirect target %q: %s", g.String(), err)
+				break
+			}
+			t[i].Offer.Url = decodedValue
+			log.Debug().Msgf("Setting offer url to: %s", t[i].Offer.Url)
+			break
+		}
+	}
+	return t
+}
+
 func (a *App) isSponsor(t Topic) bool {
 	return strings.HasPrefix(t.Title, "[Sponsored]")
 }
@@ -122,7 +191,7 @@ func (a *App) getDeals(id int, firstPage int, lastPage int) []Topic {
 
 		err = json.Unmarshal([]byte(body), &response)
 		if err != nil {
-			log.Warn().Msgf("could not unmarshal response body: %s\n %s", err)
+			log.Warn().Msgf("could not unmarshal response body: %s", err)
 		}
 
 		for _, topic := range response.Topics {
@@ -134,3 +203,31 @@ func (a *App) getDeals(id int, firstPage int, lastPage int) []Topic {
 	}
 	return t
 }
+
+// getRedirects downloads the current list of redirect patterns. On any
+// failure it logs a warning and returns the previously loaded list, so a
+// transient network or parse error does not switch redirect stripping off.
+func (a *App) getRedirects() []Redirect {
+	const redirectsURL = "https://raw.githubusercontent.com/davegallant/rfd-redirect-stripper/main/redirects.json"
+
+	// Bound the request so a stalled download cannot block the refresh loop.
+	client := http.Client{Timeout: 10 * time.Second}
+	res, err := client.Get(redirectsURL)
+	if err != nil {
+		log.Warn().Msgf("error fetching redirects: %s", err)
+		return a.Redirects
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode != http.StatusOK {
+		log.Warn().Msgf("error fetching redirects: unexpected status %s", res.Status)
+		return a.Redirects
+	}
+
+	var r []Redirect
+	if err := json.NewDecoder(res.Body).Decode(&r); err != nil {
+		log.Warn().Msgf("could not unmarshal response body: %s", err)
+		return a.Redirects
+	}
+	return r
+}
diff --git a/backend/go.mod b/backend/go.mod
index 8b030f3..22a77aa 100644
--- a/backend/go.mod
+++ b/backend/go.mod
@@ -9,6 +9,7 @@ require (
 )
 
 require (
+	github.com/dlclark/regexp2 v1.10.0
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	golang.org/x/sys v0.12.0 // indirect
diff --git a/backend/go.sum b/backend/go.sum
index a198c46..2cf5bb4 100644
--- a/backend/go.sum
+++ b/backend/go.sum
@@ -1,4 +1,6 @@
 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
+github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
 github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=