Strip redirects using regexp2

This commit is contained in:
Dave Gallant
2024-01-14 23:59:20 -05:00
parent 945c9b6dd4
commit 2575cfaa83
3 changed files with 74 additions and 3 deletions

View File

@@ -6,9 +6,11 @@ import (
"io/ioutil" "io/ioutil"
"math/rand" "math/rand"
"net/http" "net/http"
"net/url"
"strings" "strings"
"time" "time"
"github.com/dlclark/regexp2"
_ "github.com/joho/godotenv/autoload" _ "github.com/joho/godotenv/autoload"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
@@ -35,6 +37,12 @@ type App struct {
BasePath string BasePath string
CurrentTopics []Topic CurrentTopics []Topic
LastRefresh time.Time LastRefresh time.Time
Redirects []Redirect
}
// Redirect is one entry of the redirect list decoded from JSON by
// getRedirects and tried against offer URLs by stripRedirects.
type Redirect struct {
// Name is the entry's "name" value from the JSON document.
Name string `json:"name"`
// Pattern is the regexp2 expression matched against an offer URL;
// stripRedirects reads the match's named group "baseUrl" as the real target.
Pattern string `json:"pattern"`
} }
func (a *App) Initialize() { func (a *App) Initialize() {
@@ -81,10 +89,15 @@ func (a *App) listTopics(w http.ResponseWriter, r *http.Request) {
func (a *App) refreshTopics() { func (a *App) refreshTopics() {
for { for {
log.Info().Msg("Refreshing topics")
latestTopics := a.getDeals(9, 1, 6) latestTopics := a.getDeals(9, 1, 6)
latestTopics = a.updateScores(latestTopics) latestTopics = a.updateScores(latestTopics)
log.Debug().Msg("Refreshing topics")
a.CurrentTopics = latestTopics log.Info().Msg("Refreshing redirects")
latestRedirects := a.getRedirects()
a.Redirects = latestRedirects
a.CurrentTopics = a.stripRedirects(latestTopics)
a.LastRefresh = time.Now() a.LastRefresh = time.Now()
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
time.Sleep(time.Duration(rand.Intn(90-60+1)+60) * time.Second) time.Sleep(time.Duration(rand.Intn(90-60+1)+60) * time.Second)
@@ -98,6 +111,38 @@ func (a *App) updateScores(t []Topic) []Topic {
return t return t
} }
// stripRedirects replaces each topic's offer URL with the destination URL
// embedded in a redirect link.
//
// Every pattern in a.Redirects is compiled with regexp2 and tried, in order,
// against the topic's offer URL. For the first pattern that matches and
// captures a non-empty named group "baseUrl", the captured text is
// query-unescaped and written back to t[i].Offer.Url. Topics with an empty
// offer URL, or whose URL matches no pattern, are left untouched.
//
// The slice is modified in place and is also returned.
func (a *App) stripRedirects(t []Topic) []Topic {
	// Compile each pattern once per call instead of once per topic. The
	// patterns are loaded at runtime, so an invalid one is logged and skipped
	// rather than taking the process down through MustCompile's panic.
	patterns := make([]*regexp2.Regexp, 0, len(a.Redirects))
	for _, r := range a.Redirects {
		re, err := regexp2.Compile(r.Pattern, 0)
		if err != nil {
			log.Warn().Msgf("skipping invalid redirect pattern %q (%s): %s", r.Pattern, r.Name, err)
			continue
		}
		patterns = append(patterns, re)
	}

	for i := range t {
		offerURL := t[i].Offer.Url
		if offerURL == "" {
			continue
		}
		log.Debug().Msgf("Offer url is : %s", offerURL)

		for _, re := range patterns {
			m, err := re.FindStringMatch(offerURL)
			if err != nil {
				// regexp2 reports match failures (e.g. a match timeout) here.
				log.Warn().Msgf("could not match redirect pattern against %q: %s", offerURL, err)
				continue
			}
			if m == nil {
				continue
			}

			// GroupByName returns nil when the pattern has no such group, and
			// a group that did not take part in the match yields "". Neither
			// may replace the offer URL.
			g := m.GroupByName("baseUrl")
			if g == nil || g.String() == "" {
				continue
			}

			decoded, err := url.QueryUnescape(g.String())
			if err != nil {
				log.Error().Msgf("%s", err)
				break
			}
			t[i].Offer.Url = decoded
			log.Debug().Msgf("Setting offer url to: %s", t[i].Offer.Url)
			break
		}
	}
	return t
}
func (a *App) isSponsor(t Topic) bool { func (a *App) isSponsor(t Topic) bool {
return strings.HasPrefix(t.Title, "[Sponsored]") return strings.HasPrefix(t.Title, "[Sponsored]")
} }
@@ -122,7 +167,7 @@ func (a *App) getDeals(id int, firstPage int, lastPage int) []Topic {
err = json.Unmarshal([]byte(body), &response) err = json.Unmarshal([]byte(body), &response)
if err != nil { if err != nil {
log.Warn().Msgf("could not unmarshal response body: %s\n %s", err) log.Warn().Msgf("could not unmarshal response body: %s", err)
} }
for _, topic := range response.Topics { for _, topic := range response.Topics {
@@ -134,3 +179,26 @@ func (a *App) getDeals(id int, firstPage int, lastPage int) []Topic {
} }
return t return t
} }
// getRedirects downloads the redirect pattern list (a JSON array of Redirect
// objects) from the rfd-redirect-stripper repository and returns it decoded.
//
// If the request fails, the server answers with a non-200 status, the body
// cannot be read, or the JSON cannot be decoded, a warning is logged and the
// list currently held in a.Redirects is returned unchanged, so one failed
// refresh does not discard patterns that were loaded earlier.
func (a *App) getRedirects() []Redirect {
	const requestURL = "https://raw.githubusercontent.com/davegallant/rfd-redirect-stripper/main/redirects.json"

	// Bound the request so an unresponsive server cannot block the caller
	// indefinitely; the 10s figure is a conservative choice, not a measured one.
	client := http.Client{Timeout: 10 * time.Second}

	res, err := client.Get(requestURL)
	if err != nil {
		// res is nil on error, so it must not be touched past this point.
		log.Warn().Msgf("error fetching redirects: %s\n", err)
		return a.Redirects
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		log.Warn().Msgf("unexpected status fetching redirects: %s", res.Status)
		return a.Redirects
	}

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Warn().Msgf("could not read response body: %s\n", err)
		return a.Redirects
	}

	var r []Redirect
	if err := json.Unmarshal(body, &r); err != nil {
		log.Warn().Msgf("could not unmarshal response body: %s", err)
		return a.Redirects
	}
	return r
}

View File

@@ -9,6 +9,7 @@ require (
) )
require ( require (
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-isatty v0.0.19 // indirect
golang.org/x/sys v0.12.0 // indirect golang.org/x/sys v0.12.0 // indirect

View File

@@ -1,4 +1,6 @@
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=