Strip redirects using regexp2

This commit is contained in:
Dave Gallant
2024-01-14 23:59:20 -05:00
parent 945c9b6dd4
commit 2575cfaa83
3 changed files with 74 additions and 3 deletions

View File

@@ -6,9 +6,11 @@ import (
"io/ioutil"
"math/rand"
"net/http"
"net/url"
"strings"
"time"
"github.com/dlclark/regexp2"
_ "github.com/joho/godotenv/autoload"
"github.com/rs/zerolog/log"
@@ -35,6 +37,12 @@ type App struct {
BasePath string
CurrentTopics []Topic
LastRefresh time.Time
Redirects []Redirect
}
type Redirect struct {
Name string `json:"name"`
Pattern string `json:"pattern"`
}
func (a *App) Initialize() {
@@ -81,10 +89,15 @@ func (a *App) listTopics(w http.ResponseWriter, r *http.Request) {
func (a *App) refreshTopics() {
for {
log.Info().Msg("Refreshing topics")
latestTopics := a.getDeals(9, 1, 6)
latestTopics = a.updateScores(latestTopics)
log.Debug().Msg("Refreshing topics")
a.CurrentTopics = latestTopics
log.Info().Msg("Refreshing redirects")
latestRedirects := a.getRedirects()
a.Redirects = latestRedirects
a.CurrentTopics = a.stripRedirects(latestTopics)
a.LastRefresh = time.Now()
rand.Seed(time.Now().UnixNano())
time.Sleep(time.Duration(rand.Intn(90-60+1)+60) * time.Second)
@@ -98,6 +111,38 @@ func (a *App) updateScores(t []Topic) []Topic {
return t
}
func (a *App) stripRedirects(t []Topic) []Topic {
for i := range t {
if t[i].Offer.Url == "" {
continue
}
var offerUrl = t[i].Offer.Url
log.Debug().Msgf("Offer url is : %s", offerUrl)
for _, r := range a.Redirects {
re := regexp2.MustCompile(r.Pattern, 0)
if m, _ := re.FindStringMatch(offerUrl); m != nil {
g := m.GroupByName("baseUrl")
if g.Name != "baseUrl" {
continue
}
decodedValue, err := url.QueryUnescape(g.String())
if err != nil {
log.Error().Msgf("%s", err)
break
}
t[i].Offer.Url = decodedValue
log.Debug().Msgf("Setting offer url to: %s", t[i].Offer.Url)
break
}
}
}
return t
}
func (a *App) isSponsor(t Topic) bool {
return strings.HasPrefix(t.Title, "[Sponsored]")
}
@@ -122,7 +167,7 @@ func (a *App) getDeals(id int, firstPage int, lastPage int) []Topic {
err = json.Unmarshal([]byte(body), &response)
if err != nil {
log.Warn().Msgf("could not unmarshal response body: %s\n %s", err)
log.Warn().Msgf("could not unmarshal response body: %s", err)
}
for _, topic := range response.Topics {
@@ -134,3 +179,26 @@ func (a *App) getDeals(id int, firstPage int, lastPage int) []Topic {
}
return t
}
func (a *App) getRedirects() []Redirect {
requestURL := fmt.Sprintf("https://raw.githubusercontent.com/davegallant/rfd-redirect-stripper/main/redirects.json")
res, err := http.Get(requestURL)
if err != nil {
log.Warn().Msgf("error fetching redirects: %s\n", err)
}
body, err := ioutil.ReadAll(res.Body)
if err != nil {
log.Warn().Msgf("could not read response body: %s\n", err)
}
var r []Redirect
err = json.Unmarshal([]byte(body), &r)
if err != nil {
log.Warn().Msgf("could not unmarshal response body: %s", err)
}
return r
}

View File

@@ -9,6 +9,7 @@ require (
)
require (
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
golang.org/x/sys v0.12.0 // indirect

View File

@@ -1,4 +1,6 @@
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=