Skip to content

Commit

Permalink
Improve tor connection
Browse files Browse the repository at this point in the history
  • Loading branch information
web-flow committed Apr 19, 2021
1 parent 4ad567f commit 15cdbe6
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 67 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ func main() {
// https://www.bbc.com => https://archive.li/HjqQV
```

### Access Tor Hidden Service

[archive.today](https://archive.today) provides a [Tor Hidden Service](http://archivecaslytosk.onion/) for saving webpages. Accessing the Tor Hidden Service is preferred;
<http://archive.today> is used if the Tor Hidden Service is unavailable.

By default, the program dials a proxy on the Tor SOCKS port at `127.0.0.1:9050`.
Set `TOR_HOST` and `TOR_SOCKS_PORT` to specify a different host and port.

If dialing the SOCKS proxy fails, the program looks up the `tor` executable and starts it in order to dial the proxy.

## FAQ

### archive.today is unavailable?
Expand Down
6 changes: 5 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@ go 1.15

require (
github.com/PuerkitoBio/goquery v1.6.1
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/cretz/bine v0.1.0
github.com/stretchr/testify v1.7.0 // indirect
github.com/wabarc/helper v0.0.0-20210127120855-10af37cc2616
github.com/wabarc/helper v0.0.0-20210407153720-1bfe98b427fe
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc // indirect
golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d
golang.org/x/sys v0.0.0-20210415045647-66c3f260301c // indirect
)
36 changes: 29 additions & 7 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,29 +1,51 @@
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/cretz/bine v0.1.0 h1:1/fvhLE+fk0bPzjdO5Ci+0ComYxEMuB1JhM4X5skT3g=
github.com/cretz/bine v0.1.0/go.mod h1:6PF6fWAvYtwjRGkAuDEJeWNOv3a2hUouSP/yRYXmvHw=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/wabarc/helper v0.0.0-20210127120855-10af37cc2616 h1:wZ5HtpmZAVUq0Im5Sm92ycJrTeLJk5lB/Kvh55Rd+Ps=
github.com/wabarc/helper v0.0.0-20210127120855-10af37cc2616/go.mod h1:N9P4r7Rn46p4nkWtXV6ztN3p5ACVnp++bgfwjTqSxQ8=
github.com/wabarc/helper v0.0.0-20210407153720-1bfe98b427fe h1:V9yz2vQlSVLs51nlo0DAeETFOE57OvlYm98X1LKJA6U=
github.com/wabarc/helper v0.0.0-20210407153720-1bfe98b427fe/go.mod h1:TuTZtoiOu984UWOf7FfX58JllKMjq7FCz701kB5W88E=
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee h1:MMIp++7eem2CI1jIYDoPByMwXeZAjsFo2ciBNtvhB80=
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee/go.mod h1:4uYr9fnQaQoDk1ttTzLnSB3lZm3i/vrJwN8EZIB2YuI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc h1:+q90ECDSAQirdykUN6sPEiBXBsp8Csjcca8Oy7bgLTA=
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d h1:BgJvlyh+UqCUaPlscHJ+PN8GcpfrFdr7NHjd1JL0+Gs=
golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210415045647-66c3f260301c h1:6L+uOeS3OQt/f4eFHXZcTxeZrGCuz+CLElgEBjbcTA4=
golang.org/x/sys v0.0.0-20210415045647-66c3f260301c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
mvdan.cc/xurls/v2 v2.2.0 h1:NSZPykBXJFCetGZykLAxaL6SIpvbVy/UFEniIfHAa8A=
mvdan.cc/xurls/v2 v2.2.0/go.mod h1:EV1RMtya9D6G5DMYPGD8zTQzaHet6Jh8gFlRgGRJeO8=
63 changes: 36 additions & 27 deletions is.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ var (
}
)

// init calls logger.EnableDebug when the DEBUG environment variable holds
// any non-empty value; otherwise it leaves the logger untouched.
func init() {
	if debug := os.Getenv("DEBUG"); len(debug) > 0 {
		logger.EnableDebug()
	}
}

// Wayback is the handle of saving webpages to archive.is
func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
collects, results := make(map[string]string), make(map[string]string)
Expand All @@ -66,11 +72,16 @@ func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
collects[link] = link
}

torClient, tor, err := newTorClient()
done := make(chan bool, 1)
torClient, err := newTorClient(done)
if err != nil {
logger.Error("%v", err)
} else {
defer tor.Close()
}

is := &IS{
wbrc: wbrc,
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
torClient: torClient,
}

ch := make(chan string, len(collects))
Expand All @@ -81,13 +92,8 @@ func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
for _, link := range collects {
wg.Add(1)
go func(link string) {
is := &IS{
wbrc: wbrc,
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
torClient: torClient,
submitid: "",
}
mu.Lock()
is.submitid = ""
is.archive(link, ch)
results[link] = strings.Replace(<-ch, onion, "archive.today", 1)
mu.Unlock()
Expand All @@ -96,6 +102,9 @@ func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
}
wg.Wait()

// Close tor connection
done <- true

if len(results) == 0 {
return results, fmt.Errorf("No results")
}
Expand All @@ -115,11 +124,16 @@ func (wbrc *Archiver) Playback(links []string) (map[string]string, error) {
return results, fmt.Errorf("No found URL")
}

torClient, tor, err := newTorClient()
done := make(chan bool, 1)
torClient, err := newTorClient(done)
if err != nil {
logger.Error("%v", err)
} else {
defer tor.Close()
}

is := &IS{
wbrc: wbrc,
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
torClient: torClient,
}

ch := make(chan string, len(collects))
Expand All @@ -130,13 +144,8 @@ func (wbrc *Archiver) Playback(links []string) (map[string]string, error) {
for _, link := range collects {
wg.Add(1)
go func(link string) {
is := &IS{
wbrc: wbrc,
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
torClient: torClient,
submitid: "",
}
mu.Lock()
is.submitid = ""
is.search(link, ch)
results[link] = strings.Replace(<-ch, onion, "archive.today", 1)
mu.Unlock()
Expand Down Expand Up @@ -195,6 +204,15 @@ func (is *IS) archive(uri string, ch chan<- string) {
return
}

// When use anyway parameter.
refresh := resp.Header.Get("Refresh")
if len(refresh) > 0 {
r := strings.Split(refresh, ";url=")
if len(r) == 2 {
ch <- r[1]
return
}
}
loc := resp.Header.Get("location")
if len(loc) > 2 {
ch <- loc
Expand All @@ -206,15 +224,6 @@ func (is *IS) archive(uri string, ch chan<- string) {
ch <- final
return
}
// When use anyway parameter.
refresh := resp.Header.Get("refresh")
if len(refresh) > 0 {
r := strings.Split(refresh, ";url=")
if len(r) == 2 {
ch <- r[1]
return
}
}

ch <- fmt.Sprintf("%s/timegate/%s", domain, uri)
}
Expand Down
113 changes: 81 additions & 32 deletions tor.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,76 @@ import (
"context"
"crypto/tls"
"fmt"
// "net"
"net"
"net/http"
"os"
"os/exec"
"runtime"
"time"

"github.com/cretz/bine/tor"
// "golang.org/x/net/proxy"
"github.com/wabarc/logger"
"golang.org/x/net/proxy"
)

func newTorClient() (*http.Client, *tor.Tor, error) {
// Lookup tor executable file
if _, err := exec.LookPath("tor"); err != nil {
return nil, nil, fmt.Errorf("%w", err)
}
func newTorClient(done <-chan bool) (*http.Client, error) {
var dialer proxy.ContextDialer
if useProxy() {
// Create a socks5 dialer
pxy, err := proxy.SOCKS5("tcp", "127.0.0.1:9050", nil, proxy.Direct)
if err != nil {
return nil, fmt.Errorf("Can't connect to the proxy: %w", err)
}

// Start tor with default config
startConf := &tor.StartConf{TempDataDirBase: os.TempDir()}
t, err := tor.Start(nil, startConf)
if err != nil {
return nil, nil, fmt.Errorf("Make connection failed: %w", err)
}
// defer t.Close()
dialer = pxy.(interface {
DialContext(ctx context.Context, network, addr string) (net.Conn, error)
})
} else {
// Lookup tor executable file
if _, err := exec.LookPath("tor"); err != nil {
return nil, fmt.Errorf("%w", err)
}

// Wait at most a minute to start network and get
dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Minute)
defer dialCancel()
// Start tor with default config
startConf := &tor.StartConf{TempDataDirBase: os.TempDir()}
t, err := tor.Start(nil, startConf)
if err != nil {
return nil, fmt.Errorf("Make connection failed: %w", err)
}
// defer t.Close()

// Make connection
dialer, err := t.Dialer(dialCtx, nil)
if err != nil {
t.Close()
return nil, nil, fmt.Errorf("Make connection failed: %w", err)
}
// Wait at most a minute to start network and get
dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Minute)
defer dialCancel()

// Create a socks5 dialer
// pxy, err := proxy.SOCKS5("tcp", "127.0.0.1:9050", nil, proxy.Direct)
// if err != nil {
// return nil, fmt.Errorf("Can't connect to the proxy: %w", err)
// }
// Make connection
dialer, err = t.Dialer(dialCtx, nil)
if err != nil {
t.Close()
return nil, fmt.Errorf("Make connection failed: %w", err)
}

// dialer := pxy.(interface {
// DialContext(ctx context.Context, network, addr string) (net.Conn, error)
// })
go func() {
// Auto close tor client after 10 min
tick := time.NewTicker(10 * time.Minute)
for {
select {
case <-done:
logger.Debug("Closed tor client")
tick.Stop()
t.Close()
return
case <-tick.C:
logger.Debug("Closed tor client, timeout")
tick.Stop()
t.Close()
return
default:
logger.Debug("Waiting for close tor client")
}
}
}()
}

return &http.Client{
Timeout: timeout,
Expand All @@ -65,5 +90,29 @@ func newTorClient() (*http.Client, *tor.Tor, error) {
InsecureSkipVerify: true,
},
},
}, t, nil
}, nil
}

// useProxy reports whether a TCP connection can be opened to the Tor SOCKS
// proxy address.
//
// The address is built from the TOR_HOST and TOR_SOCKS_PORT environment
// variables, falling back to "127.0.0.1" and "9050" respectively when a
// variable is empty. The probe waits at most one second, and the connection
// is closed again as soon as it has been established.
func useProxy() bool {
	torHost, socksPort := os.Getenv("TOR_HOST"), os.Getenv("TOR_SOCKS_PORT")
	if len(torHost) == 0 {
		torHost = "127.0.0.1"
	}
	if len(socksPort) == 0 {
		socksPort = "9050"
	}
	addr := net.JoinHostPort(torHost, socksPort)

	probe, dialErr := net.DialTimeout("tcp", addr, time.Second)
	switch {
	case dialErr != nil:
		logger.Debug("Try to connect tor proxy failed: %v", dialErr)
		return false
	case probe == nil:
		// Defensive: no error but no connection either; treat as unavailable.
		return false
	}

	// The connection was only a reachability probe; release it right away.
	probe.Close()
	logger.Debug("Connected: %v", addr)
	return true
}

0 comments on commit 15cdbe6

Please sign in to comment.