Skip to content

Commit

Permalink
Add size threshold for cloning
Browse files Browse the repository at this point in the history
  • Loading branch information
robertjndw committed Sep 21, 2023
1 parent 30e5cdb commit 76c3766
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 19 deletions.
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ BITBUCKET_USERNAME=your_username
BITBUCKET_PASSWORD=your_password

USE_SSH_CLONING=false # Optional: default: false
MAX_REPO_SIZE=5000000000 # Optional: default: 5000000000 (5GB)
PARALLELISM=20 # Optional: default: 20

OUTPUT_DIR=./repos_zipped # Optional: default: ./repos_zipped
CLONE_DIR=./repos_cloned # Optional: default: ./repos_cloned
52 changes: 46 additions & 6 deletions bitbucket/project.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ package bitbucket

import (
"bitbucket_archiver/utils"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"

log "github.com/sirupsen/logrus"
Expand Down Expand Up @@ -37,11 +38,50 @@ func (r Repo) GetHTTPRepoUrl() *string {
return r.extractRepoUrls("http")
}

func (r Repo) Delete() error {
// Create a basic authentication header
auth := utils.Cfg.BitbucketUsername + ":" + utils.Cfg.BitbucketPassword
basicAuth := "Basic " + base64.StdEncoding.EncodeToString([]byte(auth))
// GetSize queries the Bitbucket "sizes" endpoint for this repository and
// returns the value of the "repository" field of the JSON response
// (presumably a size in bytes, since callers compare it against
// MAX_REPO_SIZE -- confirm against the Bitbucket API).
//
// Every failure (request construction, transport error, non-200 status,
// unreadable body, malformed JSON) is logged and reported as a size of 0.
// The function never terminates the process: it is called from goroutines
// that run next to in-flight clones, and a single failed lookup must not
// abort them. Callers that compare the result against a threshold will
// therefore treat a failed lookup like a small repository.
func (r Repo) GetSize() int64 {
	sizeUrl := fmt.Sprintf("%s/projects/%s/repos/%s/sizes", utils.Cfg.BitbucketUrl, r.Project.Key, r.Slug)

	// Attach the repository to every log line so failures can be attributed
	// when many lookups run concurrently.
	logger := log.WithField("repo", r.Slug)

	req, err := http.NewRequest("GET", sizeUrl, nil)
	if err != nil {
		logger.WithError(err).Error("Error creating request")
		return 0
	}

	// Set the Authorization header for basic authentication
	req.Header.Add("Authorization", basicAuth())

	// Send the HTTP GET request to the sizes endpoint
	resp, err := client.Do(req)
	if err != nil {
		logger.WithError(err).Error("Error sending GET request")
		return 0
	}
	defer resp.Body.Close()

	// Anything other than 200 OK means no usable size information
	if resp.StatusCode != http.StatusOK {
		// Errorf is used because log.Error concatenates string operands
		// without a separating space.
		logger.Errorf("Error: Unexpected status code: %s", resp.Status)
		return 0
	}

	// Read the (small) response body in full before decoding it
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		logger.WithError(err).Error("Error reading response body")
		return 0
	}

	// Only the "repository" field of the response is needed
	sizeInfo := struct {
		RepositorySize int64 `json:"repository"`
	}{}
	if err := json.Unmarshal(body, &sizeInfo); err != nil {
		logger.WithError(err).Error("Error unmarshalling JSON")
		return 0
	}

	return sizeInfo.RepositorySize
}

func (r Repo) Delete() error {
bitbucketDeleteUrl := fmt.Sprintf("%s/rest/api/latest/projects/%s/repos/%s", utils.Cfg.BitbucketUrl, r.Project.Key, r.Slug)

req, err := http.NewRequest("DELETE", bitbucketDeleteUrl, nil)
Expand All @@ -50,7 +90,7 @@ func (r Repo) Delete() error {
}

// Set the Authorization header for basic authentication
req.Header.Add("Authorization", basicAuth)
req.Header.Add("Authorization", basicAuth())

// Send an HTTP GET request to the URL
resp, err := client.Do(req)
Expand Down
25 changes: 13 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"bitbucket_archiver/utils"
"path"

"errors"
"os"
"sync"

Expand All @@ -21,9 +20,6 @@ import (
var clone_wg sync.WaitGroup

func cloneGitRepo(repo bitbucket.Repo, username, password string) error {
// Clone the repository from the given URL
defer clone_wg.Done()

destPath := path.Join(utils.Cfg.CloneDir, repo.Project.Name, repo.Name)

log.Debugf("Cloning repo: %s to: %s", repo.Name, destPath)
Expand All @@ -39,6 +35,7 @@ func cloneGitRepo(repo bitbucket.Repo, username, password string) error {

if err != nil {
log.WithError(err).Errorf("Error cloning repo: %s to: %s ", repo.Name, destPath)
return err
}

return nil
Expand All @@ -48,21 +45,25 @@ func cloneGitRepo(repo bitbucket.Repo, username, password string) error {
// This function limits the number of parallel clones to 30
func cloneListOfRepos(repos []bitbucket.Repo, username, password string) error {
log.Debugf("Cloning %d repos", len(repos))
// safety check
if len(repos) > 20 {
log.Error("too many repos to clone at once")
return errors.New("too many repos to clone at once")
}

clone_wg = sync.WaitGroup{}
for _, repo := range repos {
clone_wg.Add(1)
go cloneGitRepo(repo, username, password)
repo := repo
go func() {
defer clone_wg.Done()
if repo.GetSize() < utils.Cfg.MaxRepoSize {
cloneGitRepo(repo, username, password)
} else {
log.Warnf("Skipping repo %s because it is too big", repo.Name)
}
}()
}
clone_wg.Wait()
return nil
}

func main() {
log.SetFormatter(&log.TextFormatter{TimestampFormat: "02.01.2006 15:04:05", FullTimestamp: true})
if is_debug := os.Getenv("DEBUG"); is_debug == "true" {
log.SetLevel(log.DebugLevel)
log.Warn("DEBUG MODE ENABLED")
Expand Down Expand Up @@ -109,7 +110,7 @@ func main() {
log.Info("Number of repos: ", len(repos))

// Split the list of repos into chunks to limit parallelism
repoChunks := utils.Chunks(repos, 20)
repoChunks := utils.Chunks(repos, utils.Cfg.Parallelism)
for _, chunk := range repoChunks {
cloneListOfRepos(chunk, utils.Cfg.GitUsername, utils.Cfg.GitPassword)
}
Expand Down
4 changes: 3 additions & 1 deletion utils/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ type Config struct {
BitbucketUsername string `env:"BITBUCKET_USERNAME,notEmpty"`
BitbucketPassword string `env:"BITBUCKET_PASSWORD,notEmpty"`

UseSSHCloning bool `env:"USE_SSH_CLONING" envDefault:"false"`
UseSSHCloning bool `env:"USE_SSH_CLONING" envDefault:"false"`
MaxRepoSize int64 `env:"MAX_REPO_SIZE" envDefault:"5000000000"`
Parallelism int `env:"PARALLELISM" envDefault:"20"`

OutputDir string `env:"OUTPUT_DIR" envDefault:"./repos_zipped"`
CloneDir string `env:"CLONE_DIR" envDefault:"./repos_cloned"`
Expand Down

0 comments on commit 76c3766

Please sign in to comment.