From b8d5191cc9b49af0703ede60a2395839b392a63d Mon Sep 17 00:00:00 2001
From: Ryan Gonzalez
Date: Tue, 5 Sep 2023 15:22:24 -0500
Subject: [PATCH 1/7] Use github.com/saracen/walker for file walk operations

In some local tests with a slowed-down filesystem, this cut the time to
clean up a repository by ~3x, bringing the total 'publish update' time
from ~16s to ~13s.

Signed-off-by: Ryan Gonzalez
---
 api/files.go | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/api/files.go b/api/files.go
index 7ae682f85..eea5a169d 100644
--- a/api/files.go
+++ b/api/files.go
@@ -131,11 +131,7 @@ func apiFilesListFiles(c *gin.Context) {
 	listLock := &sync.Mutex{}
 	root := filepath.Join(context.UploadPath(), utils.SanitizePath(c.Params.ByName("dir")))
 
-	err := filepath.Walk(root, func(path string, _ os.FileInfo, err error) error {
-		if err != nil {
-			return err
-		}
-
+	err := walker.Walk(root, func(path string, info os.FileInfo) error {
 		if path == root {
 			return nil
 		}

From 23be063dec2cff50496552dd75b9b854fc3e31c6 Mon Sep 17 00:00:00 2001
From: Ryan Gonzalez
Date: Fri, 10 Nov 2023 17:01:16 -0600
Subject: [PATCH 2/7] Split reflists to share their contents across snapshots

In current aptly, each repository and snapshot has its own reflist in the
database. This brings a few problems with it:

- Given sufficiently large repositories and snapshots, these lists can get
  enormous, reaching >1MB. This is a problem for LevelDB's overall
  performance, as it tends to prefer values around the configured block
  size (which defaults to just 4KiB).
- When you snapshot these large repositories, you get a full, new copy of
  the reflist, even if only a few packages changed. As a result, having
  many snapshots with only a few changes between them leaves the database
  full of largely duplicate reflists.
- All the duplication also means that many of the same refs are loaded
  repeatedly, which can cause some slowdown but, more notably, eats up
  huge amounts of memory.
- Adding more and more repositories and snapshots causes the time and
  memory spent on things like cleanup and publishing to grow roughly
  linearly.

At the core, there are two problems here:

- Reflists get very big because there are just a lot of packages.
- Different reflists tend to duplicate much of the same contents.

*Split reflists* aim to solve this by separating reflists into 64
*buckets*. Package refs are sorted into individual buckets according to
the following system:

- Take the first 3 letters of the package name, after dropping a `lib`
  prefix. (Using only the first 3 letters will cause packages with similar
  prefixes to end up in the same bucket, under the assumption that
  packages with similar names tend to be updated together.)
- Take the 64-bit xxhash of these letters. (xxhash was chosen because it
  has relatively good distribution across the individual bits, which is
  important for the next step.)
- Use the first 6 bits of the hash (range [0:63]) as an index into the
  buckets (see the sketch below).

Once refs are placed in buckets, a sha256 digest of all the refs in each
bucket is taken. These buckets are then stored in the database, split into
roughly block-sized segments, and the repositories and snapshots simply
store an array of bucket digests.

This approach means that *repositories and snapshots can share their
reflist buckets*.
If a snapshot is taken of a repository, it will have the same contents, so
its split reflist will point to the same buckets as the base repository,
and only one copy of each bucket is stored in the database. When some
packages in the repository change, only the buckets containing those
packages are modified; all the other buckets remain unchanged, and thus
their contents are still shared. Later on, when these reflists are loaded,
each bucket is only loaded once, short-cutting the loading of many
megabytes of data. In effect, split reflists are essentially
copy-on-write, with only the changed buckets stored individually.

Changing the disk format means that a migration needs to take place, so
that task is moved into the database cleanup step, which migrates reflists
over to split reflists and deletes any unused reflist buckets.

All the reflist tests are also changed to additionally exercise split
reflists; although the internal logic is all shared (since buckets are,
themselves, just normal reflists), some special additions are needed to
provide native versions of the various reflist helper methods.

In our tests, we've observed the following improvements:

- Memory usage during publish and database cleanup, with
  `GOMEMLIMIT=2GiB`, goes down from ~3.2GiB (larger than the memory
  limit!) to ~0.7GiB, a decrease of ~4.5x.
- Database size decreases from 1.3GB to 367MB.

*In my local tests*, publish times also decreased to mere seconds, but the
same effect wasn't observed on the server, where the times stayed about
the same. My suspicion is that this is due to I/O performance: my local
system is an M1 MBP, which almost certainly has much faster disk speeds
than our DigitalOcean block volumes. A side effect of split reflists is
that reading all the buckets by their keys requires more random accesses,
so if your random I/O performance is slower, it might cancel out the speed
benefits. That being said, even in that case, the memory usage and
database size advantages still persist.
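As a rough illustration of the bucketing scheme described above, here is a
minimal, self-contained Go sketch. It is not aptly's actual implementation:
the choice of github.com/cespare/xxhash/v2, the interpretation of "first 6
bits" as the top 6 bits of the hash, and the example ref strings are all
assumptions made for demonstration only.

```go
// Illustrative sketch only -- not aptly's actual implementation. Assumes
// the github.com/cespare/xxhash/v2 package and interprets "first 6 bits"
// as the top 6 bits of the 64-bit hash.
package main

import (
	"crypto/sha256"
	"fmt"
	"strings"

	"github.com/cespare/xxhash/v2"
)

// bucketIndex assigns a package name to one of 64 buckets: drop a "lib"
// prefix, keep at most the first 3 letters, hash them with xxhash, and
// use 6 bits of the hash as the bucket index.
func bucketIndex(packageName string) uint64 {
	key := strings.TrimPrefix(packageName, "lib")
	if len(key) > 3 {
		key = key[:3]
	}
	return xxhash.Sum64String(key) >> 58 // top 6 bits, range [0, 63]
}

func main() {
	for _, name := range []string{"libssl3", "openssl", "python3-requests"} {
		fmt.Printf("%-17s -> bucket %2d\n", name, bucketIndex(name))
	}

	// Each bucket's contents are identified by a digest of the refs it
	// holds (hypothetical ref strings here), so unchanged buckets can be
	// shared between a repository and its snapshots.
	refs := []string{"Pamd64 libssl3 3.0.11", "Pamd64 openssl 3.0.11"}
	digest := sha256.Sum256([]byte(strings.Join(refs, "\n")))
	fmt.Printf("bucket digest: %x...\n", digest[:8])
}
```

Because the index depends only on the first few letters of the (possibly
lib-stripped) name, new versions of a package land in the same bucket as
before, so a snapshot taken after a small update shares all but a few
buckets with its base repository.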
Signed-off-by: Ryan Gonzalez --- api/api.go | 2 +- api/db.go | 81 ++- api/metrics.go | 2 +- api/mirror.go | 12 +- api/publish.go | 11 +- api/repos.go | 25 +- api/snapshot.go | 34 +- cmd/cmd.go | 2 +- cmd/db_cleanup.go | 130 +++-- cmd/mirror_create.go | 2 +- cmd/mirror_edit.go | 2 +- cmd/mirror_rename.go | 2 +- cmd/mirror_show.go | 9 +- cmd/mirror_update.go | 8 +- cmd/package_show.go | 6 +- cmd/publish_snapshot.go | 6 +- cmd/publish_switch.go | 4 +- cmd/publish_update.go | 2 +- cmd/repo_add.go | 6 +- cmd/repo_create.go | 4 +- cmd/repo_edit.go | 4 +- cmd/repo_include.go | 2 +- cmd/repo_list.go | 5 +- cmd/repo_move.go | 18 +- cmd/repo_remove.go | 6 +- cmd/repo_rename.go | 2 +- cmd/repo_show.go | 9 +- cmd/snapshot_create.go | 6 +- cmd/snapshot_diff.go | 6 +- cmd/snapshot_filter.go | 4 +- cmd/snapshot_merge.go | 4 +- cmd/snapshot_pull.go | 6 +- cmd/snapshot_rename.go | 2 +- cmd/snapshot_search.go | 8 +- cmd/snapshot_show.go | 13 +- cmd/snapshot_verify.go | 2 +- database/database.go | 2 + database/goleveldb/database.go | 3 + database/goleveldb/storage.go | 11 + deb/changes.go | 9 +- deb/changes_test.go | 12 +- deb/collections.go | 12 + deb/graph.go | 6 +- deb/list.go | 2 +- deb/local.go | 31 +- deb/local_test.go | 34 +- deb/publish.go | 89 +-- deb/publish_bench_test.go | 7 +- deb/publish_test.go | 84 +-- deb/reflist.go | 772 ++++++++++++++++++++++++- deb/reflist_bench_test.go | 38 ++ deb/reflist_test.go | 890 +++++++++++++++++++---------- deb/remote.go | 29 +- deb/remote_test.go | 44 +- deb/snapshot.go | 34 +- deb/snapshot_bench_test.go | 15 +- deb/snapshot_test.go | 50 +- system/t08_db/CleanupDB10Test_gold | 1 + system/t08_db/CleanupDB11Test_gold | 2 + system/t08_db/CleanupDB12Test_gold | 2 + system/t08_db/CleanupDB1Test_gold | 1 + system/t08_db/CleanupDB2Test_gold | 1 + system/t08_db/CleanupDB3Test_gold | 1 + system/t08_db/CleanupDB4Test_gold | 1 + system/t08_db/CleanupDB5Test_gold | 1 + system/t08_db/CleanupDB6Test_gold | 1 + system/t08_db/CleanupDB7Test_gold | 1 + system/t08_db/CleanupDB8Test_gold | 1 + system/t08_db/CleanupDB9Test_gold | 1 + 69 files changed, 1967 insertions(+), 668 deletions(-) diff --git a/api/api.go b/api/api.go index a52406d80..ea3304907 100644 --- a/api/api.go +++ b/api/api.go @@ -190,7 +190,7 @@ func maybeRunTaskInBackground(c *gin.Context, name string, resources []string, p // Common piece of code to show list of packages, // with searching & details if requested -func showPackages(c *gin.Context, reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) { +func showPackages(c *gin.Context, reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) { result := []*deb.Package{} list, err := deb.NewPackageListFromRefList(reflist, collectionFactory.PackageCollection(), nil) diff --git a/api/db.go b/api/db.go index 3f8b826dd..8869fbd99 100644 --- a/api/db.go +++ b/api/db.go @@ -5,6 +5,7 @@ import ( "sort" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/task" "github.com/aptly-dev/aptly/utils" @@ -20,18 +21,22 @@ func apiDbCleanup(c *gin.Context) { collectionFactory := context.NewCollectionFactory() - // collect information about referenced packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about referenced packages and their reflist buckets... 
+ existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() + + reflistMigration := collectionFactory.RefListCollection().NewMigration() out.Printf("Loading mirrors, local repos, snapshots and published repos...") err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -40,14 +45,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -56,12 +61,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -73,13 +80,16 @@ func apiDbCleanup(c *gin.Context) { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) } return nil }) @@ -87,11 +97,20 @@ func apiDbCleanup(c *gin.Context) { return nil, err } + err = reflistMigration.Flush() + if err != nil { + return nil, err + } + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + out.Printf("Split %d reflist(s) into %d bucket(s) (%d segment(s))", + stats.Reflists, stats.Buckets, stats.Segments) + } + // ... 
and compare it to the list of all packages out.Printf("Loading list of all packages...") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced out.Printf("Deleting unreferenced packages (%d)...", toDelete.Len()) @@ -112,6 +131,28 @@ func apiDbCleanup(c *gin.Context) { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return nil, err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + out.Printf("Deleting unreferenced reflist buckets (%d)...", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return nil, err + } + + if err := batch.Write(); err != nil { + return nil, err + } + } + // now, build a list of files that should be present in Repository (package pool) out.Printf("Building list of files referenced by packages...") referencedFiles := make([]string, 0, existingPackageRefs.Len()) diff --git a/api/metrics.go b/api/metrics.go index 94a9dc252..875c3c196 100644 --- a/api/metrics.go +++ b/api/metrics.go @@ -102,7 +102,7 @@ func countPackagesByRepos() { components := repo.Components() for _, c := range components { - count := float64(len(repo.RefList(c).Refs)) + count := float64(repo.RefList(c).Len()) apiReposPackageCountGauge.WithLabelValues(fmt.Sprintf("%s", (repo.SourceNames())), repo.Distribution, c).Set(count) } diff --git a/api/mirror.go b/api/mirror.go index 0052f2a59..4139dd3ab 100644 --- a/api/mirror.go +++ b/api/mirror.go @@ -150,7 +150,7 @@ func apiMirrorsCreate(c *gin.Context) { return } - err = collection.Add(repo) + err = collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to add mirror: %s", err)) return @@ -228,7 +228,7 @@ func apiMirrorsShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -259,7 +259,7 @@ func apiMirrorsPackages(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -485,12 +485,12 @@ func apiMirrorsUpdate(c *gin.Context) { e := context.ReOpenDatabase() if e == nil { remote.MarkAsIdle() - collection.Update(remote) + collection.Update(remote, collectionFactory.RefListCollection()) } }() remote.MarkAsUpdating() - err = collection.Update(remote) + err = collection.Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } @@ -648,7 +648,7 @@ func apiMirrorsUpdate(c *gin.Context) { log.Info().Msgf("%s: Finalizing download...", b.Name) remote.FinalizeDownload(collectionFactory, out) - err = collectionFactory.RemoteRepoCollection().Update(remote) + err = collectionFactory.RemoteRepoCollection().Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to 
update: %s", err) } diff --git a/api/publish.go b/api/publish.go index c7aced61d..40bf7307b 100644 --- a/api/publish.go +++ b/api/publish.go @@ -150,7 +150,6 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { } resources = append(resources, string(snapshot.ResourceKey())) - sources = append(sources, snapshot) } } else if b.SourceKind == deb.SourceLocalRepo { @@ -189,10 +188,10 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { switch s := source.(type) { case *deb.Snapshot: snapshotCollection := collectionFactory.SnapshotCollection() - err = snapshotCollection.LoadComplete(s) + err = snapshotCollection.LoadComplete(s, collectionFactory.RefListCollection()) case *deb.LocalRepo: localCollection := collectionFactory.LocalRepoCollection() - err = localCollection.LoadComplete(s) + err = localCollection.LoadComplete(s, collectionFactory.RefListCollection()) default: err = fmt.Errorf("unexpected type for source: %T", source) } @@ -245,7 +244,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to publish: %s", err) } - err = collection.Add(published) + err = collection.Add(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } @@ -337,7 +336,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { resources = append(resources, string(published.Key())) taskName := fmt.Sprintf("Update published %s (%s): %s", published.SourceKind, strings.Join(updatedComponents, " "), strings.Join(updatedSnapshots, ", ")) maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err = collection.LoadComplete(published, collectionFactory) + err = collection.LoadComplete(published, collectionFactory, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("Unable to update: %s", err) } @@ -367,7 +366,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } - err = collection.Update(published) + err = collection.Update(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } diff --git a/api/repos.go b/api/repos.go index 005c2aaa1..a804e8f9e 100644 --- a/api/repos.go +++ b/api/repos.go @@ -133,6 +133,13 @@ func apiReposCreate(c *gin.Context) { return } + collection := collectionFactory.LocalRepoCollection() + err := collection.Add(repo, collectionFactory.RefListCollection()) + if err != nil { + AbortWithJSONError(c, 400, err) + return + } + err := localRepoCollection.Add(repo) if err != nil { AbortWithJSONError(c, http.StatusInternalServerError, err) @@ -183,7 +190,7 @@ func apiReposEdit(c *gin.Context) { repo.DefaultComponent = *b.DefaultComponent } - err = collection.Update(repo) + err = collection.Update(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -260,7 +267,7 @@ func apiReposPackagesShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -291,7 +298,7 @@ func 
apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li resources := []string{string(repo.Key())} maybeRunTaskInBackground(c, taskNamePrefix+repo.Name, resources, func(out aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -320,9 +327,9 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li } } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -407,7 +414,7 @@ func apiReposPackageFromDir(c *gin.Context) { resources := []string{string(repo.Key())} resources = append(resources, sources...) maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -442,9 +449,9 @@ func apiReposPackageFromDir(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -701,7 +708,7 @@ func apiReposIncludePackageFromDir(c *gin.Context) { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignature, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) failedFiles = append(failedFiles, failedFiles2...) 
if err != nil { diff --git a/api/snapshot.go b/api/snapshot.go index 18aa64404..4374b5a75 100644 --- a/api/snapshot.go +++ b/api/snapshot.go @@ -76,7 +76,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusConflict, Value: nil}, err } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -90,7 +90,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -140,7 +140,7 @@ func apiSnapshotsCreate(c *gin.Context) { maybeRunTaskInBackground(c, "Create snapshot "+b.Name, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { for i := range sources { - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -163,9 +163,9 @@ func apiSnapshotsCreate(c *gin.Context) { } } - snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewPackageRefListFromPackageList(list), b.Description) + snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewSplitRefListFromPackageList(list), b.Description) - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -204,8 +204,8 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { // including snapshot resource key resources := []string{string(repo.Key()), "S" + b.Name} taskName := fmt.Sprintf("Create snapshot of repo %s", name) - maybeRunTaskInBackground(c, taskName, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err := collection.LoadComplete(repo) + maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { + err := collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -219,7 +219,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -269,7 +269,7 @@ func apiSnapshotsUpdate(c *gin.Context) { snapshot.Description = b.Description } - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -288,7 +288,7 @@ func apiSnapshotsShow(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -355,20 +355,20 @@ func apiSnapshotsDiff(c *gin.Context) { return } 
- err = collection.LoadComplete(snapshotA) + err = collection.LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } - err = collection.LoadComplete(snapshotB) + err = collection.LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { AbortWithJSONError(c, 500, err) return @@ -398,7 +398,7 @@ func apiSnapshotsSearchPackages(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -473,13 +473,13 @@ func apiSnapshotsMerge(c *gin.Context) { } maybeRunTaskInBackground(c, "Merge snapshot "+name, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err = snapshotCollection.LoadComplete(sources[0]) + err = snapshotCollection.LoadComplete(sources[0], collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } result := sources[0].RefList() for i := 1; i < len(sources); i++ { - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -498,7 +498,7 @@ func apiSnapshotsMerge(c *gin.Context) { snapshot = deb.NewSnapshotFromRefList(name, sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/cmd.go b/cmd/cmd.go index d5bdff25d..8c112d9df 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -21,7 +21,7 @@ const ( ) // ListPackagesRefList shows list of packages in PackageRefList -func ListPackagesRefList(reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) (err error) { +func ListPackagesRefList(reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) (err error) { fmt.Printf("Packages:\n") if reflist == nil { diff --git a/cmd/db_cleanup.go b/cmd/db_cleanup.go index 66fece677..ec1906859 100644 --- a/cmd/db_cleanup.go +++ b/cmd/db_cleanup.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/utils" "github.com/smira/commander" @@ -24,12 +25,20 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { dryRun := context.Flags().Lookup("dry-run").Value.Get().(bool) collectionFactory := context.NewCollectionFactory() - // collect information about references packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about references packages and their reflistbuckets... 
+ existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() // used only in verbose mode to report package use source packageRefSources := map[string][]string{} + var reflistMigration *deb.RefListMigration + if !dryRun { + reflistMigration = collectionFactory.RefListCollection().NewMigration() + } else { + reflistMigration = collectionFactory.RefListCollection().NewMigrationDryRun() + } + context.Progress().ColoredPrintf("@{w!}Loading mirrors, local repos, snapshots and published repos...@|") if verbose { context.Progress().ColoredPrintf("@{y}Loading mirrors:@|") @@ -39,20 +48,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - if verbose { - description := fmt.Sprintf("mirror %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + + if verbose { + description := fmt.Sprintf("mirror %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -71,21 +81,23 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) - if verbose { - description := fmt.Sprintf("local repo %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + + if verbose { + description := fmt.Sprintf("local repo %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -104,16 +116,18 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", snapshot.Name) } - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) if verbose { description := fmt.Sprintf("snapshot %s", snapshot.Name) - snapshot.RefList().ForEach(func(key []byte) error { + 
sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -136,17 +150,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + if verbose { description := fmt.Sprintf("published repository %s:%s/%s component %s", published.Storage, published.Prefix, published.Distribution, component) - published.RefList(component).ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -160,11 +178,29 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { collectionFactory.Flush() + err = reflistMigration.Flush() + if err != nil { + return err + } + + if verbose { + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + if !dryRun { + context.Progress().ColoredPrintf("@{w!}Split %d reflist(s) into %d bucket(s) (%d segment(s))@|", + stats.Reflists, stats.Buckets, stats.Segments) + } else { + context.Progress().ColoredPrintf( + "@{y!}Skipped splitting %d reflist(s) into %d bucket(s) (%d segment(s)), as -dry-run has been requested.@|", + stats.Reflists, stats.Buckets, stats.Segments) + } + } + } + // ... 
and compare it to the list of all packages context.Progress().ColoredPrintf("@{w!}Loading list of all packages...@|") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced context.Progress().ColoredPrintf("@{r!}Deleting unreferenced packages (%d)...@|", toDelete.Len()) @@ -202,6 +238,32 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + context.Progress().ColoredPrintf("@{r!}Deleting unreferenced reflist buckets (%d)...@|", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + if !dryRun { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return err + } + + if err := batch.Write(); err != nil { + return err + } + } else { + context.Progress().ColoredPrintf("@{y!}Skipped reflist deletion, as -dry-run has been requested.@|") + } + } + collectionFactory.Flush() // now, build a list of files that should be present in Repository (package pool) diff --git a/cmd/mirror_create.go b/cmd/mirror_create.go index 022a5cf20..addc703d7 100644 --- a/cmd/mirror_create.go +++ b/cmd/mirror_create.go @@ -69,7 +69,7 @@ func aptlyMirrorCreate(cmd *commander.Command, args []string) error { } collectionFactory := context.NewCollectionFactory() - err = collectionFactory.RemoteRepoCollection().Add(repo) + err = collectionFactory.RemoteRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add mirror: %s", err) } diff --git a/cmd/mirror_edit.go b/cmd/mirror_edit.go index 5adf504b8..af8a809ec 100644 --- a/cmd/mirror_edit.go +++ b/cmd/mirror_edit.go @@ -78,7 +78,7 @@ func aptlyMirrorEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/mirror_rename.go b/cmd/mirror_rename.go index 2ff9f9204..ff453b857 100644 --- a/cmd/mirror_rename.go +++ b/cmd/mirror_rename.go @@ -37,7 +37,7 @@ func aptlyMirrorRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/mirror_show.go b/cmd/mirror_show.go index 03179161a..3c52d6e37 100644 --- a/cmd/mirror_show.go +++ b/cmd/mirror_show.go @@ -38,7 +38,7 @@ func aptlyMirrorShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -98,12 +98,13 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().RemoteRepoCollection().ByName(name) + collectionFactory := 
context.NewCollectionFactory() + repo, err := collectionFactory.RemoteRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -113,7 +114,7 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/mirror_update.go b/cmd/mirror_update.go index 2e6df4397..9536188d5 100644 --- a/cmd/mirror_update.go +++ b/cmd/mirror_update.go @@ -29,7 +29,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to update: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -101,12 +101,12 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { err = context.ReOpenDatabase() if err == nil { repo.MarkAsIdle() - collectionFactory.RemoteRepoCollection().Update(repo) + collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) } }() repo.MarkAsUpdating() - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -262,7 +262,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { } repo.FinalizeDownload(collectionFactory, context.Progress()) - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } diff --git a/cmd/package_show.go b/cmd/package_show.go index 37f07e9b0..6bbec9849 100644 --- a/cmd/package_show.go +++ b/cmd/package_show.go @@ -14,7 +14,7 @@ import ( func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) (err error) { err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -30,7 +30,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -46,7 +46,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := 
collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/publish_snapshot.go b/cmd/publish_snapshot.go index 3d2e43e60..4e82b665b 100644 --- a/cmd/publish_snapshot.go +++ b/cmd/publish_snapshot.go @@ -49,7 +49,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -85,7 +85,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -171,7 +171,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Add(published) + err = collectionFactory.PublishedRepoCollection().Add(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_switch.go b/cmd/publish_switch.go index 0f1a620b9..aee53fa9a 100644 --- a/cmd/publish_switch.go +++ b/cmd/publish_switch.go @@ -68,7 +68,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to switch: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to switch: %s", err) } @@ -104,7 +104,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_update.go b/cmd/publish_update.go index 28de8c67c..608d45104 100644 --- a/cmd/publish_update.go +++ b/cmd/publish_update.go @@ -73,7 +73,7 @@ func aptlyPublishUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/repo_add.go b/cmd/repo_add.go index 8189e7834..0263879f3 100644 --- a/cmd/repo_add.go +++ b/cmd/repo_add.go @@ -28,7 +28,7 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to add: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add: %s", err) } @@ -58,9 +58,9 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) 
error { processedFiles = append(processedFiles, otherFiles...) - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_create.go b/cmd/repo_create.go index 5fef46d92..0e3a1e52b 100644 --- a/cmd/repo_create.go +++ b/cmd/repo_create.go @@ -36,7 +36,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load source snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load source snapshot: %s", err) } @@ -44,7 +44,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { repo.UpdateRefList(snapshot.RefList()) } - err = collectionFactory.LocalRepoCollection().Add(repo) + err = collectionFactory.LocalRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add local repo: %s", err) } diff --git a/cmd/repo_edit.go b/cmd/repo_edit.go index bc81dc4a1..c7fbc4196 100644 --- a/cmd/repo_edit.go +++ b/cmd/repo_edit.go @@ -22,7 +22,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to edit: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } @@ -53,7 +53,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/repo_include.go b/cmd/repo_include.go index 72e24c6da..6316a4aae 100644 --- a/cmd/repo_include.go +++ b/cmd/repo_include.go @@ -67,7 +67,7 @@ func aptlyRepoInclude(cmd *commander.Command, args []string) error { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, uploaders, query.Parse) failedFiles = append(failedFiles, failedFiles2...) 
diff --git a/cmd/repo_list.go b/cmd/repo_list.go index 9c4b0d47e..f3ca4a8ba 100644 --- a/cmd/repo_list.go +++ b/cmd/repo_list.go @@ -36,7 +36,7 @@ func aptlyRepoListTxt(cmd *commander.Command, _ []string) error { if raw { repos[i] = repo.Name } else { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -77,7 +77,8 @@ func aptlyRepoListJSON(_ *commander.Command, _ []string) error { repos := make([]*deb.LocalRepo, context.NewCollectionFactory().LocalRepoCollection().Len()) i := 0 context.NewCollectionFactory().LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + collectionFactory := context.NewCollectionFactory() + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/repo_move.go b/cmd/repo_move.go index 8be6698b6..a1c5ba261 100644 --- a/cmd/repo_move.go +++ b/cmd/repo_move.go @@ -25,13 +25,13 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } var ( - srcRefList *deb.PackageRefList + srcRefList *deb.SplitRefList srcRepo *deb.LocalRepo ) @@ -45,7 +45,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: source and destination are the same", command) } - err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -59,12 +59,12 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } - if srcRemoteRepo.RefList() == nil { + if srcRemoteRepo.RefList().Len() == 0 { return fmt.Errorf("unable to %s: mirror not updated", command) } @@ -150,17 +150,17 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - dstRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(dstList)) + dstRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(dstList)) - err = collectionFactory.LocalRepoCollection().Update(dstRepo) + err = collectionFactory.LocalRepoCollection().Update(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } if command == "move" { // nolint: goconst - srcRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(srcList)) + srcRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(srcList)) - err = collectionFactory.LocalRepoCollection().Update(srcRepo) + err = collectionFactory.LocalRepoCollection().Update(srcRepo, 
collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_remove.go b/cmd/repo_remove.go index 93e8535c7..d3a1159ee 100644 --- a/cmd/repo_remove.go +++ b/cmd/repo_remove.go @@ -24,7 +24,7 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to remove: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to remove: %s", err) } @@ -59,9 +59,9 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_rename.go b/cmd/repo_rename.go index 9234b7c72..459afcbbb 100644 --- a/cmd/repo_rename.go +++ b/cmd/repo_rename.go @@ -32,7 +32,7 @@ func aptlyRepoRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/repo_show.go b/cmd/repo_show.go index a61a5f1f9..741915d12 100644 --- a/cmd/repo_show.go +++ b/cmd/repo_show.go @@ -36,7 +36,7 @@ func aptlyRepoShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -63,12 +63,13 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().LocalRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.LocalRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -79,7 +80,7 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err == nil { packageList = list.FullNames() } diff --git a/cmd/snapshot_create.go b/cmd/snapshot_create.go index 000a78d9b..6bc319de3 100644 --- a/cmd/snapshot_create.go +++ b/cmd/snapshot_create.go @@ -30,7 +30,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = 
collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -50,7 +50,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -71,7 +71,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return commander.ErrCommandError } - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add snapshot: %s", err) } diff --git a/cmd/snapshot_diff.go b/cmd/snapshot_diff.go index ccbea32ee..19da7fcc9 100644 --- a/cmd/snapshot_diff.go +++ b/cmd/snapshot_diff.go @@ -23,7 +23,7 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot A: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot A: %s", err) } @@ -34,13 +34,13 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot B: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot B: %s", err) } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { return fmt.Errorf("unable to calculate diff: %s", err) } diff --git a/cmd/snapshot_filter.go b/cmd/snapshot_filter.go index b81a9cfc2..5aed03b95 100644 --- a/cmd/snapshot_filter.go +++ b/cmd/snapshot_filter.go @@ -27,7 +27,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to filter: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to filter: %s", err) } @@ -76,7 +76,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[1], []*deb.Snapshot{source}, result, fmt.Sprintf("Filtered '%s', query was: '%s'", source.Name, strings.Join(args[2:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_merge.go b/cmd/snapshot_merge.go index 0a319a5ab..e9eb0c779 100644 --- a/cmd/snapshot_merge.go +++ b/cmd/snapshot_merge.go @@ -24,7 +24,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load 
snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(sources[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot: %s", err) } @@ -57,7 +57,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromRefList(args[0], sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_pull.go b/cmd/snapshot_pull.go index 884b50ffb..50e8488b8 100644 --- a/cmd/snapshot_pull.go +++ b/cmd/snapshot_pull.go @@ -29,7 +29,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -40,7 +40,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -138,7 +138,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[2], []*deb.Snapshot{snapshot, source}, packageList, fmt.Sprintf("Pulled into '%s' with '%s' as source, pull request was: '%s'", snapshot.Name, source.Name, strings.Join(args[3:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_rename.go b/cmd/snapshot_rename.go index b8ac74cf3..b13b7dca6 100644 --- a/cmd/snapshot_rename.go +++ b/cmd/snapshot_rename.go @@ -32,7 +32,7 @@ func aptlySnapshotRename(cmd *commander.Command, args []string) error { } snapshot.Name = newName - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/snapshot_search.go b/cmd/snapshot_search.go index d771af7ce..c0244f11c 100644 --- a/cmd/snapshot_search.go +++ b/cmd/snapshot_search.go @@ -25,7 +25,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error command := cmd.Parent.Name() collectionFactory := context.NewCollectionFactory() - var reflist *deb.PackageRefList + var reflist *deb.SplitRefList if command == "snapshot" { // nolint: goconst var snapshot *deb.Snapshot @@ -34,7 +34,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return 
fmt.Errorf("unable to search: %s", err) } @@ -47,7 +47,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -60,7 +60,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } diff --git a/cmd/snapshot_show.go b/cmd/snapshot_show.go index e03a49e58..582b6a9e3 100644 --- a/cmd/snapshot_show.go +++ b/cmd/snapshot_show.go @@ -35,7 +35,7 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -86,16 +86,17 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { } func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { + collectionFactory := context.NewCollectionFactory() var err error name := args[0] - snapshot, err := context.NewCollectionFactory().SnapshotCollection().ByName(name) + snapshot, err := collectionFactory.SnapshotCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -105,14 +106,14 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { for _, sourceID := range snapshot.SourceIDs { if snapshot.SourceKind == deb.SourceSnapshot { var source *deb.Snapshot - source, err = context.NewCollectionFactory().SnapshotCollection().ByUUID(sourceID) + source, err = collectionFactory.SnapshotCollection().ByUUID(sourceID) if err != nil { continue } snapshot.Snapshots = append(snapshot.Snapshots, source) } else if snapshot.SourceKind == deb.SourceLocalRepo { var source *deb.LocalRepo - source, err = context.NewCollectionFactory().LocalRepoCollection().ByUUID(sourceID) + source, err = collectionFactory.LocalRepoCollection().ByUUID(sourceID) if err != nil { continue } @@ -133,7 +134,7 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { if withPackages { if snapshot.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(snapshot.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(snapshot.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/snapshot_verify.go b/cmd/snapshot_verify.go index f815f29ce..fc566aae3 100644 --- a/cmd/snapshot_verify.go +++ b/cmd/snapshot_verify.go @@ -23,7 +23,7 @@ func aptlySnapshotVerify(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to 
verify: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to verify: %s", err) } diff --git a/database/database.go b/database/database.go index 709a1aa80..03d896b20 100644 --- a/database/database.go +++ b/database/database.go @@ -48,6 +48,8 @@ type Storage interface { CreateTemporary() (Storage, error) + GetRecommendedMaxKVSize() int + Open() error Close() error CompactDB() error diff --git a/database/goleveldb/database.go b/database/goleveldb/database.go index a2874a6e6..011681a6b 100644 --- a/database/goleveldb/database.go +++ b/database/goleveldb/database.go @@ -9,10 +9,13 @@ import ( "github.com/aptly-dev/aptly/database" ) +const blockSize = 4 * 1024 + func internalOpen(path string, throttleCompaction bool) (*leveldb.DB, error) { o := &opt.Options{ Filter: filter.NewBloomFilter(10), OpenFilesCacheCapacity: 256, + BlockSize: blockSize, } if throttleCompaction { diff --git a/database/goleveldb/storage.go b/database/goleveldb/storage.go index 37acf3d83..1281f3fbc 100644 --- a/database/goleveldb/storage.go +++ b/database/goleveldb/storage.go @@ -16,6 +16,17 @@ type storage struct { db *leveldb.DB } +func (s *storage) GetRecommendedMaxKVSize() int { + // The block size configured is not actually a *set* block size, but rather a + // *minimum*. LevelDB only checks if a block is full after a new key/value pair is + // written, meaning that blocks will tend to overflow a bit. + // Therefore, using the default block size as the max value size will ensure + // that a new block will only contain a single value and that the size will + // only ever be as large as around double the block size (if the block was + // nearly full before the new items were added). 
+ return blockSize +} + // CreateTemporary creates new DB of the same type in temp dir func (s *storage) CreateTemporary() (database.Storage, error) { tempdir, err := os.MkdirTemp("", "aptly") diff --git a/deb/changes.go b/deb/changes.go index c264986ab..6c8bf8812 100644 --- a/deb/changes.go +++ b/deb/changes.go @@ -291,7 +291,8 @@ func CollectChangesFiles(locations []string, reporter aptly.ResultReporter) (cha // ImportChangesFiles imports referenced files in changes files into local repository func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles bool, verifier pgp.Verifier, repoTemplate *template.Template, progress aptly.Progress, localRepoCollection *LocalRepoCollection, packageCollection *PackageCollection, - pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { + reflistCollection *RefListCollection, pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, + parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { for _, path := range changesFiles { var changes *Changes @@ -359,7 +360,7 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac } } - err = localRepoCollection.LoadComplete(repo) + err = localRepoCollection.LoadComplete(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to load repo: %s", err) } @@ -382,9 +383,9 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac return nil, nil, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(NewSplitRefListFromPackageList(list)) - err = localRepoCollection.Update(repo) + err = localRepoCollection.Update(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to save: %s", err) } diff --git a/deb/changes_test.go b/deb/changes_test.go index b7dc4d95d..1d50d610a 100644 --- a/deb/changes_test.go +++ b/deb/changes_test.go @@ -21,6 +21,7 @@ type ChangesSuite struct { db database.Storage localRepoCollection *LocalRepoCollection packageCollection *PackageCollection + reflistCollection *RefListCollection packagePool aptly.PackagePool checksumStorage aptly.ChecksumStorage progress aptly.Progress @@ -42,6 +43,7 @@ func (s *ChangesSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.localRepoCollection = NewLocalRepoCollection(s.db) s.packageCollection = NewPackageCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.checksumStorage = files.NewMockChecksumStorage() s.packagePool = files.NewPackagePool(s.Dir, false) @@ -88,7 +90,7 @@ func (s *ChangesSuite) TestCollectChangesFiles(c *C) { func (s *ChangesSuite) TestImportChangesFiles(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) origFailedFiles := []string{ "testdata/changes/calamares.changes", @@ -124,7 +126,8 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { processedFiles, failedFiles, err := ImportChangesFiles( append(changesFiles, "testdata/changes/notexistent.changes"), s.Reporter, true, true, false, false, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, 
func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, DeepEquals, append(expectedFailedFiles, "testdata/changes/notexistent.changes")) @@ -133,7 +136,7 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) changesFiles, failedFiles := CollectChangesFiles( []string{"testdata/dbgsym-with-source-version"}, s.Reporter) @@ -142,7 +145,8 @@ func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { _, failedFiles, err := ImportChangesFiles( changesFiles, s.Reporter, true, true, false, true, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, IsNil) diff --git a/deb/collections.go b/deb/collections.go index 7dfe85235..ff711e265 100644 --- a/deb/collections.go +++ b/deb/collections.go @@ -16,6 +16,7 @@ type CollectionFactory struct { snapshots *SnapshotCollection localRepos *LocalRepoCollection publishedRepos *PublishedRepoCollection + reflists *RefListCollection checksums *ChecksumCollection } @@ -91,6 +92,17 @@ func (factory *CollectionFactory) PublishedRepoCollection() *PublishedRepoCollec return factory.publishedRepos } +func (factory *CollectionFactory) RefListCollection() *RefListCollection { + factory.Lock() + defer factory.Unlock() + + if factory.reflists == nil { + factory.reflists = NewRefListCollection(factory.db) + } + + return factory.reflists +} + // ChecksumCollection returns (or creates) new ChecksumCollection func (factory *CollectionFactory) ChecksumCollection(db database.ReaderWriter) aptly.ChecksumStorage { factory.Lock() diff --git a/deb/graph.go b/deb/graph.go index 16a7ce854..77421c3a5 100644 --- a/deb/graph.go +++ b/deb/graph.go @@ -33,7 +33,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz existingNodes := map[string]bool{} err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -55,7 +55,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -81,7 +81,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz }) err = collectionFactory.SnapshotCollection().ForEach(func(snapshot 
*Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/deb/list.go b/deb/list.go index f9a8244ac..a99cf19ef 100644 --- a/deb/list.go +++ b/deb/list.go @@ -90,7 +90,7 @@ func NewPackageListWithDuplicates(duplicates bool, capacity int) *PackageList { } // NewPackageListFromRefList loads packages list from PackageRefList -func NewPackageListFromRefList(reflist *PackageRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { +func NewPackageListFromRefList(reflist AnyRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { // empty reflist if reflist == nil { return NewPackageList(), nil diff --git a/deb/local.go b/deb/local.go index 1b09fdbda..d88904c28 100644 --- a/deb/local.go +++ b/deb/local.go @@ -26,7 +26,7 @@ type LocalRepo struct { // Uploaders configuration Uploaders *Uploaders `codec:"Uploaders,omitempty" json:"-"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewLocalRepo creates new instance of Debian local repository @@ -48,20 +48,17 @@ func (repo *LocalRepo) String() string { // NumPackages return number of packages in local repo func (repo *LocalRepo) NumPackages() int { - if repo.packageRefs == nil { - return 0 - } return repo.packageRefs.Len() } // RefList returns package list for repo -func (repo *LocalRepo) RefList() *PackageRefList { +func (repo *LocalRepo) RefList() *SplitRefList { return repo.packageRefs } // UpdateRefList changes package list for local repo -func (repo *LocalRepo) UpdateRefList(reflist *PackageRefList) { - repo.packageRefs = reflist +func (repo *LocalRepo) UpdateRefList(sl *SplitRefList) { + repo.packageRefs = sl } // Encode does msgpack encoding of LocalRepo @@ -140,14 +137,14 @@ func (collection *LocalRepoCollection) search(filter func(*LocalRepo) bool, uniq } // Add appends new repo to collection and saves it -func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Add(repo *LocalRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("local repo with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -157,27 +154,25 @@ func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { } // Update stores updated information about repo in DB -func (collection *LocalRepoCollection) Update(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Update(repo *LocalRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + bc := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), bc) } return batch.Write() } // LoadComplete loads additional information for local repo -func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == 
database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/local_test.go b/deb/local_test.go index c9072b7dc..b87b1b624 100644 --- a/deb/local_test.go +++ b/deb/local_test.go @@ -12,7 +12,7 @@ import ( type LocalRepoSuite struct { db database.Storage list *PackageList - reflist *PackageRefList + reflist *SplitRefList repo *LocalRepo } @@ -24,7 +24,7 @@ func (s *LocalRepoSuite) SetUpTest(c *C) { s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) s.repo = NewLocalRepo("lrepo", "Super repo") s.repo.packageRefs = s.reflist @@ -75,10 +75,11 @@ func (s *LocalRepoSuite) TestRefKey(c *C) { } type LocalRepoCollectionSuite struct { - db database.Storage - collection *LocalRepoCollection - list *PackageList - reflist *PackageRefList + db database.Storage + collection *LocalRepoCollection + reflistCollection *RefListCollection + list *PackageList + reflist *SplitRefList } var _ = Suite(&LocalRepoCollectionSuite{}) @@ -86,12 +87,13 @@ var _ = Suite(&LocalRepoCollectionSuite{}) func (s *LocalRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewLocalRepoCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.list = NewPackageList() s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromRefList(NewPackageRefListFromPackageList(s.list)) } func (s *LocalRepoCollectionSuite) TearDownTest(c *C) { @@ -103,8 +105,8 @@ func (s *LocalRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("local1") c.Assert(err, IsNil) @@ -121,7 +123,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -135,7 +137,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection := NewLocalRepoCollection(s.db) r, err := collection.ByName("local1") @@ -143,20 +145,20 @@ func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection = NewLocalRepoCollection(s.db) r, err = collection.ByName("local1") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - 
c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.reflistCollection), IsNil) c.Assert(r.NumPackages(), Equals, 2) } func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { repo := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo) + s.collection.Add(repo, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*LocalRepo) error { @@ -178,10 +180,10 @@ func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *LocalRepoCollectionSuite) TestDrop(c *C) { repo1 := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo1) + s.collection.Add(repo1, s.reflistCollection) repo2 := NewLocalRepo("local2", "Comment 2") - s.collection.Add(repo2) + s.collection.Add(repo2, s.reflistCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/publish.go b/deb/publish.go index 79c47cb83..5cf847bea 100644 --- a/deb/publish.go +++ b/deb/publish.go @@ -27,7 +27,7 @@ type repoSourceItem struct { // Pointer to local repo if SourceKind == "local" localRepo *LocalRepo // Package references is SourceKind == "local" - packageRefs *PackageRefList + packageRefs *SplitRefList } // PublishedRepo is a published for http/ftp representation of snapshot as Debian repository @@ -399,7 +399,7 @@ func (p *PublishedRepo) RefKey(component string) []byte { } // RefList returns list of package refs in local repo -func (p *PublishedRepo) RefList(component string) *PackageRefList { +func (p *PublishedRepo) RefList(component string) *SplitRefList { item := p.sourceItems[component] if p.SourceKind == SourceLocalRepo { return item.packageRefs @@ -958,14 +958,14 @@ func (collection *PublishedRepoCollection) loadList() { } // Add appends new repo to collection and saves it -func (collection *PublishedRepoCollection) Add(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Add(repo *PublishedRepo, reflistCollection *RefListCollection) error { collection.loadList() if collection.CheckDuplicate(repo) != nil { return fmt.Errorf("published repo with storage/prefix/distribution %s/%s/%s already exists", repo.Storage, repo.Prefix, repo.Distribution) } - err := collection.Update(repo) + err := collection.Update(repo, reflistCollection) if err != nil { return err } @@ -988,13 +988,14 @@ func (collection *PublishedRepoCollection) CheckDuplicate(repo *PublishedRepo) * } // Update stores updated information about repo in DB -func (collection *PublishedRepoCollection) Update(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Update(repo *PublishedRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.SourceKind == SourceLocalRepo { + rb := reflistCollection.NewBatch(batch) for component, item := range repo.sourceItems { - batch.Put(repo.RefKey(component), item.packageRefs.Encode()) + reflistCollection.UpdateInBatch(item.packageRefs, repo.RefKey(component), rb) } } return batch.Write() @@ -1027,7 +1028,7 @@ func (collection *PublishedRepoCollection) LoadShallow(repo *PublishedRepo, coll return } - item.packageRefs = &PackageRefList{} + item.packageRefs = NewSplitRefList() repo.sourceItems[component] = item } } else { @@ -1043,35 +1044,29 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if repo.SourceKind == SourceSnapshot { for _, item := range repo.sourceItems { - err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot) + err = 
collectionFactory.SnapshotCollection().LoadComplete(item.snapshot, collectionFactory.RefListCollection()) if err != nil { return } } } else if repo.SourceKind == SourceLocalRepo { for component, item := range repo.sourceItems { - err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo, collectionFactory.RefListCollection()) if err != nil { return } - var encoded []byte - encoded, err = collection.db.Get(repo.RefKey(component)) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey(component)) if err != nil { // < 0.6 saving w/o component name if err == database.ErrNotFound && len(repo.Sources) == 1 { - encoded, err = collection.db.Get(repo.RefKey("")) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey("")) } if err != nil { return } } - - err = item.packageRefs.Decode(encoded) - if err != nil { - return - } } } else { panic("unknown SourceKind") @@ -1176,6 +1171,11 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix referencedFiles := map[string][]string{} processedComponentRefs := map[string]*PackageRefList{} + processedComponentBuckets := map[string]*RefListDigestSet{} + for _, component := range components { + processedComponentBuckets[component] = NewRefListDigestSet() + } + for _, r := range collection.list { if r.Prefix == prefix { matches := false @@ -1199,36 +1199,51 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix for _, component := range components { if utils.StrSliceHasItem(repoComponents, component) { - unseenRefs := r.RefList(component) - processedRefs := processedComponentRefs[component] - if processedRefs != nil { - unseenRefs = unseenRefs.Subtract(processedRefs) - } else { - processedRefs = NewPackageRefList() - } + processedBuckets := processedComponentBuckets[component] - if unseenRefs.Len() == 0 { - continue - } - processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) + err := r.RefList(component).ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + if processedBuckets.Has(digest) { + return nil + } + processedBuckets.Add(digest) - packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) - if err != nil { - return nil, err - } + unseenRefs := bucket + processedRefs := processedComponentRefs[component] + if processedRefs != nil { + unseenRefs = unseenRefs.Subtract(processedRefs) + } else { + processedRefs = NewPackageRefList() + } - packageList.ForEach(func(p *Package) error { - poolDir, err := p.PoolDirectory() + if unseenRefs.Len() == 0 { + return nil + } + processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) + + packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) if err != nil { return err } - for _, f := range p.Files() { - referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) - } + packageList.ForEach(func(p *Package) error { + poolDir, err := p.PoolDirectory() + if err != nil { + return err + } + + for _, f := range p.Files() { + referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) + } + + return nil + }) return nil }) + + if err != nil { + return nil, err + } } } } diff --git a/deb/publish_bench_test.go b/deb/publish_bench_test.go index 86f18c301..29b17bcc8 100644 --- 
a/deb/publish_bench_test.go +++ b/deb/publish_bench_test.go @@ -31,6 +31,7 @@ func BenchmarkListReferencedFiles(b *testing.B) { packageCollection := factory.PackageCollection() repoCollection := factory.LocalRepoCollection() publishCollection := factory.PublishedRepoCollection() + reflistCollection := factory.RefListCollection() sharedRefs := NewPackageRefList() { @@ -91,14 +92,14 @@ func BenchmarkListReferencedFiles(b *testing.B) { repo := NewLocalRepo(fmt.Sprintf("repo%d", repoIndex), "comment") repo.DefaultDistribution = fmt.Sprintf("dist%d", repoIndex) repo.DefaultComponent = defaultComponent - repo.UpdateRefList(refs.Merge(sharedRefs, false, true)) - repoCollection.Add(repo) + repo.UpdateRefList(NewSplitRefListFromRefList(refs.Merge(sharedRefs, false, true))) + repoCollection.Add(repo, reflistCollection) publish, err := NewPublishedRepo("", "test", "", nil, []string{defaultComponent}, []interface{}{repo}, factory, false) if err != nil { b.Fatal(err) } - publishCollection.Add(publish) + publishCollection.Add(publish, reflistCollection) } db.CompactDB() diff --git a/deb/publish_test.go b/deb/publish_test.go index 5243a41a6..9c1838749 100644 --- a/deb/publish_test.go +++ b/deb/publish_test.go @@ -82,6 +82,7 @@ type PublishedRepoSuite struct { db database.Storage factory *CollectionFactory packageCollection *PackageCollection + reflistCollection *RefListCollection } var _ = Suite(&PublishedRepoSuite{}) @@ -113,21 +114,22 @@ func (s *PublishedRepoSuite) SetUpTest(c *C) { s.p2.UpdateFiles(s.p1.Files()) s.p3.UpdateFiles(s.p1.Files()) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) + s.reflistCollection = s.factory.RefListCollection() repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) repo.packageRefs = s.reflist - s.factory.RemoteRepoCollection().Add(repo) + s.factory.RemoteRepoCollection().Add(repo, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") s.localRepo.packageRefs = s.reflist - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.snapshot, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot) + s.factory.SnapshotCollection().Add(s.snapshot, s.reflistCollection) s.snapshot2, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot2) + s.factory.SnapshotCollection().Add(s.snapshot2, s.reflistCollection) s.packageCollection = s.factory.PackageCollection() s.packageCollection.Update(s.p1) @@ -336,7 +338,7 @@ func (s *PublishedRepoSuite) TestDistributionComponentGuessing(c *C) { s.localRepo.DefaultDistribution = "precise" s.localRepo.DefaultComponent = "contrib" - s.factory.LocalRepoCollection().Update(s.localRepo) + s.factory.LocalRepoCollection().Update(s.localRepo, s.reflistCollection) repo, err = NewPublishedRepo("", "ppa", "", nil, []string{""}, []interface{}{s.localRepo}, s.factory, false) c.Check(err, IsNil) @@ -494,6 +496,7 @@ type PublishedRepoCollectionSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection snap1, snap2 *Snapshot localRepo *LocalRepo @@ -509,22 +512,23 @@ func (s *PublishedRepoCollectionSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = 
s.factory.RefListCollection() snap1Refs := NewPackageRefList() snap1Refs.Refs = [][]byte{s.p1.Key(""), s.p2.Key("")} sort.Sort(snap1Refs) - s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, snap1Refs, "desc1") + s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, NewSplitRefListFromRefList(snap1Refs), "desc1") snap2Refs := NewPackageRefList() snap2Refs.Refs = [][]byte{s.p3.Key("")} sort.Sort(snap2Refs) - s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, snap2Refs, "desc2") + s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, NewSplitRefListFromRefList(snap2Refs), "desc2") - s.snapshotCollection.Add(s.snap1) - s.snapshotCollection.Add(s.snap2) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) + s.snapshotCollection.Add(s.snap2, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory, false) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main", "contrib"}, []interface{}{s.snap2, s.snap1}, s.factory, false) @@ -543,14 +547,14 @@ func (s *PublishedRepoCollectionSuite) TestAddByStoragePrefixDistribution(c *C) _, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) - c.Assert(s.collection.Add(s.repo1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo3), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo3, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo3), Equals, s.repo1) - c.Assert(s.collection.Add(s.repo4), IsNil) - c.Assert(s.collection.Add(s.repo5), IsNil) + c.Assert(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo5, s.reflistCollection), IsNil) r, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, IsNil) @@ -576,7 +580,7 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { _, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, IsNil) @@ -587,8 +591,8 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { } func (s *PublishedRepoCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.repo1), IsNil) - c.Assert(s.collection.Update(s.repo4), IsNil) + c.Assert(s.collection.Update(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Update(s.repo4, s.reflistCollection), IsNil) collection := NewPublishedRepoCollection(s.db) r, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -636,7 +640,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { encoder.Encode(&old) c.Assert(s.db.Put(s.repo1.Key(), buf.Bytes()), IsNil) - c.Assert(s.db.Put(s.repo1.RefKey(""), s.localRepo.RefList().Encode()), IsNil) + 
c.Assert(s.db.Put(s.repo1.RefKey(""), NewPackageRefList().Encode()), IsNil) collection := NewPublishedRepoCollection(s.db) repo, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -651,7 +655,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { } func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.repo1) + s.collection.Add(s.repo1, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*PublishedRepo) error { @@ -672,17 +676,17 @@ func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { } func (s *PublishedRepoCollectionSuite) TestBySnapshot(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) c.Check(s.collection.BySnapshot(s.snap1), DeepEquals, []*PublishedRepo{s.repo1, s.repo2}) c.Check(s.collection.BySnapshot(s.snap2), DeepEquals, []*PublishedRepo{s.repo2}) } func (s *PublishedRepoCollectionSuite) TestByLocalRepo(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepo(s.localRepo), DeepEquals, []*PublishedRepo{s.repo4, s.repo5}) } @@ -692,10 +696,10 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { c.Check(s.factory.PackageCollection().Update(s.p2), IsNil) c.Check(s.factory.PackageCollection().Update(s.p3), IsNil) - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) files, err := s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -711,12 +715,12 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { }) snap3 := NewSnapshotFromRefList("snap3", []*Snapshot{}, s.snap2.RefList(), "desc3") - s.snapshotCollection.Add(snap3) + s.snapshotCollection.Add(snap3, s.reflistCollection) // Ensure that adding a second publish point with matching files doesn't give duplicate results. 
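+ // snap3 reuses snap2's reflist, so its buckets carry the same digests and are + // skipped by the processedBuckets check in listReferencedFilesByComponent.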
repo3, err := NewPublishedRepo("", "", "anaconda-2", []string{}, []string{"main"}, []interface{}{snap3}, s.factory, false) c.Check(err, IsNil) - c.Check(s.collection.Add(repo3), IsNil) + c.Check(s.collection.Add(repo3, s.reflistCollection), IsNil) files, err = s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -737,6 +741,7 @@ type PublishedRepoRemoveSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection root, root2 string provider *FakeStorageProvider @@ -752,10 +757,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() s.snap1 = NewSnapshotFromPackageList("snap1", []*Snapshot{}, NewPackageList(), "desc1") - s.snapshotCollection.Add(s.snap1) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory, false) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory, false) @@ -764,11 +770,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.repo5, _ = NewPublishedRepo("files:other", "ppa", "osminog", []string{}, []string{"contrib"}, []interface{}{s.snap1}, s.factory, false) s.collection = s.factory.PublishedRepoCollection() - s.collection.Add(s.repo1) - s.collection.Add(s.repo2) - s.collection.Add(s.repo3) - s.collection.Add(s.repo4) - s.collection.Add(s.repo5) + s.collection.Add(s.repo1, s.reflistCollection) + s.collection.Add(s.repo2, s.reflistCollection) + s.collection.Add(s.repo3, s.reflistCollection) + s.collection.Add(s.repo4, s.reflistCollection) + s.collection.Add(s.repo5, s.reflistCollection) s.root = c.MkDir() s.publishedStorage = files.NewPublishedStorage(s.root, "", "") diff --git a/deb/reflist.go b/deb/reflist.go index 30396548c..e039de127 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -2,10 +2,15 @@ package deb import ( "bytes" + "crypto/sha256" + "encoding/base64" "encoding/json" + "fmt" "sort" "github.com/AlekSi/pointer" + "github.com/aptly-dev/aptly/database" + "github.com/cespare/xxhash/v2" "github.com/ugorji/go/codec" ) @@ -44,6 +49,13 @@ func NewPackageRefListFromPackageList(list *PackageList) *PackageRefList { return reflist } +func (l *PackageRefList) Clone() *PackageRefList { + clone := &PackageRefList{} + clone.Refs = make([][]byte, l.Len()) + copy(clone.Refs, l.Refs) + return clone +} + // Len returns number of refs func (l *PackageRefList) Len() int { return len(l.Refs) @@ -184,8 +196,12 @@ func (d PackageDiff) MarshalJSON() ([]byte, error) { type PackageDiffs []PackageDiff // Diff calculates difference between two reflists -func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection) (result PackageDiffs, err error) { - result = make(PackageDiffs, 0, 128) +func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } // pointer to left and right reflists il, ir := 0, 0 @@ -258,7 +274,7 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle } } - return + return result, nil } // Merge merges reflist r into current reflist. 
If overrideMatching, merge @@ -391,3 +407,753 @@ func (l *PackageRefList) FilterLatestRefs() { lastArch, lastName, lastVer = arch, name, ver } } + +const ( + reflistBucketCount = 1 << 6 + reflistBucketMask = reflistBucketCount - 1 +) + +type reflistDigestArray [sha256.Size]byte + +func bucketRefPrefix(ref []byte) []byte { + const maxPrefixLen = 3 + + // Cut out the arch, leaving behind the package name and subsequent info. + _, ref, _ = bytes.Cut(ref, []byte{' '}) + + // Strip off the lib prefix, so that "libxyz" and "xyz", which are likely + // to be updated together, go in the same bucket. + libPrefix := []byte("lib") + if bytes.HasPrefix(ref, libPrefix) { + ref = ref[len(libPrefix):] + } + + prefixLen := min(maxPrefixLen, len(ref)) + prefix, _, _ := bytes.Cut(ref[:prefixLen], []byte{' '}) + return prefix +} + +func bucketIdxForRef(ref []byte) int { + return int(xxhash.Sum64(bucketRefPrefix(ref))) & reflistBucketMask +} + +// SplitRefList is a list of package refs, similar to a PackageRefList. However, +// instead of storing a linear array of refs, SplitRefList splits the refs into +// PackageRefList "buckets", based on a hash of the package name inside the ref. +// Each bucket has a digest of its contents that serves as its key in the database. +// +// When serialized, a SplitRefList just becomes an array of bucket digests, and +// the buckets themselves are stored separately. Because the buckets are then +// referenced by their digests, multiple independent reflists can share buckets, +// if their buckets have matching digests. +// +// Buckets themselves may not be confined to a single database value; instead, +// they're split into "segments", based on the database's preferred maximum +// value size. This prevents large buckets from slowing down the database. +type SplitRefList struct { + Buckets [][]byte + + bucketRefs []*PackageRefList +} + +// NewSplitRefList creates empty SplitRefList +func NewSplitRefList() *SplitRefList { + sl := &SplitRefList{} + sl.reset() + return sl +} + +// NewSplitRefListFromRefList creates SplitRefList from PackageRefList +func NewSplitRefListFromRefList(reflist *PackageRefList) *SplitRefList { + sl := NewSplitRefList() + sl.Replace(reflist) + return sl +} + +// NewSplitRefListFromPackageList creates SplitRefList from PackageList +func NewSplitRefListFromPackageList(list *PackageList) *SplitRefList { + return NewSplitRefListFromRefList(NewPackageRefListFromPackageList(list)) +} + +func (sl *SplitRefList) reset() { + sl.Buckets = make([][]byte, reflistBucketCount) + sl.bucketRefs = make([]*PackageRefList, reflistBucketCount) +} + +// Has checks whether package is part of reflist +func (sl *SplitRefList) Has(p *Package) bool { + idx := bucketIdxForRef(p.Key("")) + if bucket := sl.bucketRefs[idx]; bucket != nil { + return bucket.Has(p) + } + return false +} + +// Len returns number of refs +func (sl *SplitRefList) Len() int { + total := 0 + for _, bucket := range sl.bucketRefs { + if bucket != nil { + total += bucket.Len() + } + } + return total +} + +func reflistDigest(l *PackageRefList) []byte { + // Different algorithms on PackageRefLists will sometimes return a nil slice + // of refs and other times return an empty slice. Regardless, they should + // both be treated identically and be given an empty digest.
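+ // The digest is a SHA-256 over each ref in order, each followed by a zero + // byte separator, so buckets with identical contents always produce the same + // digest and therefore share a single set of database keys.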
+ if len(l.Refs) == 0 { + return nil + } + + h := sha256.New() + for _, ref := range l.Refs { + h.Write(ref) + h.Write([]byte{0}) + } + return h.Sum(nil) +} + +// Removes all the refs inside and replaces them with those in the given reflist +func (sl *SplitRefList) Replace(reflist *PackageRefList) { + sl.reset() + + for _, ref := range reflist.Refs { + idx := bucketIdxForRef(ref) + + bucket := sl.bucketRefs[idx] + if bucket == nil { + bucket = NewPackageRefList() + sl.bucketRefs[idx] = bucket + } + + bucket.Refs = append(bucket.Refs, ref) + } + + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + sort.Sort(bucket) + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Merge merges reflist r into current reflist (see PackageRefList.Merge) +func (sl *SplitRefList) Merge(r *SplitRefList, overrideMatching, ignoreConflicting bool) (result *SplitRefList) { + result = NewSplitRefList() + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket == nil && rbucket == nil { + continue + } + + if lbucket == nil { + lbucket = &empty + } else if rbucket == nil { + rbucket = &empty + } + + result.bucketRefs[idx] = lbucket.Merge(rbucket, overrideMatching, ignoreConflicting) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } + + return +} + +// Subtract returns all packages in l that are not in r +func (sl *SplitRefList) Subtract(r *SplitRefList) (result *SplitRefList) { + result = NewSplitRefList() + + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result.bucketRefs[idx] = lbucket.Subtract(rbucket) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } else { + result.bucketRefs[idx] = lbucket.Clone() + result.Buckets[idx] = sl.Buckets[idx] + } + } + } + + return +} + +// Diff calculates difference between two reflists +func (sl *SplitRefList) Diff(r *SplitRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result, err = lbucket.Diff(rbucket, packageCollection, result) + } else { + result, err = lbucket.Diff(&empty, packageCollection, result) + } + } else if rbucket != nil { + result, err = empty.Diff(rbucket, packageCollection, result) + } + + if err != nil { + return nil, err + } + } + + sort.Slice(result, func(i, j int) bool { + var ri, rj []byte + if result[i].Left != nil { + ri = result[i].Left.Key("") + } else { + ri = result[i].Right.Key("") + } + if result[j].Left != nil { + rj = result[j].Left.Key("") + } else { + rj = result[j].Right.Key("") + } + + return bytes.Compare(ri, rj) < 0 + }) + + return result, nil +} + +// FilterLatestRefs reduces a reflist to the latest of each package (see PackageRefList.FilterLatestRefs) +func (sl *SplitRefList) FilterLatestRefs() { + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + bucket.FilterLatestRefs() + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Flatten creates a flat PackageRefList containing all the refs in this reflist +func (sl *SplitRefList) Flatten() *PackageRefList { + reflist := NewPackageRefList() + sl.ForEach(func(ref []byte) error { + reflist.Refs = append(reflist.Refs, ref) + return nil + }) + sort.Sort(reflist) + return reflist +} + +// ForEachBucket calls handler for each bucket 
in list +func (sl *SplitRefList) ForEachBucket(handler func(digest []byte, bucket *PackageRefList) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := handler(digest, bucket); err != nil { + return err + } + } + } + + return nil +} + +// ForEach calls handler for each package ref in list +// +// IMPORTANT: unlike PackageRefList.ForEach, the order of handler invocations +// is *not* guaranteed to be sorted. +func (sl *SplitRefList) ForEach(handler func([]byte) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := bucket.ForEach(handler); err != nil { + return err + } + } + } + + return nil +} + +// RefListDigestSet is a set of SplitRefList bucket digests +type RefListDigestSet struct { + items map[reflistDigestArray]struct{} +} + +// NewRefListDigestSet creates empty RefListDigestSet +func NewRefListDigestSet() *RefListDigestSet { + return &RefListDigestSet{items: map[reflistDigestArray]struct{}{}} +} + +// Len returns number of digests in the set +func (set *RefListDigestSet) Len() int { + return len(set.items) +} + +// ForEach calls handler for each digest in the set +func (set *RefListDigestSet) ForEach(handler func(digest []byte) error) error { + for digest := range set.items { + if err := handler(digest[:]); err != nil { + return err + } + } + + return nil +} + +// Add adds digest to set, doing nothing if the digest was already present +func (set *RefListDigestSet) Add(digest []byte) { + set.items[reflistDigestArray(digest)] = struct{}{} +} + +// AddAllInRefList adds all the bucket digests in a SplitRefList to the set +func (set *RefListDigestSet) AddAllInRefList(sl *SplitRefList) { + for _, digest := range sl.Buckets { + if len(digest) > 0 { + set.Add(digest) + } + } +} + +// Has checks whether a digest is part of set +func (set *RefListDigestSet) Has(digest []byte) bool { + _, ok := set.items[reflistDigestArray(digest)] + return ok +} + +// Remove removes a digest from set +func (set *RefListDigestSet) Remove(digest []byte) { + delete(set.items, reflistDigestArray(digest)) +} + +// RemoveAll removes all the digests in other from the current set +func (set *RefListDigestSet) RemoveAll(other *RefListDigestSet) { + for digest := range other.items { + delete(set.items, digest) + } +} + +// RefListCollection does listing, updating/adding/deleting of SplitRefLists +type RefListCollection struct { + db database.Storage + + cache map[reflistDigestArray]*PackageRefList +} + +// NewRefListCollection creates a RefListCollection +func NewRefListCollection(db database.Storage) *RefListCollection { + return &RefListCollection{db: db, cache: make(map[reflistDigestArray]*PackageRefList)} +} + +type reflistStorageFormat int + +const ( + // (legacy format) all the refs are stored inline in a single value + reflistStorageFormatInline reflistStorageFormat = iota + // the refs are split into buckets that are stored externally from the value + reflistStorageFormatSplit +) + +// NoPadding is used because all digests are the same length, so the padding +// is useless and only serves to muddy the output. 
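+// An unpadded SHA-256 digest encodes to 43 base64 characters, so the segment +// keys built below take the form "F<43-char digest>-<4 hex digit index>".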
+var bucketDigestEncoding = base64.StdEncoding.WithPadding(base64.NoPadding) + +func segmentPrefix(encodedDigest string) []byte { + return []byte(fmt.Sprintf("F%s-", encodedDigest)) +} + +func segmentIndexKey(prefix []byte, idx int) []byte { + // Assume most buckets won't have more than 0xFFFF = ~65k segments (which + // would be an extremely large bucket!). + return append(bytes.Clone(prefix), []byte(fmt.Sprintf("%04x", idx))...) +} + +// AllBucketDigests returns a set of all the bucket digests in the database +func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, error) { + digests := NewRefListDigestSet() + + err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, value []byte) error { + if !bytes.HasSuffix(key, []byte("-0000")) { + // Ignore additional segments for the same digest. + return nil + } + + encodedDigest, _, foundDash := bytes.Cut(key[1:], []byte("-")) + if !foundDash { + return fmt.Errorf("invalid key: %s", string(key)) + } + digest := make([]byte, bucketDigestEncoding.DecodedLen(len(encodedDigest))) + if _, err := bucketDigestEncoding.Decode(digest, encodedDigest); err != nil { + return fmt.Errorf("decoding key %s: %w", string(key), err) + } + + digests.Add(digest) + return nil + }) + + if err != nil { + return nil, err + } + return digests, nil +} + +// UnsafeDropBucket drops the bucket associated with digest from the database, +// doing so inside batch +// +// This is considered "unsafe" because no checks are performed to ensure that +// the bucket is no longer referenced by any saved reflists. +func (collection *RefListCollection) UnsafeDropBucket(digest []byte, batch database.Batch) error { + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + return collection.db.ProcessByPrefix(prefix, func(key []byte, value []byte) error { + return batch.Delete(key) + }) +} + +func (collection *RefListCollection) load(sl *SplitRefList, key []byte) (reflistStorageFormat, error) { + sl.reset() + + data, err := collection.db.Get(key) + if err != nil { + return 0, err + } + + var splitOrInlineRefList struct { + *SplitRefList + *PackageRefList + } + handle := &codec.MsgpackHandle{} + handle.ZeroCopy = true + decoder := codec.NewDecoderBytes(data, handle) + if err := decoder.Decode(&splitOrInlineRefList); err != nil { + return 0, err + } + + if splitOrInlineRefList.SplitRefList != nil { + sl.Buckets = splitOrInlineRefList.Buckets + } else if splitOrInlineRefList.PackageRefList != nil { + sl.Replace(splitOrInlineRefList.PackageRefList) + return reflistStorageFormatInline, nil + } + + return reflistStorageFormatSplit, nil +} + +func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { + for idx := range sl.Buckets { + if sl.bucketRefs[idx] != nil { + continue + } + + var bucket *PackageRefList + + if digest := sl.Buckets[idx]; len(digest) > 0 { + cacheKey := reflistDigestArray(digest) + bucket = collection.cache[cacheKey] + if bucket == nil { + bucket = NewPackageRefList() + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + err := collection.db.ProcessByPrefix(prefix, func(digest []byte, value []byte) error { + var l PackageRefList + if err := l.Decode(append([]byte{}, value...)); err != nil { + return err + } + + bucket.Refs = append(bucket.Refs, l.Refs...) + return nil + }) + + if err != nil { + return err + } + + // The segments may not have been iterated in order, so make sure to re-sort + // here. 
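+ // Sorting also restores the canonical order that reflistDigest hashes refs + // in, which the integrity check below relies on.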
+ sort.Sort(bucket) + collection.cache[cacheKey] = bucket + } + + actualDigest := reflistDigest(bucket) + if !bytes.Equal(actualDigest, digest) { + return fmt.Errorf("corrupt reflist bucket %d: expected digest %s, got %s", + idx, + bucketDigestEncoding.EncodeToString(digest), + bucketDigestEncoding.EncodeToString(actualDigest)) + } + } + + sl.bucketRefs[idx] = bucket + } + + return nil +} + +// LoadComplete loads the reflist stored at the given key, as well as all the +// buckets referenced by a split reflist +func (collection *RefListCollection) LoadComplete(sl *SplitRefList, key []byte) error { + if _, err := collection.load(sl, key); err != nil { + return err + } + + return collection.loadBuckets(sl) +} + +// RefListBatch is a wrapper over a database.Batch that tracks already-written +// reflists to avoid writing them multiple times +// +// It is *not* safe to use the same underlying database.Batch that has already +// been given to UnsafeDropBucket. +type RefListBatch struct { + batch database.Batch + + alreadyWritten *RefListDigestSet +} + +// NewBatch creates a new RefListBatch wrapping the given database.Batch +func (collection *RefListCollection) NewBatch(batch database.Batch) *RefListBatch { + return &RefListBatch{ + batch: batch, + alreadyWritten: NewRefListDigestSet(), + } +} + +type reflistUpdateContext struct { + rb *RefListBatch + stats *RefListMigrationStats +} + +func clearSegmentRefs(reflist *PackageRefList, recommendedMaxKVSize int) { + avgRefsInSegment := recommendedMaxKVSize / 70 + reflist.Refs = make([][]byte, 0, avgRefsInSegment) +} + +func flushSegmentRefs(uctx *reflistUpdateContext, prefix []byte, segment int, reflist *PackageRefList) error { + encoded := reflist.Encode() + err := uctx.rb.batch.Put(segmentIndexKey(prefix, segment), encoded) + if err == nil && uctx.stats != nil { + uctx.stats.Segments++ + } + return err +} + +func (collection *RefListCollection) updateWithContext(sl *SplitRefList, key []byte, uctx *reflistUpdateContext) error { + if sl != nil { + recommendedMaxKVSize := collection.db.GetRecommendedMaxKVSize() + + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + if uctx.rb.alreadyWritten.Has(digest) { + continue + } + + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + if collection.db.HasPrefix(prefix) { + continue + } + + // All the sizing information taken from the msgpack spec: + // https://github.com/msgpack/msgpack/blob/master/spec.md + + // Assume that a segment will have [16,2^16) elements, which would + // fit into an array 16 and thus have 3 bytes of overhead. + // (A database would need a massive recommendedMaxKVSize to pass + // that limit.) + size := len(segmentIndexKey(prefix, 0)) + 3 + segment := 0 + + var reflist PackageRefList + clearSegmentRefs(&reflist, recommendedMaxKVSize) + for _, ref := range sl.bucketRefs[idx].Refs { + // In order to determine the size of the ref in the database, + // we need to know how much overhead will be added with by msgpack + // encoding. 
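+				// For example, a 70-byte ref falls in the [1<<5, 1<<8) range and is
+				// encoded with a 2-byte header, so it counts as 72 bytes of segment
+				// space; a short ref under 32 bytes only pays 1 byte of overhead.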
+ requiredSize := len(ref) + if requiredSize < 1<<5 { + requiredSize++ + } else if requiredSize < 1<<8 { + requiredSize += 2 + } else if requiredSize < 1<<16 { + requiredSize += 3 + } else { + requiredSize += 4 + } + if size+requiredSize > recommendedMaxKVSize { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + clearSegmentRefs(&reflist, recommendedMaxKVSize) + segment++ + } + + reflist.Refs = append(reflist.Refs, ref) + size += requiredSize + } + + if len(reflist.Refs) > 0 { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + } + + uctx.rb.alreadyWritten.Add(digest) + if uctx.stats != nil { + uctx.stats.Buckets++ + } + } + } + + var buf bytes.Buffer + encoder := codec.NewEncoder(&buf, &codec.MsgpackHandle{}) + encoder.Encode(sl) + err := uctx.rb.batch.Put(key, buf.Bytes()) + if err == nil && uctx.stats != nil { + uctx.stats.Reflists++ + } + return err +} + +// UpdateInBatch will save or update the SplitRefList at key, as well as save the buckets inside, +// as part of the given batch +func (collection *RefListCollection) UpdateInBatch(sl *SplitRefList, key []byte, batch *RefListBatch) error { + return collection.updateWithContext(sl, key, &reflistUpdateContext{rb: batch}) +} + +// Update will save or update the SplitRefList at key, as well as save the buckets inside +func (collection *RefListCollection) Update(sl *SplitRefList, key []byte) error { + rb := collection.NewBatch(collection.db.CreateBatch()) + err := collection.UpdateInBatch(sl, key, rb) + if err == nil { + err = rb.batch.Write() + } + return err +} + +// RefListMigrationStats counts a number of reflists, buckets, and segments +type RefListMigrationStats struct { + Reflists, Buckets, Segments int +} + +// RefListMigration wraps a RefListBatch for the purpose of migrating inline format +// reflists to split reflists +// +// Once the batch gets too large, it will automatically be flushed to the database, +// and a new batch will be created in its place. +type RefListMigration struct { + rb *RefListBatch + + dryRun bool + + // current number of reflists/buckets/segments queued in the current, unwritten batch + batchStats RefListMigrationStats + flushStats RefListMigrationStats +} + +// NewMigration creates an empty RefListMigration +func (collection *RefListCollection) NewMigration() *RefListMigration { + return &RefListMigration{} +} + +// NewMigrationDryRun creates an empty RefListMigration that will track the +// changes to make as usual but avoid actually writing to the db +func (collection *RefListCollection) NewMigrationDryRun() *RefListMigration { + return &RefListMigration{dryRun: true} +} + +// Stats returns statistics on the written values in the current migration +func (migration *RefListMigration) Stats() RefListMigrationStats { + return migration.flushStats +} + +// Flush will flush the current batch in the migration to the database +func (migration *RefListMigration) Flush() error { + if migration.batchStats.Segments > 0 { + if !migration.dryRun { + if err := migration.rb.batch.Write(); err != nil { + return err + } + + // It's important that we don't clear the batch on dry runs, because + // the batch is what contains the list of already-written buckets. + // If we're not writing to the database, and we clear that list, + // duplicate "writes" will occur. 
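+			// (On a real run the batch can be dropped here; LoadCompleteAndMigrate
+			// lazily creates a fresh one the next time an inline reflist is migrated.)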
+ migration.rb = nil + } + + migration.flushStats.Reflists += migration.batchStats.Reflists + migration.flushStats.Buckets += migration.batchStats.Buckets + migration.flushStats.Segments += migration.batchStats.Segments + migration.batchStats = RefListMigrationStats{} + } + + return nil +} + +// LoadCompleteAndMigrate will load the reflist and its buckets as RefListCollection.LoadComplete, +// migrating any inline reflists to split ones along the way +func (collection *RefListCollection) LoadCompleteAndMigrate(sl *SplitRefList, key []byte, migration *RefListMigration) error { + // Given enough reflists, the memory used by a batch starts to become massive, so + // make sure to flush the written segments periodically. Note that this is only + // checked *after* a migration of a full bucket (and all the segments inside) + // takes place, as splitting a single bucket write into multiple batches would + // be unsafe if an interruption occurs midway. + const maxMigratorBatch = 50000 + + format, err := collection.load(sl, key) + if err != nil { + return err + } + + switch format { + case reflistStorageFormatInline: + if migration.rb == nil { + migration.rb = collection.NewBatch(collection.db.CreateBatch()) + } + + collection.updateWithContext(sl, key, &reflistUpdateContext{ + rb: migration.rb, + stats: &migration.batchStats, + }) + + if migration.batchStats.Segments > maxMigratorBatch { + if err := migration.Flush(); err != nil { + return err + } + } + + return nil + case reflistStorageFormatSplit: + return collection.loadBuckets(sl) + default: + panic(fmt.Sprintf("unexpected format %v", format)) + } +} + +// AnyRefList is implemented by both PackageRefList and SplitRefList +type AnyRefList interface { + Has(p *Package) bool + Len() int + ForEach(handler func([]byte) error) error + FilterLatestRefs() +} + +// Check interface +var ( + _ AnyRefList = (*PackageRefList)(nil) + _ AnyRefList = (*SplitRefList)(nil) +) diff --git a/deb/reflist_bench_test.go b/deb/reflist_bench_test.go index b377574ce..f81a84d0a 100644 --- a/deb/reflist_bench_test.go +++ b/deb/reflist_bench_test.go @@ -45,3 +45,41 @@ func BenchmarkReflistDecode(b *testing.B) { (&PackageRefList{}).Decode(data) } } + +func BenchmarkSplitRefListCreationSmall(b *testing.B) { + const count = 400 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} + +func BenchmarkSplitRefListCreationLarge(b *testing.B) { + const count = 4096 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} diff --git a/deb/reflist_test.go b/deb/reflist_test.go index bcabec3c2..058216848 100644 --- a/deb/reflist_test.go +++ b/deb/reflist_test.go @@ -1,7 +1,10 @@ package deb import ( + "bytes" + "encoding/hex" "errors" + "fmt" "github.com/aptly-dev/aptly/database/goleveldb" @@ -9,24 +12,83 @@ import ( ) type PackageRefListSuite struct { - // Simple list with "real" packages from stanzas - list *PackageList p1, p2, p3, p4, p5, p6 *Package } var _ = Suite(&PackageRefListSuite{}) -func toStrSlice(reflist *PackageRefList) (result []string) { +func verifyRefListIntegrity(c *C, rl AnyRefList) AnyRefList { + if rl, ok := rl.(*SplitRefList); ok { + for 
idx, bucket := range rl.bucketRefs { + if bucket == nil { + bucket = NewPackageRefList() + } + c.Check(rl.Buckets[idx], DeepEquals, reflistDigest(bucket)) + } + } + + return rl +} + +func getRefs(rl AnyRefList) (refs [][]byte) { + switch rl := rl.(type) { + case *PackageRefList: + refs = rl.Refs + case *SplitRefList: + refs = rl.Flatten().Refs + default: + panic(fmt.Sprintf("unexpected reflist type %t", rl)) + } + + // Hack so that passing getRefs-returned slices to DeepEquals won't fail given a nil + // slice and an empty slice. + if len(refs) == 0 { + refs = nil + } + return +} + +func toStrSlice(reflist AnyRefList) (result []string) { result = make([]string, reflist.Len()) - for i, r := range reflist.Refs { + for i, r := range getRefs(reflist) { result[i] = string(r) } return } -func (s *PackageRefListSuite) SetUpTest(c *C) { - s.list = NewPackageList() +type reflistFactory struct { + new func() AnyRefList + newFromRefs func(refs ...[]byte) AnyRefList + newFromPackageList func(list *PackageList) AnyRefList +} + +func forEachRefList(test func(f reflistFactory)) { + test(reflistFactory{ + new: func() AnyRefList { + return NewPackageRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return &PackageRefList{Refs: refs} + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewPackageRefListFromPackageList(list) + }, + }) + test(reflistFactory{ + new: func() AnyRefList { + return NewSplitRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return NewSplitRefListFromRefList(&PackageRefList{Refs: refs}) + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewSplitRefListFromPackageList(list) + }, + }) +} + +func (s *PackageRefListSuite) SetUpTest(c *C) { s.p1 = NewPackageFromControlFile(packageStanza.Copy()) s.p2 = NewPackageFromControlFile(packageStanza.Copy()) stanza := packageStanza.Copy() @@ -44,346 +106,600 @@ func (s *PackageRefListSuite) SetUpTest(c *C) { } func (s *PackageRefListSuite) TestNewPackageListFromRefList(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - coll.Update(s.p1) - coll.Update(s.p3) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + coll.Update(s.p1) + coll.Update(s.p3) - reflist := NewPackageRefListFromPackageList(s.list) + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - _, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, ErrorMatches, "unable to load package with key.*") + reflist := f.newFromPackageList(list) - coll.Update(s.p5) - coll.Update(s.p6) + _, err := NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, ErrorMatches, "unable to load package with key.*") - list, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 4) - c.Check(list.Add(s.p4), ErrorMatches, "package already exists and is different: .*") + coll.Update(s.p5) + coll.Update(s.p6) - list, err = NewPackageListFromRefList(nil, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 0) + list, err = NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 4) + c.Check(list.Add(s.p4), ErrorMatches, "package already exists and is different: .*") + + list, err = NewPackageListFromRefList(nil, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 0) + }) } func (s 
*PackageRefListSuite) TestNewPackageRefList(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) - - reflist := NewPackageRefListFromPackageList(s.list) - c.Assert(reflist.Len(), Equals, 4) - c.Check(reflist.Refs[0], DeepEquals, []byte(s.p1.Key(""))) - c.Check(reflist.Refs[1], DeepEquals, []byte(s.p6.Key(""))) - c.Check(reflist.Refs[2], DeepEquals, []byte(s.p5.Key(""))) - c.Check(reflist.Refs[3], DeepEquals, []byte(s.p3.Key(""))) - - reflist = NewPackageRefList() - c.Check(reflist.Len(), Equals, 0) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) + + reflist := f.newFromPackageList(list) + verifyRefListIntegrity(c, reflist) + c.Assert(reflist.Len(), Equals, 4) + refs := getRefs(reflist) + c.Check(refs[0], DeepEquals, []byte(s.p1.Key(""))) + c.Check(refs[1], DeepEquals, []byte(s.p6.Key(""))) + c.Check(refs[2], DeepEquals, []byte(s.p5.Key(""))) + c.Check(refs[3], DeepEquals, []byte(s.p3.Key(""))) + + reflist = f.new() + c.Check(reflist.Len(), Equals, 0) + }) } -func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) +func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - reflist := NewPackageRefListFromPackageList(s.list) + reflist := f.newFromPackageList(list) - reflist2 := &PackageRefList{} - err := reflist2.Decode(reflist.Encode()) - c.Assert(err, IsNil) - c.Check(reflist2.Len(), Equals, reflist.Len()) - c.Check(reflist2.Refs, DeepEquals, reflist.Refs) -} + Len := 0 + err := reflist.ForEach(func([]byte) error { + Len++ + return nil + }) -func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + c.Check(Len, Equals, 4) + c.Check(err, IsNil) - reflist := NewPackageRefListFromPackageList(s.list) + e := errors.New("b") - Len := 0 - err := reflist.ForEach(func([]byte) error { - Len++ - return nil - }) - - c.Check(Len, Equals, 4) - c.Check(err, IsNil) + err = reflist.ForEach(func([]byte) error { + return e + }) - e := errors.New("b") + c.Check(err, Equals, e) + }) +} - err = reflist.ForEach(func([]byte) error { - return e +func (s *PackageRefListSuite) TestHas(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + reflist := f.newFromPackageList(list) + + c.Check(reflist.Has(s.p1), Equals, true) + c.Check(reflist.Has(s.p3), Equals, true) + c.Check(reflist.Has(s.p5), Equals, true) + c.Check(reflist.Has(s.p2), Equals, true) + c.Check(reflist.Has(s.p6), Equals, false) }) +} - c.Check(err, Equals, e) +func subtractRefLists(l, r AnyRefList) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Subtract(r.(*PackageRefList)) + case *SplitRefList: + return l.Subtract(r.(*SplitRefList)) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } } -func (s *PackageRefListSuite) TestHas(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - reflist := NewPackageRefListFromPackageList(s.list) - - c.Check(reflist.Has(s.p1), Equals, true) - c.Check(reflist.Has(s.p3), Equals, true) - c.Check(reflist.Has(s.p5), Equals, true) - c.Check(reflist.Has(s.p2), Equals, true) - c.Check(reflist.Has(s.p6), Equals, false) +func (s *PackageRefListSuite) TestSubtract(c *C) { + 
forEachRefList(func(f reflistFactory) { + r1 := []byte("Pall r1") + r2 := []byte("Pall r2") + r3 := []byte("Pall r3") + r4 := []byte("Pall r4") + r5 := []byte("Pall r5") + + empty := f.newFromRefs() + l1 := f.newFromRefs(r1, r2, r3, r4) + l2 := f.newFromRefs(r1, r3) + l3 := f.newFromRefs(r2, r4) + l4 := f.newFromRefs(r4, r5) + l5 := f.newFromRefs(r1, r2, r3) + + c.Check(getRefs(subtractRefLists(l1, empty)), DeepEquals, getRefs(l1)) + c.Check(getRefs(subtractRefLists(l1, l2)), DeepEquals, getRefs(l3)) + c.Check(getRefs(subtractRefLists(l1, l3)), DeepEquals, getRefs(l2)) + c.Check(getRefs(subtractRefLists(l1, l4)), DeepEquals, getRefs(l5)) + c.Check(getRefs(subtractRefLists(empty, l1)), DeepEquals, getRefs(empty)) + c.Check(getRefs(subtractRefLists(l2, l3)), DeepEquals, getRefs(l2)) + }) } -func (s *PackageRefListSuite) TestSubstract(c *C) { - r1 := []byte("r1") - r2 := []byte("r2") - r3 := []byte("r3") - r4 := []byte("r4") - r5 := []byte("r5") - - empty := &PackageRefList{Refs: [][]byte{}} - l1 := &PackageRefList{Refs: [][]byte{r1, r2, r3, r4}} - l2 := &PackageRefList{Refs: [][]byte{r1, r3}} - l3 := &PackageRefList{Refs: [][]byte{r2, r4}} - l4 := &PackageRefList{Refs: [][]byte{r4, r5}} - l5 := &PackageRefList{Refs: [][]byte{r1, r2, r3}} - - c.Check(l1.Subtract(empty), DeepEquals, l1) - c.Check(l1.Subtract(l2), DeepEquals, l3) - c.Check(l1.Subtract(l3), DeepEquals, l2) - c.Check(l1.Subtract(l4), DeepEquals, l5) - c.Check(empty.Subtract(l1), DeepEquals, empty) - c.Check(l2.Subtract(l3), DeepEquals, l2) +func diffRefLists(l, r AnyRefList, packageCollection *PackageCollection) (PackageDiffs, error) { + switch l := l.(type) { + case *PackageRefList: + return l.Diff(r.(*PackageRefList), packageCollection, nil) + case *SplitRefList: + return l.Diff(r.(*SplitRefList), packageCollection, nil) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } } func (s *PackageRefListSuite) TestDiff(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 - } + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 + } + + for _, p := range packages { + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[6]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + + diffAA, err := diffRefLists(reflistA, reflistA, coll) + c.Check(err, 
IsNil) + c.Check(diffAA, HasLen, 0) + + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 4) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") + c.Check(diffAB[2].Right, IsNil) + + c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") + c.Check(diffAB[3].Right, IsNil) + + diffBA, err := diffRefLists(reflistB, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffBA, HasLen, 4) + + c.Check(diffBA[0].Right, IsNil) + c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") + c.Check(diffBA[2].Left, IsNil) + + c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") + c.Check(diffBA[3].Left, IsNil) + }) +} - for _, p := range packages { - coll.Update(p) - } +func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[6]) + packages := []*Package{ + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 + } - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) + for _, p := range packages { + coll.Update(p) + } - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + listA := NewPackageList() + listA.Add(packages[0]) - diffAA, err := reflistA.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffAA, HasLen, 0) + listB := NewPackageList() + listB.Add(packages[1]) + listB.Add(packages[2]) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 4) + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 2) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + }) +} - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") +func mergeRefLists(l, r AnyRefList, overrideMatching, ignoreConflicting bool) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Merge(r.(*PackageRefList), overrideMatching, ignoreConflicting) + case *SplitRefList: + return l.Merge(r.(*SplitRefList), overrideMatching, ignoreConflicting) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } +} - c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") - c.Check(diffAB[2].Right, IsNil) +func (s *PackageRefListSuite) TestMerge(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ 
:= goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 + {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 + {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 + {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 + } + + for _, p := range packages { + p.V06Plus = true + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[7]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + listB.Add(packages[6]) + + listC := NewPackageList() + listC.Add(packages[0]) + listC.Add(packages[8]) + listC.Add(packages[9]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + reflistC := f.newFromPackageList(listC) + + mergeAB := mergeRefLists(reflistA, reflistB, true, false) + mergeBA := mergeRefLists(reflistB, reflistA, true, false) + mergeAC := mergeRefLists(reflistA, reflistC, true, false) + mergeBC := mergeRefLists(reflistB, reflistC, true, false) + mergeCB := mergeRefLists(reflistC, reflistB, true, false) + + verifyRefListIntegrity(c, mergeAB) + verifyRefListIntegrity(c, mergeBA) + verifyRefListIntegrity(c, mergeAC) + verifyRefListIntegrity(c, mergeBC) + verifyRefListIntegrity(c, mergeCB) + + c.Check(toStrSlice(mergeAB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBA), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeAC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + c.Check(toStrSlice(mergeCB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) + + mergeABall := mergeRefLists(reflistA, reflistB, false, false) + mergeBAall := mergeRefLists(reflistB, reflistA, false, false) + mergeACall := mergeRefLists(reflistA, reflistC, false, false) + mergeBCall := mergeRefLists(reflistB, reflistC, false, false) + mergeCBall := mergeRefLists(reflistC, reflistB, false, false) + + verifyRefListIntegrity(c, mergeABall) + verifyRefListIntegrity(c, mergeBAall) + verifyRefListIntegrity(c, mergeACall) + verifyRefListIntegrity(c, mergeBCall) + verifyRefListIntegrity(c, mergeCBall) + + c.Check(mergeABall, DeepEquals, mergeBAall) + c.Check(toStrSlice(mergeBAall), DeepEquals, + []string{"Pall data 1.1~bp1 
00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + + c.Check(mergeBCall, Not(DeepEquals), mergeCBall) + c.Check(toStrSlice(mergeACall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBCall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 lib 1.0 00000000"}) + + mergeBCwithConflicts := mergeRefLists(reflistB, reflistC, false, true) + c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + }) +} - c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") - c.Check(diffAB[3].Right, IsNil) +func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { + forEachRefList(func(f reflistFactory) { + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, + {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, + {Name: "lib", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.5", Architecture: "i386"}, + {Name: "dpkg", Version: "1.6", Architecture: "i386"}, + } + + rl := NewPackageList() + rl.Add(packages[0]) + rl.Add(packages[1]) + rl.Add(packages[2]) + rl.Add(packages[3]) + rl.Add(packages[4]) + rl.Add(packages[5]) + rl.Add(packages[6]) + rl.Add(packages[7]) + + result := f.newFromPackageList(rl) + result.FilterLatestRefs() + + verifyRefListIntegrity(c, result) + c.Check(toStrSlice(result), DeepEquals, + []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + }) +} - diffBA, err := reflistB.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffBA, HasLen, 4) +func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - c.Check(diffBA[0].Right, IsNil) - c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + reflist := NewPackageRefListFromPackageList(list) - c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + reflist2 := &PackageRefList{} + err := reflist2.Decode(reflist.Encode()) + c.Assert(err, IsNil) + c.Check(reflist2.Len(), Equals, reflist.Len()) + c.Check(reflist2.Refs, DeepEquals, reflist.Refs) +} - c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") - c.Check(diffBA[2].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketPrefix(c *C) { + c.Check(bucketRefPrefix([]byte("Pall abcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pall libabcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pamd64 xy 1.0")), DeepEquals, []byte("xy")) +} - c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") - c.Check(diffBA[3].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketIdx(c *C) { + c.Check(bucketIdxForRef(s.p1.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p2.Key("")), 
Equals, 46) + c.Check(bucketIdxForRef(s.p3.Key("")), Equals, 26) + c.Check(bucketIdxForRef(s.p4.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p5.Key("")), Equals, 4) + c.Check(bucketIdxForRef(s.p6.Key("")), Equals, 46) +} +func (s *PackageRefListSuite) TestSplitRefListBuckets(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) + + sl := NewSplitRefListFromPackageList(list) + verifyRefListIntegrity(c, sl) + + c.Check(hex.EncodeToString(sl.Buckets[4]), Equals, "7287aed32daad5d1aab4e89533bde135381d932e60548cfc00b882ca8858ae07") + c.Check(toStrSlice(sl.bucketRefs[4]), DeepEquals, []string{string(s.p5.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[26]), Equals, "f31fc28e82368b63c8be47eefc64b8e217e2e5349c7e3827b98f80536b956f6e") + c.Check(toStrSlice(sl.bucketRefs[26]), DeepEquals, []string{string(s.p3.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[46]), Equals, "55e70286393afc5da5046d68c632d35f98bec24781ae433bd1a1069b52853367") + c.Check(toStrSlice(sl.bucketRefs[46]), DeepEquals, []string{string(s.p1.Key("")), string(s.p6.Key(""))}) } -func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) +func (s *PackageRefListSuite) TestRefListDigestSet(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) - packages := []*Package{ - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 - } + sl := NewSplitRefListFromPackageList(list) - for _, p := range packages { - coll.Update(p) - } + set := NewRefListDigestSet() + c.Check(set.Len(), Equals, 0) - listA := NewPackageList() - listA.Add(packages[0]) + err := sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, false) + return nil + }) + c.Assert(err, IsNil) - listB := NewPackageList() - listB.Add(packages[1]) - listB.Add(packages[2]) + set.AddAllInRefList(sl) + c.Check(set.Len(), Equals, 3) - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, true) + return nil + }) + c.Assert(err, IsNil) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 2) + firstDigest := sl.Buckets[bucketIdxForRef(s.p1.Key(""))] + set.Remove(firstDigest) + c.Check(set.Len(), Equals, 2) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, !bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + set2 := NewRefListDigestSet() + set2.AddAllInRefList(sl) + set2.RemoveAll(set) + + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set2.Has(digest), Equals, bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) } -func (s *PackageRefListSuite) TestMerge(c *C) { +func (s *PackageRefListSuite) TestRefListCollectionLoadSave(c *C) { db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ 
- {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 - {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 - {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 - {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 - } + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + err := reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(reflist)) - for _, p := range packages { - p.V06Plus = true - coll.Update(p) - } + list.Add(s.p6) + sl = NewSplitRefListFromPackageList(list) + err = reflistCollection.Update(sl, key) + c.Assert(err, IsNil) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[7]) - - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) - listB.Add(packages[6]) - - listC := NewPackageList() - listC.Add(packages[0]) - listC.Add(packages[8]) - listC.Add(packages[9]) - - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) - reflistC := NewPackageRefListFromPackageList(listC) - - mergeAB := reflistA.Merge(reflistB, true, false) - mergeBA := reflistB.Merge(reflistA, true, false) - mergeAC := reflistA.Merge(reflistC, true, false) - mergeBC := reflistB.Merge(reflistC, true, false) - mergeCB := reflistC.Merge(reflistB, true, false) - - c.Check(toStrSlice(mergeAB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBA), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeAC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) - c.Check(toStrSlice(mergeCB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) - - mergeABall := reflistA.Merge(reflistB, false, false) - mergeBAall := reflistB.Merge(reflistA, false, 
false) - mergeACall := reflistA.Merge(reflistC, false, false) - mergeBCall := reflistB.Merge(reflistC, false, false) - mergeCBall := reflistC.Merge(reflistB, false, false) - - c.Check(mergeABall, DeepEquals, mergeBAall) - c.Check(toStrSlice(mergeBAall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - - c.Check(mergeBCall, Not(DeepEquals), mergeCBall) - c.Check(toStrSlice(mergeACall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBCall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 lib 1.0 00000000"}) - - mergeBCwithConflicts := reflistB.Merge(reflistC, false, true) - c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) } -func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, - {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, - {Name: "lib", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.5", Architecture: "i386"}, - {Name: "dpkg", Version: "1.6", Architecture: "i386"}, - } +func (s *PackageRefListSuite) TestRefListCollectionMigrate(c *C) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + format, err := reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatInline) + + migrator := reflistCollection.NewMigration() + err = reflistCollection.LoadCompleteAndMigrate(sl, key, migrator) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) - rl := NewPackageList() - rl.Add(packages[0]) - rl.Add(packages[1]) - rl.Add(packages[2]) - rl.Add(packages[3]) - rl.Add(packages[4]) - rl.Add(packages[5]) - rl.Add(packages[6]) - rl.Add(packages[7]) - - result := NewPackageRefListFromPackageList(rl) - result.FilterLatestRefs() - - c.Check(toStrSlice(result), DeepEquals, - []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + stats := 
migrator.Stats() + c.Check(stats.Reflists, Equals, 0) + c.Check(stats.Buckets, Equals, 0) + c.Check(stats.Segments, Equals, 0) + + err = migrator.Flush() + c.Assert(err, IsNil) + stats = migrator.Stats() + c.Check(stats.Reflists, Equals, 1) + c.Check(stats.Buckets, Not(Equals), 0) + c.Check(stats.Segments, Equals, stats.Segments) + + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) + + format, err = reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatSplit) } diff --git a/deb/remote.go b/deb/remote.go index 6ecb7e809..15efb54f7 100644 --- a/deb/remote.go +++ b/deb/remote.go @@ -73,7 +73,7 @@ type RemoteRepo struct { // Packages for json output Packages []string `codec:"-" json:",omitempty"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList // Parsed archived root archiveRootURL *url.URL // Current list of packages (filled while updating mirror) @@ -164,14 +164,11 @@ func (repo *RemoteRepo) IsFlat() bool { // NumPackages return number of packages retrieved from remote repo func (repo *RemoteRepo) NumPackages() int { - if repo.packageRefs == nil { - return 0 - } return repo.packageRefs.Len() } // RefList returns package list for repo -func (repo *RemoteRepo) RefList() *PackageRefList { +func (repo *RemoteRepo) RefList() *SplitRefList { return repo.packageRefs } @@ -673,7 +670,7 @@ func (repo *RemoteRepo) FinalizeDownload(collectionFactory *CollectionFactory, p }) if err == nil { - repo.packageRefs = NewPackageRefListFromPackageList(repo.packageList) + repo.packageRefs = NewSplitRefListFromPackageList(repo.packageList) repo.packageList = nil } @@ -815,14 +812,14 @@ func (collection *RemoteRepoCollection) search(filter func(*RemoteRepo) bool, un } // Add appends new repo to collection and saves it -func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Add(repo *RemoteRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("mirror with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -832,28 +829,26 @@ func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { } // Update stores updated information about repo in DB -func (collection *RemoteRepoCollection) Update(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Update(repo *RemoteRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information for remote repo -func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - 
repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/remote_test.go b/deb/remote_test.go index 2cda7d5e6..a5601b1f3 100644 --- a/deb/remote_test.go +++ b/deb/remote_test.go @@ -52,7 +52,7 @@ func (n *NullVerifier) IsClearSigned(clearsign io.Reader) (bool, error) { type PackageListMixinSuite struct { p1, p2, p3 *Package list *PackageList - reflist *PackageRefList + reflist *SplitRefList } func (s *PackageListMixinSuite) SetUpPackages() { @@ -72,7 +72,7 @@ func (s *PackageListMixinSuite) SetUpPackages() { s.list.Add(s.p2) s.list.Add(s.p3) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) } type RemoteRepoSuite struct { @@ -291,7 +291,7 @@ func (s *RemoteRepoSuite) TestDownload(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -373,12 +373,12 @@ func (s *RemoteRepoSuite) TestDownloadWithInstaller(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "installer") } @@ -419,12 +419,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSources(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -503,7 +503,7 @@ func (s *RemoteRepoSuite) TestDownloadFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -593,12 +593,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[1]) c.Assert(err, 
IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -658,8 +658,9 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { type RemoteRepoCollectionSuite struct { PackageListMixinSuite - db database.Storage - collection *RemoteRepoCollection + db database.Storage + collection *RemoteRepoCollection + refListCollection *RefListCollection } var _ = Suite(&RemoteRepoCollectionSuite{}) @@ -667,6 +668,7 @@ var _ = Suite(&RemoteRepoCollectionSuite{}) func (s *RemoteRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewRemoteRepoCollection(s.db) + s.refListCollection = NewRefListCollection(s.db) s.SetUpPackages() } @@ -679,8 +681,8 @@ func (s *RemoteRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("yandex") c.Assert(err, IsNil) @@ -697,7 +699,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -711,7 +713,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection := NewRemoteRepoCollection(s.db) r, err := collection.ByName("yandex") @@ -719,20 +721,20 @@ func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection = NewRemoteRepoCollection(s.db) r, err = collection.ByName("yandex") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.refListCollection), IsNil) c.Assert(r.NumPackages(), Equals, 3) } func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo) + s.collection.Add(repo, s.refListCollection) count := 0 err := s.collection.ForEach(func(*RemoteRepo) error { @@ -754,10 +756,10 @@ func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *RemoteRepoCollectionSuite) TestDrop(c *C) { repo1, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo1) + s.collection.Add(repo1, s.refListCollection) repo2, _ := NewRemoteRepo("tyndex", "http://mirror.yandex.ru/debian/", "wheezy", []string{"main"}, []string{}, false, false, false) - 
s.collection.Add(repo2) + s.collection.Add(repo2, s.refListCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/snapshot.go b/deb/snapshot.go index f2a0d3879..0aeac4a34 100644 --- a/deb/snapshot.go +++ b/deb/snapshot.go @@ -40,7 +40,7 @@ type Snapshot struct { NotAutomatic string ButAutomaticUpgrades string - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewSnapshotFromRepository creates snapshot from current state of repository @@ -76,7 +76,7 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { } if snap.packageRefs == nil { - snap.packageRefs = NewPackageRefList() + snap.packageRefs = NewSplitRefList() } return snap, nil @@ -84,11 +84,13 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { // NewSnapshotFromPackageList creates snapshot from PackageList func NewSnapshotFromPackageList(name string, sources []*Snapshot, list *PackageList, description string) *Snapshot { - return NewSnapshotFromRefList(name, sources, NewPackageRefListFromPackageList(list), description) + sl := NewSplitRefList() + sl.Replace(NewPackageRefListFromPackageList(list)) + return NewSnapshotFromRefList(name, sources, sl, description) } -// NewSnapshotFromRefList creates snapshot from PackageRefList -func NewSnapshotFromRefList(name string, sources []*Snapshot, list *PackageRefList, description string) *Snapshot { +// NewSnapshotFromRefList creates snapshot from SplitRefList +func NewSnapshotFromRefList(name string, sources []*Snapshot, list *SplitRefList, description string) *Snapshot { sourceUUIDs := make([]string, len(sources)) for i := range sources { sourceUUIDs[i] = sources[i].UUID @@ -116,7 +118,7 @@ func (s *Snapshot) NumPackages() int { } // RefList returns list of package refs in snapshot -func (s *Snapshot) RefList() *PackageRefList { +func (s *Snapshot) RefList() *SplitRefList { return s.packageRefs } @@ -209,13 +211,13 @@ func NewSnapshotCollection(db database.Storage) *SnapshotCollection { } // Add appends new repo to collection and saves it -func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Add(snapshot *Snapshot, reflistCollection *RefListCollection) error { _, err := collection.ByName(snapshot.Name) if err == nil { return fmt.Errorf("snapshot with name %s already exists", snapshot.Name) } - err = collection.Update(snapshot) + err = collection.Update(snapshot, reflistCollection) if err != nil { return err } @@ -225,26 +227,22 @@ func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { } // Update stores updated information about snapshot in DB -func (collection *SnapshotCollection) Update(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Update(snapshot *Snapshot, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(snapshot.Key(), snapshot.Encode()) if snapshot.packageRefs != nil { - batch.Put(snapshot.RefKey(), snapshot.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(snapshot.packageRefs, snapshot.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information about snapshot -func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot) error { - encoded, err := collection.db.Get(snapshot.RefKey()) - if err != nil { - return err - } - - snapshot.packageRefs = &PackageRefList{} - return snapshot.packageRefs.Decode(encoded) +func (collection *SnapshotCollection) 
LoadComplete(snapshot *Snapshot, reflistCollection *RefListCollection) error { + snapshot.packageRefs = NewSplitRefList() + return reflistCollection.LoadComplete(snapshot.packageRefs, snapshot.RefKey()) } func (collection *SnapshotCollection) search(filter func(*Snapshot) bool, unique bool) []*Snapshot { diff --git a/deb/snapshot_bench_test.go b/deb/snapshot_bench_test.go index c6bb94a2c..4475ca57b 100644 --- a/deb/snapshot_bench_test.go +++ b/deb/snapshot_bench_test.go @@ -18,10 +18,11 @@ func BenchmarkSnapshotCollectionForEach(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } @@ -47,11 +48,12 @@ func BenchmarkSnapshotCollectionByUUID(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) uuids := []string{} for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } uuids = append(uuids, snapshot.UUID) @@ -78,10 +80,11 @@ func BenchmarkSnapshotCollectionByName(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } diff --git a/deb/snapshot_test.go b/deb/snapshot_test.go index d27c42269..805ccc8e5 100644 --- a/deb/snapshot_test.go +++ b/deb/snapshot_test.go @@ -109,6 +109,7 @@ type SnapshotCollectionSuite struct { snapshot1, snapshot2 *Snapshot snapshot3, snapshot4 *Snapshot collection *SnapshotCollection + reflistCollection *RefListCollection } var _ = Suite(&SnapshotCollectionSuite{}) @@ -116,6 +117,7 @@ var _ = Suite(&SnapshotCollectionSuite{}) func (s *SnapshotCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewSnapshotCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.SetUpPackages() s.repo1, _ = NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) @@ -143,10 +145,10 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { _, err := s.collection.ByName("snap1") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), ErrorMatches, ".*already exists") - 
c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) snapshot, err := s.collection.ByName("snap1") c.Assert(err, IsNil) @@ -167,20 +169,20 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { } func (s *SnapshotCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.snapshot1), IsNil) + c.Assert(s.collection.Update(s.snapshot1, s.reflistCollection), IsNil) collection := NewSnapshotCollection(s.db) snapshot, err := collection.ByName("snap1") c.Assert(err, IsNil) c.Assert(snapshot.packageRefs, IsNil) - c.Assert(s.collection.LoadComplete(snapshot), IsNil) + c.Assert(s.collection.LoadComplete(snapshot, s.reflistCollection), IsNil) c.Assert(snapshot.NumPackages(), Equals, 3) } func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*Snapshot) error { @@ -200,10 +202,10 @@ func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { } func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { - s.collection.Add(s.snapshot2) - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot4) - s.collection.Add(s.snapshot3) + s.collection.Add(s.snapshot2, s.reflistCollection) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot4, s.reflistCollection) + s.collection.Add(s.snapshot3, s.reflistCollection) names := []string{} @@ -217,8 +219,8 @@ func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { } func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) c.Check(s.collection.ByRemoteRepoSource(s.repo1), DeepEquals, []*Snapshot{s.snapshot1}) c.Check(s.collection.ByRemoteRepoSource(s.repo2), DeepEquals, []*Snapshot{s.snapshot2}) @@ -229,10 +231,10 @@ func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { } func (s *SnapshotCollectionSuite) TestFindByLocalRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(s.snapshot3), IsNil) - c.Assert(s.collection.Add(s.snapshot4), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot4, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepoSource(s.lrepo1), DeepEquals, []*Snapshot{s.snapshot3}) c.Check(s.collection.ByLocalRepoSource(s.lrepo2), DeepEquals, []*Snapshot{s.snapshot4}) @@ -247,11 +249,11 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { snapshot4 := NewSnapshotFromRefList("snap4", []*Snapshot{s.snapshot1}, s.reflist, "desc2") snapshot5 := NewSnapshotFromRefList("snap5", []*Snapshot{snapshot3}, s.reflist, "desc3") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(snapshot3), IsNil) - c.Assert(s.collection.Add(snapshot4), IsNil) - c.Assert(s.collection.Add(snapshot5), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + 
c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot5, s.reflistCollection), IsNil) list := s.collection.BySnapshotSource(s.snapshot1) sorter, _ := newSnapshotSorter("name", list) @@ -263,8 +265,8 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { } func (s *SnapshotCollectionSuite) TestDrop(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) snap, _ := s.collection.ByUUID(s.snapshot1.UUID) c.Check(snap, Equals, s.snapshot1) diff --git a/system/t08_db/CleanupDB10Test_gold b/system/t08_db/CleanupDB10Test_gold index 138adc294..faa25944e 100644 --- a/system/t08_db/CleanupDB10Test_gold +++ b/system/t08_db/CleanupDB10Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB11Test_gold b/system/t08_db/CleanupDB11Test_gold index be3767e9c..33be8b2a4 100644 --- a/system/t08_db/CleanupDB11Test_gold +++ b/system/t08_db/CleanupDB11Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Split 11 reflist(s) into 510 bucket(s) (123181 segment(s)) Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -24,6 +25,7 @@ List of package keys to delete: - Pi386 gnuplot-nox 4.6.1-1~maverick2 17785995cf0f815 - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB12Test_gold b/system/t08_db/CleanupDB12Test_gold index 31da9b23c..4e88abc21 100644 --- a/system/t08_db/CleanupDB12Test_gold +++ b/system/t08_db/CleanupDB12Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Skipped splitting 11 reflist(s) into 510 bucket(s) (123181 segment(s)), as -dry-run has been requested. Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -25,6 +26,7 @@ List of package keys to delete: - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f Skipped deletion, as -dry-run has been requested. +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB1Test_gold b/system/t08_db/CleanupDB1Test_gold index 138adc294..faa25944e 100644 --- a/system/t08_db/CleanupDB1Test_gold +++ b/system/t08_db/CleanupDB1Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... 
diff --git a/system/t08_db/CleanupDB2Test_gold b/system/t08_db/CleanupDB2Test_gold index 1f289e677..4e84de6a8 100644 --- a/system/t08_db/CleanupDB2Test_gold +++ b/system/t08_db/CleanupDB2Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (73270)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB3Test_gold b/system/t08_db/CleanupDB3Test_gold index 73279e145..73c82f87d 100644 --- a/system/t08_db/CleanupDB3Test_gold +++ b/system/t08_db/CleanupDB3Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB4Test_gold b/system/t08_db/CleanupDB4Test_gold index 138adc294..faa25944e 100644 --- a/system/t08_db/CleanupDB4Test_gold +++ b/system/t08_db/CleanupDB4Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB5Test_gold b/system/t08_db/CleanupDB5Test_gold index 73279e145..47bba4e2b 100644 --- a/system/t08_db/CleanupDB5Test_gold +++ b/system/t08_db/CleanupDB5Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (1)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB6Test_gold b/system/t08_db/CleanupDB6Test_gold index 138adc294..faa25944e 100644 --- a/system/t08_db/CleanupDB6Test_gold +++ b/system/t08_db/CleanupDB6Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB7Test_gold b/system/t08_db/CleanupDB7Test_gold index 138adc294..faa25944e 100644 --- a/system/t08_db/CleanupDB7Test_gold +++ b/system/t08_db/CleanupDB7Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB8Test_gold b/system/t08_db/CleanupDB8Test_gold index f769f203f..43ebe9aaa 100644 --- a/system/t08_db/CleanupDB8Test_gold +++ b/system/t08_db/CleanupDB8Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (4)... +Deleting unreferenced reflist buckets (1)... Building list of files referenced by packages... 
Building list of files in package pool... Deleting unreferenced files (6)... diff --git a/system/t08_db/CleanupDB9Test_gold b/system/t08_db/CleanupDB9Test_gold index 138adc294..faa25944e 100644 --- a/system/t08_db/CleanupDB9Test_gold +++ b/system/t08_db/CleanupDB9Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... From 462dd32204baeec26bb5e917a664217058be5c52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Sun, 21 Apr 2024 12:17:08 +0200 Subject: [PATCH 3/7] fix golangci-lint error --- api/files.go | 2 +- api/snapshot.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/files.go b/api/files.go index eea5a169d..fc460c5d2 100644 --- a/api/files.go +++ b/api/files.go @@ -131,7 +131,7 @@ func apiFilesListFiles(c *gin.Context) { listLock := &sync.Mutex{} root := filepath.Join(context.UploadPath(), utils.SanitizePath(c.Params.ByName("dir"))) - err := walker.Walk(root, func(path string, info os.FileInfo) error { + err := walker.Walk(root, func(path string, _ os.FileInfo) error { if path == root { return nil } diff --git a/api/snapshot.go b/api/snapshot.go index 4374b5a75..b41679887 100644 --- a/api/snapshot.go +++ b/api/snapshot.go @@ -204,7 +204,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { // including snapshot resource key resources := []string{string(repo.Key()), "S" + b.Name} taskName := fmt.Sprintf("Create snapshot of repo %s", name) - maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { + maybeRunTaskInBackground(c, taskName, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { err := collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err From 4af241656583afbbb7ef3542b4274994a44bbfd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Sun, 21 Apr 2024 15:30:48 +0200 Subject: [PATCH 4/7] fix golangci-lint errors --- deb/reflist.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deb/reflist.go b/deb/reflist.go index e039de127..55588a998 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -786,7 +786,7 @@ func segmentIndexKey(prefix []byte, idx int) []byte { func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, error) { digests := NewRefListDigestSet() - err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, value []byte) error { + err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, _ []byte) error { if !bytes.HasSuffix(key, []byte("-0000")) { // Ignore additional segments for the same digest. return nil @@ -818,7 +818,7 @@ func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, erro // the bucket is no longer referenced by any saved reflists. 
func (collection *RefListCollection) UnsafeDropBucket(digest []byte, batch database.Batch) error { prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) - return collection.db.ProcessByPrefix(prefix, func(key []byte, value []byte) error { + return collection.db.ProcessByPrefix(prefix, func(key []byte, _ []byte) error { return batch.Delete(key) }) } @@ -866,7 +866,7 @@ func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { if bucket == nil { bucket = NewPackageRefList() prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) - err := collection.db.ProcessByPrefix(prefix, func(digest []byte, value []byte) error { + err := collection.db.ProcessByPrefix(prefix, func(_ []byte, value []byte) error { var l PackageRefList if err := l.Decode(append([]byte{}, value...)); err != nil { return err From 74e0d93a52de3d7f711f570d32b3ab98730df03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Mon, 17 Jun 2024 13:50:49 +0200 Subject: [PATCH 5/7] fix unit tests --- deb/local.go | 3 +++ deb/remote.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/deb/local.go b/deb/local.go index d88904c28..4bb3843a6 100644 --- a/deb/local.go +++ b/deb/local.go @@ -48,6 +48,9 @@ func (repo *LocalRepo) String() string { // NumPackages return number of packages in local repo func (repo *LocalRepo) NumPackages() int { + if repo.packageRefs == nil { + return 0 + } return repo.packageRefs.Len() } diff --git a/deb/remote.go b/deb/remote.go index 15efb54f7..674336f6f 100644 --- a/deb/remote.go +++ b/deb/remote.go @@ -164,6 +164,9 @@ func (repo *RemoteRepo) IsFlat() bool { // NumPackages return number of packages retrieved from remote repo func (repo *RemoteRepo) NumPackages() int { + if repo.packageRefs == nil { + return 0 + } return repo.packageRefs.Len() } From 55598a7a9f52647d144f780bf331fe7586330b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Mon, 17 Jun 2024 15:18:23 +0200 Subject: [PATCH 6/7] make compatible with go 1.19 --- deb/reflist.go | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/deb/reflist.go b/deb/reflist.go index 55588a998..df1b9b2e6 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -428,7 +428,10 @@ func bucketRefPrefix(ref []byte) []byte { ref = ref[len(libPrefix):] } - prefixLen := min(maxPrefixLen, len(ref)) + prefixLen := len(ref) + if maxPrefixLen < prefixLen { + prefixLen = maxPrefixLen + } prefix, _, _ := bytes.Cut(ref[:prefixLen], []byte{' '}) return prefix } @@ -715,9 +718,16 @@ func (set *RefListDigestSet) ForEach(handler func(digest []byte) error) error { return nil } +// workaround for: conversion of slices to arrays requires go1.20 or later +func newRefListArray(digest []byte) reflistDigestArray { + var array reflistDigestArray + copy(array[:], digest) + return array +} + // Add adds digest to set, doing nothing if the digest was already present func (set *RefListDigestSet) Add(digest []byte) { - set.items[reflistDigestArray(digest)] = struct{}{} + set.items[newRefListArray(digest)] = struct{}{} } // AddAllInRefList adds all the bucket digests in a SplitRefList to the set @@ -731,13 +741,13 @@ func (set *RefListDigestSet) AddAllInRefList(sl *SplitRefList) { // Has checks whether a digest is part of set func (set *RefListDigestSet) Has(digest []byte) bool { - _, ok := set.items[reflistDigestArray(digest)] + _, ok := set.items[newRefListArray(digest)] return ok } // Remove removes a digest from set func (set *RefListDigestSet) Remove(digest []byte) { - 
delete(set.items, reflistDigestArray(digest)) + delete(set.items, newRefListArray(digest)) } // RemoveAll removes all the digests in other from the current set @@ -776,10 +786,20 @@ func segmentPrefix(encodedDigest string) []byte { return []byte(fmt.Sprintf("F%s-", encodedDigest)) } +// workaround for go 1.19 instead of bytes.Clone +func cloneBytes(b []byte) []byte { + if b == nil { + return nil + } + cloned := make([]byte, len(b)) + copy(cloned, b) + return cloned +} + func segmentIndexKey(prefix []byte, idx int) []byte { // Assume most buckets won't have more than 0xFFFF = ~65k segments (which // would be an extremely large bucket!). - return append(bytes.Clone(prefix), []byte(fmt.Sprintf("%04x", idx))...) + return append(cloneBytes(prefix), []byte(fmt.Sprintf("%04x", idx))...) } // AllBucketDigests returns a set of all the bucket digests in the database @@ -861,7 +881,7 @@ func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { var bucket *PackageRefList if digest := sl.Buckets[idx]; len(digest) > 0 { - cacheKey := reflistDigestArray(digest) + cacheKey := newRefListArray(digest) bucket = collection.cache[cacheKey] if bucket == nil { bucket = NewPackageRefList() From 3d130562cad9f903cdd4a08cfb18537c3508497c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Thu, 20 Jun 2024 09:39:21 +0200 Subject: [PATCH 7/7] handle packageRefs == nil --- deb/snapshot.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deb/snapshot.go b/deb/snapshot.go index 0aeac4a34..00995e7a1 100644 --- a/deb/snapshot.go +++ b/deb/snapshot.go @@ -114,6 +114,9 @@ func (s *Snapshot) String() string { // NumPackages returns number of packages in snapshot func (s *Snapshot) NumPackages() int { + if s.packageRefs == nil { + return 0 + } return s.packageRefs.Len() }