Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-7118 - When migrating from XetaBase 1.x to 2.x users can't specify the Organization name #2523

Merged
merged 9 commits into from
Oct 29, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -1726,8 +1726,7 @@ public static DataStore defaultDataStore(CatalogManager catalogManager, Project
return defaultDataStore(catalogManager.getConfiguration().getDatabasePrefix(), project.getFqn());
}

public static DataStore defaultDataStore(String databasePrefix, String projectFqnStr)
throws CatalogException {
public static DataStore defaultDataStore(String databasePrefix, String projectFqnStr) {
CatalogFqn projectFqn = CatalogFqn.extractFqnFromProjectFqn(projectFqnStr);

String dbName = buildDatabaseName(databasePrefix, projectFqn.getOrganizationId(), projectFqn.getProjectId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ private void runMigrationToV3() throws Exception {
setCatalogDatabaseCredentials(options, options.commonOptions);

OrganizationMigration organizationMigration = new OrganizationMigration(configuration, options.commonOptions.adminPassword,
options.user);
options.user, options.organizationId, Paths.get(appHome));
organizationMigration.execute();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ public class OrganizationMigrationCommandOptions extends AdminCliOptionsParser.C
@Parameter(names = {"--user"}, description = "User whose data is going to be migrated. If more than one user of type FULL contains"
+ " projects and studies, only the one provided will keep the data and will be fully migrated.")
public String user;

@Parameter(names = {"--organization-id"}, description = "Optional parameter to specify how the new organization will be named." +
" By default, if not provided, the organization id will match the user id that is currently owning the data.")
public String organizationId;
}

@Parameters(commandNames = {"summary"}, commandDescription = "Obtain migrations status summary")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,42 +1,52 @@
package org.opencb.opencga.app.migrations.v3.v3_0_0;

import com.mongodb.client.*;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.InsertOneModel;
import com.mongodb.client.model.Projections;
import com.mongodb.client.model.Updates;
import com.mongodb.client.model.*;
import com.mongodb.client.result.DeleteResult;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.opencb.commons.datastore.mongodb.MongoDataStore;
import org.opencb.commons.utils.CryptoUtils;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.analysis.variant.manager.VariantStorageManager;
import org.opencb.opencga.catalog.auth.authentication.CatalogAuthenticationManager;
import org.opencb.opencga.catalog.db.api.StudyDBAdaptor;
import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptorFactory;
import org.opencb.opencga.catalog.db.mongodb.OrganizationMongoDBAdaptorFactory;
import org.opencb.opencga.catalog.exceptions.CatalogAuthorizationException;
import org.opencb.opencga.catalog.exceptions.CatalogDBException;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.exceptions.CatalogIOException;
import org.opencb.opencga.catalog.io.CatalogIOManager;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.migration.Migration;
import org.opencb.opencga.catalog.migration.MigrationException;
import org.opencb.opencga.catalog.migration.MigrationTool;
import org.opencb.opencga.catalog.utils.FqnUtils;
import org.opencb.opencga.catalog.utils.ParamUtils;
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.common.TimeUtils;
import org.opencb.opencga.core.config.AuthenticationOrigin;
import org.opencb.opencga.core.config.Configuration;
import org.opencb.opencga.core.models.migration.MigrationRun;
import org.opencb.opencga.core.models.organizations.OrganizationCreateParams;
import org.opencb.opencga.core.models.project.DataStore;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.reflections.Reflections;
import org.reflections.scanners.SubTypesScanner;
import org.reflections.scanners.TypeAnnotationsScanner;
import org.reflections.util.ClasspathHelper;
import org.reflections.util.ConfigurationBuilder;

import java.io.IOException;
import java.lang.reflect.Modifier;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.NoSuchAlgorithmException;
import java.util.*;
Expand All @@ -62,19 +72,25 @@ private enum MigrationStatus {
ERROR
}

public OrganizationMigration(Configuration configuration, String adminPassword, String userId) throws CatalogException {
public OrganizationMigration(Configuration configuration, String adminPassword, String userId, String organizationId, Path appHome)
throws CatalogException, IOException {
this.configuration = configuration;
this.adminPassword = adminPassword;
this.userId = userId;
this.organizationId = organizationId;
this.appHome = appHome;

this.status = checkAndInit();
}

private MigrationStatus checkAndInit() throws CatalogException {
private MigrationStatus checkAndInit() throws CatalogException, IOException {
this.oldDatabase = configuration.getDatabasePrefix() + "_catalog";
this.mongoDBAdaptorFactory = new MongoDBAdaptorFactory(configuration);
this.oldDatastore = mongoDBAdaptorFactory.getMongoManager().get(oldDatabase, mongoDBAdaptorFactory.getMongoDbConfiguration());

FileUtils.checkDirectory(appHome);
readStorageConfiguration();

MongoCollection<Document> userCol = oldDatastore.getDb().getCollection(OrganizationMongoDBAdaptorFactory.USER_COLLECTION);
FindIterable<Document> iterable = userCol.find(Filters.eq("id", ParamConstants.OPENCGA_USER_ID));
try (MongoCursor<Document> cursor = iterable.cursor()) {
Expand Down Expand Up @@ -193,7 +209,10 @@ private MigrationStatus checkAndInit() throws CatalogException {
this.userIdsToDiscardData = new HashSet<>();
}

this.organizationId = this.userId;
if (StringUtils.isEmpty(this.organizationId)) {
this.organizationId = this.userId;
}
ParamUtils.checkIdentifier(this.organizationId, "Organization id");
this.catalogManager = new CatalogManager(configuration);
return MigrationStatus.PENDING_MIGRATION;
}
Expand Down Expand Up @@ -360,6 +379,10 @@ protected void run() throws Exception {

CatalogIOManager ioManager = new CatalogIOManager(configuration);

Map<String, String> organizationOwnerMap = new HashMap<>();
organizationOwnerMap.put(ParamConstants.ADMIN_ORGANIZATION, ParamConstants.OPENCGA_USER_ID);
organizationOwnerMap.put(this.organizationId, this.userId);

// Loop over all organizations to perform additional data model changes
for (String organizationId : mongoDBAdaptorFactory.getOrganizationIds()) {
ioManager.createOrganization(organizationId);
Expand Down Expand Up @@ -400,13 +423,14 @@ protected void run() throws Exception {
}

// Add owner as admin of every study and remove _ownerId field
String ownerId = organizationOwnerMap.get(organizationId);
for (String collection : Arrays.asList(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION, OrganizationMongoDBAdaptorFactory.DELETED_STUDY_COLLECTION)) {
MongoCollection<Document> mongoCollection = database.getCollection(collection);
mongoCollection.updateMany(
Filters.eq(StudyDBAdaptor.QueryParams.GROUP_ID.key(), ParamConstants.ADMINS_GROUP),
Updates.combine(
Updates.unset("_ownerId"),
Updates.push("groups.$.userIds", organizationId)
Updates.push("groups.$.userIds", ownerId)
));
}

Expand Down Expand Up @@ -434,16 +458,122 @@ protected void run() throws Exception {
// Set organization counter, owner and authOrigins
orgCol.updateOne(Filters.eq("id", organizationId), Updates.combine(
Updates.set("_idCounter", counter),
Updates.set("owner", organizationId),
Updates.set("owner", ownerId),
Updates.set("configuration.authenticationOrigins", authOrigins)
));
}

// If the user didn't want to use the userId as the new organization id, we then need to change all the fqn's
if (!this.organizationId.equals(this.userId)) {
logger.info("New organization id '{}' is different from original userId '{}'. Changing FQN's from projects and studies"
, this.organizationId, this.userId);
changeFqns();
}

// Skip current migration for both organizations
catalogManager.getMigrationManager().skipPendingMigrations(ParamConstants.ADMIN_ORGANIZATION, opencgaToken);
catalogManager.getMigrationManager().skipPendingMigrations(organizationId, opencgaToken);
}

private void changeFqns() throws CatalogDBException, MigrationException {
this.dbAdaptorFactory = this.mongoDBAdaptorFactory;
String date = TimeUtils.getTime();

StorageEngineFactory storageEngineFactory = StorageEngineFactory.get(readStorageConfiguration());

// Change project fqn's
for (String projectCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.PROJECT_COLLECTION,
j-coll marked this conversation as resolved.
Show resolved Hide resolved
OrganizationMongoDBAdaptorFactory.DELETED_PROJECT_COLLECTION)) {
migrateCollection(projectCol, new Document(), Projections.include("_id", "id", "fqn", "internal.datastores.variant"), (document, bulk) -> {
String oldFqn = document.getString("fqn");
String projectId = document.getString("id");
String newFqn = FqnUtils.buildFqn(this.organizationId, projectId);
logger.info("Changing project fqn from '{}' to '{}'", oldFqn, newFqn);

Document set = new Document()
.append("fqn", newFqn)
.append("attributes.OPENCGA.3_0_0", new Document()
.append("date", date)
.append("oldFqn", oldFqn)
);

Document internal = document.get("internal", Document.class);
if (internal != null) {
Document datastores = internal.get("datastores", Document.class);
if (datastores != null) {
Document variant = datastores.get("variant", Document.class);
if (variant == null) {
DataStore dataStore = VariantStorageManager.defaultDataStore(configuration.getDatabasePrefix(), oldFqn);
logger.info("Undefined variant \"internal.datastores.variant\" at project '{}'.", oldFqn);

// Update only if the project exists in the variant storage
try (VariantStorageEngine variantStorageEngine = storageEngineFactory
.getVariantStorageEngine(dataStore.getStorageEngine(), dataStore.getDbName())) {
if (variantStorageEngine.getMetadataManager().exists()) {
logger.info("Project exists in the variant storage. Setting variant data store: {}", dataStore);
set.append("internal.datastores.variant", new Document()
.append("storageEngine", dataStore.getStorageEngine())
.append("dbName", dataStore.getDbName())
.append("options", new Document()));
} else {
logger.info("Project does not exist in the variant storage. Skipping");
}
} catch (StorageEngineException | IOException e) {
throw new RuntimeException(e);
}

}
}
}

bulk.add(new UpdateOneModel<>(
Filters.eq("_id", document.get("_id")),
new Document("$set", set))
);
logger.info("-------");
});
}

MongoDatabase database = mongoDBAdaptorFactory.getMongoDataStore(organizationId).getDb();
MongoCollection<Document> jobCollection = database.getCollection(OrganizationMongoDBAdaptorFactory.JOB_COLLECTION);
MongoCollection<Document> jobDeletedCollection = database.getCollection(OrganizationMongoDBAdaptorFactory.DELETED_JOB_COLLECTION);

// Change study fqn's
for (String studyCol : Arrays.asList(OrganizationMongoDBAdaptorFactory.STUDY_COLLECTION,
OrganizationMongoDBAdaptorFactory.DELETED_STUDY_COLLECTION)) {
migrateCollection(studyCol, new Document(), Projections.include("_id", "uid", "fqn"), (document, bulk) -> {
long studyUid = document.get("uid", Number.class).longValue();

String oldStudyFqn = document.getString("fqn");
FqnUtils.FQN oldFqnInstance = FqnUtils.parse(oldStudyFqn);
String newFqn = FqnUtils.buildFqn(this.organizationId, oldFqnInstance.getProject(), oldFqnInstance.getStudy());
logger.info("Changing study fqn from '{}' to '{}'", oldStudyFqn, newFqn);
bulk.add(new UpdateOneModel<>(
Filters.eq("_id", document.get("_id")),
new Document("$set", new Document()
.append("fqn", newFqn)
.append("attributes.OPENCGA.3_0_0", new Document()
.append("date", date)
.append("oldFqn", oldStudyFqn)
)
))
);

// Change fqn in all jobs that were pointing to this study
Bson jobQuery = Filters.eq("studyUid", studyUid);
Bson update = new Document("$set", new Document()
.append("study.id", newFqn)
.append("attributes.OPENCGA.3_0_0", new Document()
.append("date", date)
.append("oldStudyFqn", oldStudyFqn)
)
);
jobCollection.updateMany(jobQuery, update);
jobDeletedCollection.updateMany(jobQuery, update);
});
}
}

Set<Class<? extends MigrationTool>> getAvailableMigrations() {
Reflections reflections = new Reflections(new ConfigurationBuilder()
.setScanners(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,17 @@ protected final void migrateCollection(MongoCollection<Document> inputCollection
.cursor()) {
while (it.hasNext()) {
Document document = it.next();
migrateFunc.accept(document, list);
try {
migrateFunc.accept(document, list);
} catch (Exception e) {
try {
logger.error("Error migrating document: {}", document.toJson());
} catch (Exception e1) {
e.addSuppressed(e1);
logger.error("Error migrating document: {}", e.getMessage());
}
throw e;
}

if (list.size() >= batchSize) {
count += list.size();
Expand Down