From c532d25d2499aba3679d58fa0ea5eb0f79ef2b82 Mon Sep 17 00:00:00 2001 From: Sushant Raikar Date: Thu, 6 Jun 2024 14:01:15 -0700 Subject: [PATCH 1/8] Add method to provision table space --- .../cluster/storage/BaseStorage.java | 35 +++++++++++++++++++ .../openhouse/cluster/storage/Storage.java | 28 +++++++++++++++ .../cluster/storage/hdfs/HdfsStorage.java | 19 ++++++++++ .../cluster/storage/local/LocalStorage.java | 19 ++++++++++ .../impl/InternalRepositoryUtils.java | 6 ++-- .../impl/OpenHouseInternalRepositoryImpl.java | 10 +++--- .../mock/storage/hdfs/HdfsStorageTest.java | 15 ++++++++ .../mock/storage/local/LocalStorageTest.java | 16 +++++++++ 8 files changed, 142 insertions(+), 6 deletions(-) diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java index 2c992bfe..d02e9d2d 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.cluster.storage; +import com.google.common.base.Preconditions; import com.linkedin.openhouse.cluster.storage.configs.StorageProperties; +import java.net.URI; import java.util.HashMap; import java.util.Map; import java.util.Optional; @@ -41,4 +43,37 @@ public Map getProperties() { .map(HashMap::new) .orElseGet(HashMap::new); } + + /** + * Allocates Table Space for the storage. + * + *

Default tableLocation looks like: {endpoint}/{rootPrefix}/{databaseId}/{tableId}-{tableUUID} + * + * @return the table location where the table data should be stored + */ + @Override + public String allocateTableSpace( + String databaseId, + String tableId, + String tableUUID, + String tableCreator, + boolean skipProvisioning) { + Preconditions.checkArgument(databaseId != null, "Database ID cannot be null"); + Preconditions.checkArgument(tableId != null, "Table ID cannot be null"); + Preconditions.checkArgument(tableUUID != null, "Table UUID cannot be null"); + Preconditions.checkState( + storageProperties.getTypes().containsKey(getType().getValue()), + "Storage properties doesn't contain type: " + getType().getValue()); + return URI.create( + getClient().getEndpoint() + + getClient().getRootPrefix() + + "/" + + databaseId + + "/" + + tableId + + "-" + + tableUUID) + .normalize() + .toString(); + } } diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java index b0a8f079..14f0c799 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java @@ -48,4 +48,32 @@ public interface Storage { * @return a client to interact with the storage */ StorageClient getClient(); + + /** + * Allocates Table Space for the storage. + * + *
<p>Allocating involves creating directory structure/ creating bucket, and setting appropriate
+   * permissions etc. Please note that this method should avoid creating TableFormat specific
+   * directories (ex: /data, /metadata for Iceberg or _delta_log for DeltaLake). Such provisioning
+   * should be done by the TableFormat implementation.
+   *
+   * <p>After allocation is done, this method should return the table location where the table data
+   * should be stored. Example: /rootPrefix/databaseId/tableId-UUID for HDFS storage
+   * /tmp/databaseId/tableId-UUID for Local storage s3://bucket/databaseId/tableId-UUID for S3
+   * storage
+   *
+   * @param databaseId the database id of the table
+   * @param tableId the table id of the table
+   * @param tableUUID the UUID of the table
+   * @param tableCreator the creator of the table
+   * @param skipProvisioning Set to true if heavy-lifting allocation work needs to be skipped and
+   *     only the table location needs to be returned
+   * @return the table location after provisioning is done
+   */
+  String allocateTableSpace(
+      String databaseId,
+      String tableId,
+      String tableUUID,
+      String tableCreator,
+      boolean skipProvisioning);
 }
diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java
index a6ad210c..66e0feff 100644
--- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java
+++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java
@@ -3,6 +3,7 @@
 import com.linkedin.openhouse.cluster.storage.BaseStorage;
 import com.linkedin.openhouse.cluster.storage.StorageClient;
 import com.linkedin.openhouse.cluster.storage.StorageType;
+import java.nio.file.Paths;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.context.annotation.Lazy;
 import org.springframework.stereotype.Component;
@@ -36,4 +37,22 @@ public StorageType.Type getType() {
   public StorageClient getClient() {
     return hdfsStorageClient;
   }
+
+  /**
+   * Allocates Table Space for the HDFS storage.
+   *
+   * <p>tableLocation looks like: /{rootPrefix}/{databaseId}/{tableId}-{tableUUID} We strip the
+   * endpoint as it's not needed for HDFS.
+   *
+   * @return the table location
+   */
+  @Override
+  public String allocateTableSpace(
+      String databaseId,
+      String tableId,
+      String tableUUID,
+      String tableCreator,
+      boolean skipProvisioning) {
+    return Paths.get(getClient().getRootPrefix(), databaseId, tableId + "-" + tableUUID).toString();
+  }
 }
diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java
index f3ee5e8c..740023e3 100644
--- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java
+++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java
@@ -4,6 +4,7 @@
 import com.linkedin.openhouse.cluster.storage.StorageClient;
 import com.linkedin.openhouse.cluster.storage.StorageType;
 import com.linkedin.openhouse.cluster.storage.configs.StorageProperties;
+import java.nio.file.Paths;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.context.annotation.Lazy;
 import org.springframework.stereotype.Component;
@@ -51,4 +52,22 @@ public StorageType.Type getType() {
   public StorageClient getClient() {
     return localStorageClient;
   }
+
+  /**
+   * Allocates Table Space for the Local Storage.
+   *
+   * 

tableLocation looks like: /{rootPrefix}/{databaseId}/{tableId}-{tableUUID} We strip the + * endpoint as it's not needed for Local Storage. + * + * @return the table location + */ + @Override + public String allocateTableSpace( + String databaseId, + String tableId, + String tableUUID, + String tableCreator, + boolean skipProvisioning) { + return Paths.get(getClient().getRootPrefix(), databaseId, tableId + "-" + tableUUID).toString(); + } } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java index cd6d7323..22d5eb38 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java @@ -19,6 +19,7 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; +import org.apache.commons.lang3.StringUtils; import org.apache.iceberg.Table; import org.apache.iceberg.UpdateProperties; @@ -142,8 +143,9 @@ static TableDto convertToTableDto( .tableUUID(megaProps.get(getCanonicalFieldName("tableUUID"))) .tableLocation( URI.create( - storage.getClient().getEndpoint() - + megaProps.get(getCanonicalFieldName("tableLocation"))) + StringUtils.prependIfMissing( + megaProps.get(getCanonicalFieldName("tableLocation")), + storage.getClient().getEndpoint())) .normalize() .toString()) .tableVersion(megaProps.get(getCanonicalFieldName("tableVersion"))) diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java index 6f5fccaf..5301a77b 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java @@ -111,12 +111,14 @@ public TableDto save(TableDto tableDto) { tableIdentifier, writeSchema, partitionSpec, - constructTablePath( - storageManager, + storageManager + .getDefaultStorage() + .allocateTableSpace( tableDto.getDatabaseId(), tableDto.getTableId(), - tableDto.getTableUUID()) - .toString(), + tableDto.getTableUUID(), + tableDto.getTableCreator(), + false), computePropsForTableCreation(tableDto)); meterRegistry.counter(MetricsConstant.REPO_TABLE_CREATED_CTR).increment(); log.info( diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java index f4c508ac..96884a54 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java @@ -56,4 +56,19 @@ public void testHdfsStoragePropertiesReturned() { "/data/openhouse", "hdfs://localhost:9000", testMap))); assertEquals(testMap, hdfsStorage.getProperties()); } + + @Test + public void testAllocateTableSpace() { + String databaseId = "db1"; + String tableId = "table1"; + String tableUUID = "uuid1"; + String tableCreator = "creator1"; + boolean skipProvisioning = false; + when(hdfsStorageClient.getRootPrefix()).thenReturn("/data/openhouse"); + String expected = 
"/data/openhouse/db1/table1-uuid1"; + assertEquals( + expected, + hdfsStorage.allocateTableSpace( + databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + } } diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java index f7a0d84c..5dda1771 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.tables.mock.storage.local; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.when; @@ -83,4 +84,19 @@ public void testLocalStorageGetClient() { when(localStorageClient.getNativeClient()).thenReturn(localFileSystem); assertTrue(localStorage.getClient().getNativeClient().equals(localFileSystem)); } + + @Test + public void testAllocateTableSpace() { + String databaseId = "db1"; + String tableId = "table1"; + String tableUUID = "uuid1"; + String tableCreator = "creator1"; + boolean skipProvisioning = false; + when(localStorageClient.getRootPrefix()).thenReturn("/tmp"); + String expected = "/tmp/db1/table1-uuid1"; + assertEquals( + expected, + localStorage.allocateTableSpace( + databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + } } From 3e02b4ed186232a33b0e9ddc2ee22c17f4a1c183 Mon Sep 17 00:00:00 2001 From: Sushant Raikar Date: Thu, 6 Jun 2024 14:05:11 -0700 Subject: [PATCH 2/8] Add method to provision table space --- .../java/com/linkedin/openhouse/cluster/storage/Storage.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java index 14f0c799..bad9bc07 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java @@ -58,8 +58,8 @@ public interface Storage { * should be done by the TableFormat implementation. * *

After allocation is done, this method should return the table location where the table data - * should be stored. Example: /rootPrefix/databaseId/tableId-UUID for HDFS storage - * /tmp/databaseId/tableId-UUID for Local storage s3://bucket/databaseId/tableId-UUID for S3 + * should be stored. Example: /rootPrefix/databaseId/tableId-UUID for HDFS storage; + * /tmp/databaseId/tableId-UUID for Local storage; s3://bucket/databaseId/tableId-UUID for S3 * storage * * @param databaseId the database id of the table From eeb9a8197d7da64b71f14e54d2c9a1ce14c24612 Mon Sep 17 00:00:00 2001 From: Sushant Raikar Date: Thu, 6 Jun 2024 16:21:55 -0700 Subject: [PATCH 3/8] table spaces location --- .../linkedin/openhouse/cluster/storage/BaseStorage.java | 8 +++++++- .../com/linkedin/openhouse/cluster/storage/Storage.java | 4 ++-- .../openhouse/cluster/storage/hdfs/HdfsStorage.java | 2 +- .../openhouse/cluster/storage/local/LocalStorage.java | 2 +- .../repository/impl/OpenHouseInternalRepositoryImpl.java | 2 +- .../tables/mock/storage/hdfs/HdfsStorageTest.java | 2 +- .../tables/mock/storage/local/LocalStorageTest.java | 2 +- 7 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java index d02e9d2d..c524d52a 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java @@ -49,10 +49,16 @@ public Map getProperties() { * *

Default tableLocation looks like: {endpoint}/{rootPrefix}/{databaseId}/{tableId}-{tableUUID} * + * @param databaseId the database id of the table + * @param tableId the table id of the table + * @param tableUUID the UUID of the table + * @param tableCreator the creator of the table + * @param skipProvisioning Set to true if heavy-lifting allocation work needs to be skipped and + * only the table location needs to be returned * @return the table location where the table data should be stored */ @Override - public String allocateTableSpace( + public String allocateTableLocation( String databaseId, String tableId, String tableUUID, diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java index bad9bc07..9a706dc9 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java @@ -50,7 +50,7 @@ public interface Storage { StorageClient getClient(); /** - * Allocates Table Space for the storage. + * Allocates Table Storage and return location. * *

Allocating involves creating directory structure/ creating bucket, and setting appropriate * permissions etc. Please note that this method should avoid creating TableFormat specific @@ -70,7 +70,7 @@ public interface Storage { * only the table location needs to be returned * @return the table location after provisioning is done */ - String allocateTableSpace( + String allocateTableLocation( String databaseId, String tableId, String tableUUID, diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java index 66e0feff..c9f6f5b7 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java @@ -47,7 +47,7 @@ public StorageClient getClient() { * @return the table location */ @Override - public String allocateTableSpace( + public String allocateTableLocation( String databaseId, String tableId, String tableUUID, diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java index 740023e3..0ae5bdf2 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java @@ -62,7 +62,7 @@ public StorageClient getClient() { * @return the table location */ @Override - public String allocateTableSpace( + public String allocateTableLocation( String databaseId, String tableId, String tableUUID, diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java index 5301a77b..8d4f740d 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java @@ -113,7 +113,7 @@ public TableDto save(TableDto tableDto) { partitionSpec, storageManager .getDefaultStorage() - .allocateTableSpace( + .allocateTableLocation( tableDto.getDatabaseId(), tableDto.getTableId(), tableDto.getTableUUID(), diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java index 96884a54..598e278f 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java @@ -68,7 +68,7 @@ public void testAllocateTableSpace() { String expected = "/data/openhouse/db1/table1-uuid1"; assertEquals( expected, - hdfsStorage.allocateTableSpace( + hdfsStorage.allocateTableLocation( databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); } } diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java index 5dda1771..c9cfeb78 100644 --- 
a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java @@ -96,7 +96,7 @@ public void testAllocateTableSpace() { String expected = "/tmp/db1/table1-uuid1"; assertEquals( expected, - localStorage.allocateTableSpace( + localStorage.allocateTableLocation( databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); } } From 21d5fb4c43cbd9bd4b6e75a9e47db50f91e36234 Mon Sep 17 00:00:00 2001 From: Sushant Raikar Date: Thu, 6 Jun 2024 16:22:53 -0700 Subject: [PATCH 4/8] fix documentation --- .../com/linkedin/openhouse/cluster/storage/BaseStorage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java index c524d52a..4cebd71a 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java @@ -45,7 +45,7 @@ public Map getProperties() { } /** - * Allocates Table Space for the storage. + * Allocates Table Storage and return location. * *
<p>Default tableLocation looks like: {endpoint}/{rootPrefix}/{databaseId}/{tableId}-{tableUUID}
    *

From 1b8a5ace48f4d1fd8b9486f8603321ac5c86df7b Mon Sep 17 00:00:00 2001
From: Sushant Raikar
Date: Fri, 7 Jun 2024 11:51:41 -0700
Subject: [PATCH 5/8] resolve comments

---
 .../openhouse/cluster/storage/Storage.java    |   4 +-
 .../cluster/storage/hdfs/HdfsStorage.java     |   3 +-
 .../cluster/storage/local/LocalStorage.java   |   5 +-
 .../mock/storage/base/BaseStorageTest.java    | 105 ++++++++++++++++++
 4 files changed, 111 insertions(+), 6 deletions(-)
 create mode 100644 services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java

diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java
index 9a706dc9..dd696d14 100644
--- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java
+++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java
@@ -58,8 +58,8 @@ public interface Storage {
    * should be done by the TableFormat implementation.
    *
    * <p>After allocation is done, this method should return the table location where the table data
-   * should be stored. Example: /rootPrefix/databaseId/tableId-UUID for HDFS storage;
-   * /tmp/databaseId/tableId-UUID for Local storage; s3://bucket/databaseId/tableId-UUID for S3
+   * should be stored. Example: hdfs:///rootPrefix/databaseId/tableId-UUID for HDFS storage;
+   * file:/tmp/databaseId/tableId-UUID for Local storage; s3://bucket/databaseId/tableId-UUID for S3
    * storage
    *
    * @param databaseId the database id of the table
diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java
index c9f6f5b7..d9b6fd8e 100644
--- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java
+++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java
@@ -42,7 +42,8 @@ public StorageClient getClient() {
    * Allocates Table Space for the HDFS storage.
    *
    * <p>tableLocation looks like: /{rootPrefix}/{databaseId}/{tableId}-{tableUUID} We strip the
-   * endpoint as it's not needed for HDFS.
+   * endpoint to ensure backward-compatibility. This override should be removed after resolving
+   * https://github.com/linkedin/openhouse/issues/121
    *
    * @return the table location
    */
diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java
index 0ae5bdf2..2722079d 100644
--- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java
+++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java
@@ -57,9 +57,8 @@ public StorageClient getClient() {
    * Allocates Table Space for the Local Storage.
    *
    * 

tableLocation looks like: /{rootPrefix}/{databaseId}/{tableId}-{tableUUID} We strip the - * endpoint as it's not needed for Local Storage. - * - * @return the table location + * endpoint to ensure backward-compatibility. This override should be removed after resolving */ @Override public String allocateTableLocation( diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java new file mode 100644 index 00000000..9c404f90 --- /dev/null +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java @@ -0,0 +1,105 @@ +package com.linkedin.openhouse.tables.mock.storage.base; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableMap; +import com.linkedin.openhouse.cluster.storage.BaseStorage; +import com.linkedin.openhouse.cluster.storage.StorageClient; +import com.linkedin.openhouse.cluster.storage.StorageType; +import com.linkedin.openhouse.cluster.storage.configs.StorageProperties; +import com.linkedin.openhouse.cluster.storage.hdfs.HdfsStorageClient; +import org.apache.hadoop.fs.FileSystem; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.context.TestComponent; +import org.springframework.boot.test.mock.mockito.MockBean; + +@SpringBootTest +public class BaseStorageTest { + + @MockBean private StorageProperties storageProperties; + + @MockBean private HdfsStorageClient hdfsStorageClient; + + @TestComponent + public static class DummyBaseStorage extends BaseStorage { + + @Autowired private HdfsStorageClient hdfsStorageClient; + + @Override + public StorageType.Type getType() { + return StorageType.HDFS; // return a dummy type + } + + @Override + public StorageClient getClient() { + return hdfsStorageClient; // return a dummy client + } + } + + @Autowired private DummyBaseStorage baseStorage; + + private static final String databaseId = "db1"; + private static final String tableId = "table1"; + private static final String tableUUID = "uuid1"; + private static final String tableCreator = "creator1"; + private static final boolean skipProvisioning = false; + + @Test + public void testAllocateTableLocationPattern1() { + mockStorageProperties("hdfs://localhost:9000", "/data/openhouse"); + assertEquals( + "hdfs://localhost:9000/data/openhouse/db1/table1-uuid1", + baseStorage.allocateTableLocation( + databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + } + + @Test + public void testAllocateTableLocationPattern2() { + mockStorageProperties("hdfs://localhost:9000/", "/data/openhouse"); + assertEquals( + "hdfs://localhost:9000/data/openhouse/db1/table1-uuid1", + baseStorage.allocateTableLocation( + databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + } + + @Test + public void testAllocateTableLocationPattern3() { + mockStorageProperties("hdfs://localhost:9000/", "data/openhouse"); + assertEquals( + "hdfs://localhost:9000/data/openhouse/db1/table1-uuid1", + baseStorage.allocateTableLocation( + databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + } + + @Test + public void testAllocateTableLocationPattern4() { + mockStorageProperties("hdfs://localhost:9000/", "data"); + assertEquals( + 
"hdfs://localhost:9000/data/db1/table1-uuid1", + baseStorage.allocateTableLocation( + databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + } + + @Test + public void testAllocateTableLocationPattern5() { + mockStorageProperties("hdfs:///", "data/openhouse"); + assertEquals( + "hdfs:///data/openhouse/db1/table1-uuid1", + baseStorage.allocateTableLocation( + databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + } + + void mockStorageProperties(String endpoint, String rootPrefix) { + when(hdfsStorageClient.getEndpoint()).thenReturn(endpoint); + when(hdfsStorageClient.getRootPrefix()).thenReturn(rootPrefix); + when(storageProperties.getTypes()) + .thenReturn( + ImmutableMap.of( + StorageType.HDFS.getValue(), + new StorageProperties.StorageTypeProperties( + rootPrefix, endpoint, ImmutableMap.of()))); + } +} From 0b15baed1b2facb784b67229fec2f23d7ddbc09a Mon Sep 17 00:00:00 2001 From: Sushant Raikar Date: Fri, 7 Jun 2024 12:01:33 -0700 Subject: [PATCH 6/8] remove skip-provisioning --- .../linkedin/openhouse/cluster/storage/BaseStorage.java | 8 +------- .../com/linkedin/openhouse/cluster/storage/Storage.java | 8 +------- .../openhouse/cluster/storage/hdfs/HdfsStorage.java | 6 +----- .../openhouse/cluster/storage/local/LocalStorage.java | 6 +----- .../repository/impl/OpenHouseInternalRepositoryImpl.java | 3 +-- .../tables/mock/storage/hdfs/HdfsStorageTest.java | 4 +--- 6 files changed, 6 insertions(+), 29 deletions(-) diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java index 4cebd71a..20443d18 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/BaseStorage.java @@ -53,17 +53,11 @@ public Map getProperties() { * @param tableId the table id of the table * @param tableUUID the UUID of the table * @param tableCreator the creator of the table - * @param skipProvisioning Set to true if heavy-lifting allocation work needs to be skipped and - * only the table location needs to be returned * @return the table location where the table data should be stored */ @Override public String allocateTableLocation( - String databaseId, - String tableId, - String tableUUID, - String tableCreator, - boolean skipProvisioning) { + String databaseId, String tableId, String tableUUID, String tableCreator) { Preconditions.checkArgument(databaseId != null, "Database ID cannot be null"); Preconditions.checkArgument(tableId != null, "Table ID cannot be null"); Preconditions.checkArgument(tableUUID != null, "Table UUID cannot be null"); diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java index dd696d14..7f78b6d9 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/Storage.java @@ -66,14 +66,8 @@ public interface Storage { * @param tableId the table id of the table * @param tableUUID the UUID of the table * @param tableCreator the creator of the table - * @param skipProvisioning Set to true if heavy-lifting allocation work needs to be skipped and - * only the table location needs to be returned * @return the table location after provisioning is done */ String allocateTableLocation( - String databaseId, - String 
tableId, - String tableUUID, - String tableCreator, - boolean skipProvisioning); + String databaseId, String tableId, String tableUUID, String tableCreator); } diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java index d9b6fd8e..2a9ae378 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/HdfsStorage.java @@ -49,11 +49,7 @@ public StorageClient getClient() { */ @Override public String allocateTableLocation( - String databaseId, - String tableId, - String tableUUID, - String tableCreator, - boolean skipProvisioning) { + String databaseId, String tableId, String tableUUID, String tableCreator) { return Paths.get(getClient().getRootPrefix(), databaseId, tableId + "-" + tableUUID).toString(); } } diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java index 2722079d..76a390b2 100644 --- a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/local/LocalStorage.java @@ -62,11 +62,7 @@ public StorageClient getClient() { */ @Override public String allocateTableLocation( - String databaseId, - String tableId, - String tableUUID, - String tableCreator, - boolean skipProvisioning) { + String databaseId, String tableId, String tableUUID, String tableCreator) { return Paths.get(getClient().getRootPrefix(), databaseId, tableId + "-" + tableUUID).toString(); } } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java index 8d4f740d..0992bf05 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java @@ -117,8 +117,7 @@ public TableDto save(TableDto tableDto) { tableDto.getDatabaseId(), tableDto.getTableId(), tableDto.getTableUUID(), - tableDto.getTableCreator(), - false), + tableDto.getTableCreator()), computePropsForTableCreation(tableDto)); meterRegistry.counter(MetricsConstant.REPO_TABLE_CREATED_CTR).increment(); log.info( diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java index 598e278f..f3bfffa6 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/hdfs/HdfsStorageTest.java @@ -67,8 +67,6 @@ public void testAllocateTableSpace() { when(hdfsStorageClient.getRootPrefix()).thenReturn("/data/openhouse"); String expected = "/data/openhouse/db1/table1-uuid1"; assertEquals( - expected, - hdfsStorage.allocateTableLocation( - databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + expected, hdfsStorage.allocateTableLocation(databaseId, tableId, tableUUID, tableCreator)); } } From 
656f09b4d16dd0cd2c06bb58ae37b64de5694fc1 Mon Sep 17 00:00:00 2001 From: Sushant Raikar Date: Fri, 7 Jun 2024 12:57:08 -0700 Subject: [PATCH 7/8] Add test --- .../mock/storage/base/BaseStorageTest.java | 30 ++++++++++--------- .../mock/storage/local/LocalStorageTest.java | 4 +-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java index 9c404f90..82c66d1b 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/base/BaseStorageTest.java @@ -9,6 +9,8 @@ import com.linkedin.openhouse.cluster.storage.StorageType; import com.linkedin.openhouse.cluster.storage.configs.StorageProperties; import com.linkedin.openhouse.cluster.storage.hdfs.HdfsStorageClient; +import javax.annotation.PostConstruct; +import lombok.Setter; import org.apache.hadoop.fs.FileSystem; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -24,13 +26,14 @@ public class BaseStorageTest { @MockBean private HdfsStorageClient hdfsStorageClient; @TestComponent + @Setter public static class DummyBaseStorage extends BaseStorage { - @Autowired private HdfsStorageClient hdfsStorageClient; + HdfsStorageClient hdfsStorageClient; @Override public StorageType.Type getType() { - return StorageType.HDFS; // return a dummy type + return new StorageType.Type("TEST"); // return a dummy type } @Override @@ -41,19 +44,22 @@ public StorageClient getClient() { @Autowired private DummyBaseStorage baseStorage; + @PostConstruct + public void setupTest() { + baseStorage.setHdfsStorageClient(hdfsStorageClient); + } + private static final String databaseId = "db1"; private static final String tableId = "table1"; private static final String tableUUID = "uuid1"; private static final String tableCreator = "creator1"; - private static final boolean skipProvisioning = false; @Test public void testAllocateTableLocationPattern1() { mockStorageProperties("hdfs://localhost:9000", "/data/openhouse"); assertEquals( "hdfs://localhost:9000/data/openhouse/db1/table1-uuid1", - baseStorage.allocateTableLocation( - databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + baseStorage.allocateTableLocation(databaseId, tableId, tableUUID, tableCreator)); } @Test @@ -61,8 +67,7 @@ public void testAllocateTableLocationPattern2() { mockStorageProperties("hdfs://localhost:9000/", "/data/openhouse"); assertEquals( "hdfs://localhost:9000/data/openhouse/db1/table1-uuid1", - baseStorage.allocateTableLocation( - databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + baseStorage.allocateTableLocation(databaseId, tableId, tableUUID, tableCreator)); } @Test @@ -70,8 +75,7 @@ public void testAllocateTableLocationPattern3() { mockStorageProperties("hdfs://localhost:9000/", "data/openhouse"); assertEquals( "hdfs://localhost:9000/data/openhouse/db1/table1-uuid1", - baseStorage.allocateTableLocation( - databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + baseStorage.allocateTableLocation(databaseId, tableId, tableUUID, tableCreator)); } @Test @@ -79,8 +83,7 @@ public void testAllocateTableLocationPattern4() { mockStorageProperties("hdfs://localhost:9000/", "data"); assertEquals( "hdfs://localhost:9000/data/db1/table1-uuid1", - baseStorage.allocateTableLocation( - 
databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + baseStorage.allocateTableLocation(databaseId, tableId, tableUUID, tableCreator)); } @Test @@ -88,8 +91,7 @@ public void testAllocateTableLocationPattern5() { mockStorageProperties("hdfs:///", "data/openhouse"); assertEquals( "hdfs:///data/openhouse/db1/table1-uuid1", - baseStorage.allocateTableLocation( - databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + baseStorage.allocateTableLocation(databaseId, tableId, tableUUID, tableCreator)); } void mockStorageProperties(String endpoint, String rootPrefix) { @@ -98,7 +100,7 @@ void mockStorageProperties(String endpoint, String rootPrefix) { when(storageProperties.getTypes()) .thenReturn( ImmutableMap.of( - StorageType.HDFS.getValue(), + "TEST", new StorageProperties.StorageTypeProperties( rootPrefix, endpoint, ImmutableMap.of()))); } diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java index c9cfeb78..d6f00890 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/storage/local/LocalStorageTest.java @@ -95,8 +95,6 @@ public void testAllocateTableSpace() { when(localStorageClient.getRootPrefix()).thenReturn("/tmp"); String expected = "/tmp/db1/table1-uuid1"; assertEquals( - expected, - localStorage.allocateTableLocation( - databaseId, tableId, tableUUID, tableCreator, skipProvisioning)); + expected, localStorage.allocateTableLocation(databaseId, tableId, tableUUID, tableCreator)); } } From 913b6c027f04a33504ebb1b5b4a55be5fc0102c7 Mon Sep 17 00:00:00 2001 From: Sushant Raikar Date: Fri, 7 Jun 2024 13:11:02 -0700 Subject: [PATCH 8/8] added config --- .../tables/repository/impl/InternalRepositoryUtils.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java index 22d5eb38..ec86deb1 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java @@ -143,7 +143,8 @@ static TableDto convertToTableDto( .tableUUID(megaProps.get(getCanonicalFieldName("tableUUID"))) .tableLocation( URI.create( - StringUtils.prependIfMissing( + StringUtils.prependIfMissing( // remove after resolving + // https://github.com/linkedin/openhouse/issues/121 megaProps.get(getCanonicalFieldName("tableLocation")), storage.getClient().getEndpoint())) .normalize()