From 8eaddbc0380f0b33f099250bd195ffea77b9ed35 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Wed, 31 Jul 2024 19:52:41 +0200 Subject: [PATCH 01/19] feat: track expected output columns in query builder Signed-off-by: Robin Appelman --- .../DB/QueryBuilder/ExtendedQueryBuilder.php | 4 +++ lib/private/DB/QueryBuilder/QueryBuilder.php | 29 +++++++++++++++++++ lib/public/DB/QueryBuilder/IQueryBuilder.php | 8 +++++ 3 files changed, 41 insertions(+) diff --git a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php index bde6523567f61..f96ed76f6bda8 100644 --- a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php @@ -288,4 +288,8 @@ public function executeQuery(?IDBConnection $connection = null): IResult { public function executeStatement(?IDBConnection $connection = null): int { return $this->builder->executeStatement($connection); } + + public function getOutputColumns(): array { + return $this->builder->getOutputColumns(); + } } diff --git a/lib/private/DB/QueryBuilder/QueryBuilder.php b/lib/private/DB/QueryBuilder/QueryBuilder.php index 4c4786f02b656..c539d672c9f28 100644 --- a/lib/private/DB/QueryBuilder/QueryBuilder.php +++ b/lib/private/DB/QueryBuilder/QueryBuilder.php @@ -49,6 +49,7 @@ class QueryBuilder implements IQueryBuilder { /** @var string */ protected $lastInsertedTable; + private array $selectedColumns = []; /** * Initializes a new QueryBuilder. @@ -470,6 +471,7 @@ public function select(...$selects) { if (count($selects) === 1 && is_array($selects[0])) { $selects = $selects[0]; } + $this->addOutputColumns($selects); $this->queryBuilder->select( $this->helper->quoteColumnNames($selects) @@ -497,6 +499,7 @@ public function selectAlias($select, $alias) { $this->queryBuilder->addSelect( $this->helper->quoteColumnName($select) . ' AS ' . 
$this->helper->quoteColumnName($alias) ); + $this->addOutputColumns([$alias]); return $this; } @@ -518,6 +521,7 @@ public function selectDistinct($select) { if (!is_array($select)) { $select = [$select]; } + $this->addOutputColumns($select); $quotedSelect = $this->helper->quoteColumnNames($select); @@ -547,6 +551,7 @@ public function addSelect(...$selects) { if (count($selects) === 1 && is_array($selects[0])) { $selects = $selects[0]; } + $this->addOutputColumns($selects); $this->queryBuilder->addSelect( $this->helper->quoteColumnNames($selects) @@ -555,6 +560,30 @@ public function addSelect(...$selects) { return $this; } + private function addOutputColumns(array $columns) { + foreach ($columns as $column) { + if (is_array($column)) { + $this->addOutputColumns($column); + } elseif (is_string($column) && !str_contains($column, '*')) { + if (str_contains($column, '.')) { + [, $column] = explode('.', $column); + } + $this->selectedColumns[] = $column; + } + } + } + + public function getOutputColumns(): array { + return array_unique(array_map(function (string $column) { + if (str_contains($column, '.')) { + [, $column] = explode('.', $column); + return $column; + } else { + return $column; + } + }, $this->selectedColumns)); + } + /** * Turns the query being built into a bulk delete query that ranges over * a certain table. 
diff --git a/lib/public/DB/QueryBuilder/IQueryBuilder.php b/lib/public/DB/QueryBuilder/IQueryBuilder.php index 11f9737ba2f54..129787fa9c483 100644 --- a/lib/public/DB/QueryBuilder/IQueryBuilder.php +++ b/lib/public/DB/QueryBuilder/IQueryBuilder.php @@ -1018,4 +1018,12 @@ public function getTableName($table); * @since 9.0.0 */ public function getColumnName($column, $tableAlias = ''); + + /** + * Get a list of column names that are expected in the query output + * + * @return array + * @since 30.0.0 + */ + public function getOutputColumns(): array; } From b1744e70a5396d184125a3f0d743260047f971de Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 8 Aug 2024 17:24:12 +0200 Subject: [PATCH 02/19] fix: don't make ICacheFactory depend on database Signed-off-by: Robin Appelman --- lib/private/Server.php | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/private/Server.php b/lib/private/Server.php index a0072b43ee20a..cf0d6a72bce6d 100644 --- a/lib/private/Server.php +++ b/lib/private/Server.php @@ -643,11 +643,11 @@ public function __construct($webRoot, \OC\Config $config) { ArrayCache::class, ArrayCache::class ); - /** @var \OCP\IConfig $config */ - $config = $c->get(\OCP\IConfig::class); + /** @var SystemConfig $config */ + $config = $c->get(SystemConfig::class); - if ($config->getSystemValueBool('installed', false) && !(defined('PHPUNIT_RUN') && PHPUNIT_RUN)) { - if (!$config->getSystemValueBool('log_query')) { + if ($config->getValue('installed', false) && !(defined('PHPUNIT_RUN') && PHPUNIT_RUN)) { + if (!$config->getValue('log_query')) { try { $v = \OC_App::getAppVersions(); } catch (\Doctrine\DBAL\Exception $e) { @@ -671,10 +671,10 @@ public function __construct($webRoot, \OC\Config $config) { return new \OC\Memcache\Factory($prefix, $c->get(LoggerInterface::class), $profiler, - $config->getSystemValue('memcache.local', null), - $config->getSystemValue('memcache.distributed', null), - 
$config->getSystemValue('memcache.locking', null), - $config->getSystemValueString('redis_log_file') + $config->getValue('memcache.local', null), + $config->getValue('memcache.distributed', null), + $config->getValue('memcache.locking', null), + $config->getValue('redis_log_file') ); } return $arrayCacheFactory; From 8f57d46a0bbe452bb153890a08d686b39daa5614 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 20 Aug 2024 16:46:49 +0200 Subject: [PATCH 03/19] fix: delay calculating global cache prefix until a cache is created Signed-off-by: Robin Appelman --- lib/private/Memcache/Factory.php | 45 +++++++++++++++++++++----- lib/private/Server.php | 51 +++++++++++++++++------------- tests/lib/Memcache/FactoryTest.php | 6 ++-- 3 files changed, 69 insertions(+), 33 deletions(-) diff --git a/lib/private/Memcache/Factory.php b/lib/private/Memcache/Factory.php index c0f4f78720022..931c871d0f140 100644 --- a/lib/private/Memcache/Factory.php +++ b/lib/private/Memcache/Factory.php @@ -16,7 +16,7 @@ class Factory implements ICacheFactory { public const NULL_CACHE = NullCache::class; - private string $globalPrefix; + private ?string $globalPrefix = null; private LoggerInterface $logger; @@ -40,17 +40,23 @@ class Factory implements ICacheFactory { private IProfiler $profiler; /** - * @param string $globalPrefix + * @param callable $globalPrefixClosure * @param LoggerInterface $logger * @param ?class-string $localCacheClass * @param ?class-string $distributedCacheClass * @param ?class-string $lockingCacheClass * @param string $logFile */ - public function __construct(string $globalPrefix, LoggerInterface $logger, IProfiler $profiler, - ?string $localCacheClass = null, ?string $distributedCacheClass = null, ?string $lockingCacheClass = null, string $logFile = '') { + public function __construct( + private $globalPrefixClosure, + LoggerInterface $logger, + IProfiler $profiler, + ?string $localCacheClass = null, + ?string $distributedCacheClass = null, + ?string 
$lockingCacheClass = null, + string $logFile = '' + ) { $this->logFile = $logFile; - $this->globalPrefix = $globalPrefix; if (!$localCacheClass) { $localCacheClass = self::NULL_CACHE; @@ -59,6 +65,7 @@ public function __construct(string $globalPrefix, LoggerInterface $logger, IProf if (!$distributedCacheClass) { $distributedCacheClass = $localCacheClass; } + $distributedCacheClass = ltrim($distributedCacheClass, '\\'); $missingCacheMessage = 'Memcache {class} not available for {use} cache'; @@ -85,6 +92,13 @@ public function __construct(string $globalPrefix, LoggerInterface $logger, IProf $this->profiler = $profiler; } + private function getGlobalPrefix(): ?string { + if (is_null($this->globalPrefix)) { + $this->globalPrefix = ($this->globalPrefixClosure)(); + } + return $this->globalPrefix; + } + /** * create a cache instance for storing locks * @@ -92,8 +106,13 @@ public function __construct(string $globalPrefix, LoggerInterface $logger, IProf * @return IMemcache */ public function createLocking(string $prefix = ''): IMemcache { + $globalPrefix = $this->getGlobalPrefix(); + if (is_null($globalPrefix)) { + return new ArrayCache($prefix); + } + assert($this->lockingCacheClass !== null); - $cache = new $this->lockingCacheClass($this->globalPrefix . '/' . $prefix); + $cache = new $this->lockingCacheClass($globalPrefix . '/' . $prefix); if ($this->lockingCacheClass === Redis::class && $this->profiler->isEnabled()) { // We only support the profiler with Redis $cache = new ProfilerWrapperCache($cache, 'Locking'); @@ -114,8 +133,13 @@ public function createLocking(string $prefix = ''): IMemcache { * @return ICache */ public function createDistributed(string $prefix = ''): ICache { + $globalPrefix = $this->getGlobalPrefix(); + if (is_null($globalPrefix)) { + return new ArrayCache($prefix); + } + assert($this->distributedCacheClass !== null); - $cache = new $this->distributedCacheClass($this->globalPrefix . '/' . 
$prefix); + $cache = new $this->distributedCacheClass($globalPrefix . '/' . $prefix); if ($this->distributedCacheClass === Redis::class && $this->profiler->isEnabled()) { // We only support the profiler with Redis $cache = new ProfilerWrapperCache($cache, 'Distributed'); @@ -136,8 +160,13 @@ public function createDistributed(string $prefix = ''): ICache { * @return ICache */ public function createLocal(string $prefix = ''): ICache { + $globalPrefix = $this->getGlobalPrefix(); + if (is_null($globalPrefix)) { + return new ArrayCache($prefix); + } + assert($this->localCacheClass !== null); - $cache = new $this->localCacheClass($this->globalPrefix . '/' . $prefix); + $cache = new $this->localCacheClass($globalPrefix . '/' . $prefix); if ($this->localCacheClass === Redis::class && $this->profiler->isEnabled()) { // We only support the profiler with Redis $cache = new ProfilerWrapperCache($cache, 'Local'); diff --git a/lib/private/Server.php b/lib/private/Server.php index cf0d6a72bce6d..35a301a9d1c8f 100644 --- a/lib/private/Server.php +++ b/lib/private/Server.php @@ -637,7 +637,7 @@ public function __construct($webRoot, \OC\Config $config) { $this->registerService(Factory::class, function (Server $c) { $profiler = $c->get(IProfiler::class); - $arrayCacheFactory = new \OC\Memcache\Factory('', $c->get(LoggerInterface::class), + $arrayCacheFactory = new \OC\Memcache\Factory(fn () => '', $c->get(LoggerInterface::class), $profiler, ArrayCache::class, ArrayCache::class, @@ -647,33 +647,40 @@ public function __construct($webRoot, \OC\Config $config) { $config = $c->get(SystemConfig::class); if ($config->getValue('installed', false) && !(defined('PHPUNIT_RUN') && PHPUNIT_RUN)) { - if (!$config->getValue('log_query')) { - try { - $v = \OC_App::getAppVersions(); - } catch (\Doctrine\DBAL\Exception $e) { - // Database service probably unavailable - // Probably related to https://github.com/nextcloud/server/issues/37424 - return $arrayCacheFactory; + $logQuery = 
$config->getValue('log_query'); + $prefixClosure = function () use ($logQuery) { + if (!$logQuery) { + try { + $v = \OC_App::getAppVersions(); + } catch (\Doctrine\DBAL\Exception $e) { + // Database service probably unavailable + // Probably related to https://github.com/nextcloud/server/issues/37424 + return null; + } + } else { + // If the log_query is enabled, we can not get the app versions + // as that does a query, which will be logged and the logging + // depends on redis and here we are back again in the same function. + $v = [ + 'log_query' => 'enabled', + ]; } - } else { - // If the log_query is enabled, we can not get the app versions - // as that does a query, which will be logged and the logging - // depends on redis and here we are back again in the same function. - $v = [ - 'log_query' => 'enabled', - ]; - } - $v['core'] = implode(',', \OC_Util::getVersion()); - $version = implode(',', $v); - $instanceId = \OC_Util::getInstanceId(); - $path = \OC::$SERVERROOT; - $prefix = md5($instanceId . '-' . $version . '-' . $path); - return new \OC\Memcache\Factory($prefix, + $v['core'] = implode(',', \OC_Util::getVersion()); + $version = implode(',', $v); + $instanceId = \OC_Util::getInstanceId(); + $path = \OC::$SERVERROOT; + return md5($instanceId . '-' . $version . '-' . 
$path); + }; + return new \OC\Memcache\Factory($prefixClosure, $c->get(LoggerInterface::class), $profiler, + /** @psalm-taint-escape callable */ $config->getValue('memcache.local', null), + /** @psalm-taint-escape callable */ $config->getValue('memcache.distributed', null), + /** @psalm-taint-escape callable */ $config->getValue('memcache.locking', null), + /** @psalm-taint-escape callable */ $config->getValue('redis_log_file') ); } diff --git a/tests/lib/Memcache/FactoryTest.php b/tests/lib/Memcache/FactoryTest.php index 5a1509eb3d92d..b973b5065ea62 100644 --- a/tests/lib/Memcache/FactoryTest.php +++ b/tests/lib/Memcache/FactoryTest.php @@ -110,7 +110,7 @@ public function testCacheAvailability($localCache, $distributedCache, $lockingCa $expectedLocalCache, $expectedDistributedCache, $expectedLockingCache) { $logger = $this->getMockBuilder(LoggerInterface::class)->getMock(); $profiler = $this->getMockBuilder(IProfiler::class)->getMock(); - $factory = new \OC\Memcache\Factory('abc', $logger, $profiler, $localCache, $distributedCache, $lockingCache); + $factory = new \OC\Memcache\Factory(fn () => 'abc', $logger, $profiler, $localCache, $distributedCache, $lockingCache); $this->assertTrue(is_a($factory->createLocal(), $expectedLocalCache)); $this->assertTrue(is_a($factory->createDistributed(), $expectedDistributedCache)); $this->assertTrue(is_a($factory->createLocking(), $expectedLockingCache)); @@ -124,13 +124,13 @@ public function testCacheNotAvailableException($localCache, $distributedCache) { $logger = $this->getMockBuilder(LoggerInterface::class)->getMock(); $profiler = $this->getMockBuilder(IProfiler::class)->getMock(); - new \OC\Memcache\Factory('abc', $logger, $profiler, $localCache, $distributedCache); + new \OC\Memcache\Factory(fn () => 'abc', $logger, $profiler, $localCache, $distributedCache); } public function testCreateInMemory(): void { $logger = $this->getMockBuilder(LoggerInterface::class)->getMock(); $profiler = 
$this->getMockBuilder(IProfiler::class)->getMock(); - $factory = new \OC\Memcache\Factory('abc', $logger, $profiler, null, null, null); + $factory = new \OC\Memcache\Factory(fn () => 'abc', $logger, $profiler, null, null, null); $cache = $factory->createInMemory(); $cache->set('test', 48); From 4ec53e723ee77007f0063effccfa7a3f0facddd6 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 13 Jun 2024 18:34:35 +0200 Subject: [PATCH 04/19] feat: add option to automatically partition queries by specific tables Signed-off-by: Robin Appelman --- lib/composer/composer/autoload_classmap.php | 7 + lib/composer/composer/autoload_static.php | 7 + lib/private/DB/ArrayResult.php | 74 ++++ lib/private/DB/Connection.php | 19 +- .../InvalidPartitionedQueryException.php | 79 ++++ .../Partitioned/JoinCondition.php | 173 ++++++++ .../Partitioned/PartitionQuery.php | 75 ++++ .../Partitioned/PartitionSplit.php | 74 ++++ .../Partitioned/PartitionedQueryBuilder.php | 415 ++++++++++++++++++ .../Partitioned/PartitionedResult.php | 61 +++ lib/private/DB/QueryBuilder/QueryBuilder.php | 2 +- lib/private/Share20/DefaultShareProvider.php | 1 + .../Partitioned/JoinConditionTest.php | 78 ++++ .../PartitionedQueryBuilderTest.php | 212 +++++++++ 14 files changed, 1275 insertions(+), 2 deletions(-) create mode 100644 lib/private/DB/ArrayResult.php create mode 100644 lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php create mode 100644 lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php create mode 100644 lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php create mode 100644 lib/private/DB/QueryBuilder/Partitioned/PartitionSplit.php create mode 100644 lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php create mode 100644 lib/private/DB/QueryBuilder/Partitioned/PartitionedResult.php create mode 100644 tests/lib/DB/QueryBuilder/Partitioned/JoinConditionTest.php create mode 100644 
tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php diff --git a/lib/composer/composer/autoload_classmap.php b/lib/composer/composer/autoload_classmap.php index fca8883ae6cf2..dfb0449687179 100644 --- a/lib/composer/composer/autoload_classmap.php +++ b/lib/composer/composer/autoload_classmap.php @@ -1375,6 +1375,7 @@ 'OC\\DB\\AdapterOCI8' => $baseDir . '/lib/private/DB/AdapterOCI8.php', 'OC\\DB\\AdapterPgSql' => $baseDir . '/lib/private/DB/AdapterPgSql.php', 'OC\\DB\\AdapterSqlite' => $baseDir . '/lib/private/DB/AdapterSqlite.php', + 'OC\\DB\\ArrayResult' => $baseDir . '/lib/private/DB/ArrayResult.php', 'OC\\DB\\BacktraceDebugStack' => $baseDir . '/lib/private/DB/BacktraceDebugStack.php', 'OC\\DB\\Connection' => $baseDir . '/lib/private/DB/Connection.php', 'OC\\DB\\ConnectionAdapter' => $baseDir . '/lib/private/DB/ConnectionAdapter.php', @@ -1408,6 +1409,12 @@ 'OC\\DB\\QueryBuilder\\FunctionBuilder\\SqliteFunctionBuilder' => $baseDir . '/lib/private/DB/QueryBuilder/FunctionBuilder/SqliteFunctionBuilder.php', 'OC\\DB\\QueryBuilder\\Literal' => $baseDir . '/lib/private/DB/QueryBuilder/Literal.php', 'OC\\DB\\QueryBuilder\\Parameter' => $baseDir . '/lib/private/DB/QueryBuilder/Parameter.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\InvalidPartitionedQueryException' => $baseDir . '/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\JoinCondition' => $baseDir . '/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionQuery' => $baseDir . '/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionSplit' => $baseDir . '/lib/private/DB/QueryBuilder/Partitioned/PartitionSplit.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionedQueryBuilder' => $baseDir . 
'/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionedResult' => $baseDir . '/lib/private/DB/QueryBuilder/Partitioned/PartitionedResult.php', 'OC\\DB\\QueryBuilder\\QueryBuilder' => $baseDir . '/lib/private/DB/QueryBuilder/QueryBuilder.php', 'OC\\DB\\QueryBuilder\\QueryFunction' => $baseDir . '/lib/private/DB/QueryBuilder/QueryFunction.php', 'OC\\DB\\QueryBuilder\\QuoteHelper' => $baseDir . '/lib/private/DB/QueryBuilder/QuoteHelper.php', diff --git a/lib/composer/composer/autoload_static.php b/lib/composer/composer/autoload_static.php index f780ee8ac31f0..84e1c0a1188a6 100644 --- a/lib/composer/composer/autoload_static.php +++ b/lib/composer/composer/autoload_static.php @@ -1408,6 +1408,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OC\\DB\\AdapterOCI8' => __DIR__ . '/../../..' . '/lib/private/DB/AdapterOCI8.php', 'OC\\DB\\AdapterPgSql' => __DIR__ . '/../../..' . '/lib/private/DB/AdapterPgSql.php', 'OC\\DB\\AdapterSqlite' => __DIR__ . '/../../..' . '/lib/private/DB/AdapterSqlite.php', + 'OC\\DB\\ArrayResult' => __DIR__ . '/../../..' . '/lib/private/DB/ArrayResult.php', 'OC\\DB\\BacktraceDebugStack' => __DIR__ . '/../../..' . '/lib/private/DB/BacktraceDebugStack.php', 'OC\\DB\\Connection' => __DIR__ . '/../../..' . '/lib/private/DB/Connection.php', 'OC\\DB\\ConnectionAdapter' => __DIR__ . '/../../..' . '/lib/private/DB/ConnectionAdapter.php', @@ -1441,6 +1442,12 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OC\\DB\\QueryBuilder\\FunctionBuilder\\SqliteFunctionBuilder' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/FunctionBuilder/SqliteFunctionBuilder.php', 'OC\\DB\\QueryBuilder\\Literal' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Literal.php', 'OC\\DB\\QueryBuilder\\Parameter' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Parameter.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\InvalidPartitionedQueryException' => __DIR__ . '/../../..' . 
'/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\JoinCondition' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionQuery' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionSplit' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Partitioned/PartitionSplit.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionedQueryBuilder' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php', + 'OC\\DB\\QueryBuilder\\Partitioned\\PartitionedResult' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Partitioned/PartitionedResult.php', 'OC\\DB\\QueryBuilder\\QueryBuilder' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/QueryBuilder.php', 'OC\\DB\\QueryBuilder\\QueryFunction' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/QueryFunction.php', 'OC\\DB\\QueryBuilder\\QuoteHelper' => __DIR__ . '/../../..' . 
'/lib/private/DB/QueryBuilder/QuoteHelper.php', diff --git a/lib/private/DB/ArrayResult.php b/lib/private/DB/ArrayResult.php new file mode 100644 index 0000000000000..5d094533a3f39 --- /dev/null +++ b/lib/private/DB/ArrayResult.php @@ -0,0 +1,74 @@ +count = count($this->rows); + } + + public function closeCursor(): bool { + // noop + return true; + } + + public function fetch(int $fetchMode = PDO::FETCH_ASSOC) { + $row = array_shift($this->rows); + if (!$row) { + return false; + } + return match ($fetchMode) { + PDO::FETCH_ASSOC => $row, + PDO::FETCH_NUM => array_values($row), + PDO::FETCH_COLUMN => current($row), + default => throw new \InvalidArgumentException("Fetch mode not supported for array result"), + }; + + } + + public function fetchAll(int $fetchMode = PDO::FETCH_ASSOC): array { + return match ($fetchMode) { + PDO::FETCH_ASSOC => $this->rows, + PDO::FETCH_NUM => array_map(function ($row) { + return array_values($row); + }, $this->rows), + PDO::FETCH_COLUMN => array_map(function ($row) { + return current($row); + }, $this->rows), + default => throw new \InvalidArgumentException("Fetch mode not supported for array result"), + }; + } + + public function fetchColumn() { + return $this->fetchOne(); + } + + public function fetchOne() { + $row = $this->fetch(); + if ($row) { + return current($row); + } else { + return false; + } + } + + public function rowCount(): int { + return $this->count; + } +} diff --git a/lib/private/DB/Connection.php b/lib/private/DB/Connection.php index 3cdd5fd06c01b..87d21f5b13ef6 100644 --- a/lib/private/DB/Connection.php +++ b/lib/private/DB/Connection.php @@ -23,6 +23,8 @@ use Doctrine\DBAL\Result; use Doctrine\DBAL\Schema\Schema; use Doctrine\DBAL\Statement; +use OC\DB\QueryBuilder\Partitioned\PartitionSplit; +use OC\DB\QueryBuilder\Partitioned\PartitionedQueryBuilder; use OC\DB\QueryBuilder\QueryBuilder; use OC\SystemConfig; use OCP\DB\QueryBuilder\IQueryBuilder; @@ -75,6 +77,9 @@ class Connection extends 
PrimaryReadReplicaConnection { protected bool $logRequestId; protected string $requestId; + /** @var array> */ + protected array $partitions; + /** * Initializes a new instance of the Connection class. * @@ -117,6 +122,8 @@ public function __construct( $this->_config->setSQLLogger($debugStack); } + $this->partitions = $this->systemConfig->getValue('db.partitions', []); + $this->setNestTransactionsWithSavepoints(true); } @@ -168,11 +175,21 @@ public function getStats(): array { */ public function getQueryBuilder(): IQueryBuilder { $this->queriesBuilt++; - return new QueryBuilder( + $builder = new QueryBuilder( new ConnectionAdapter($this), $this->systemConfig, $this->logger ); + if (count($this->partitions) > 0) { + $builder = new PartitionedQueryBuilder($builder); + foreach ($this->partitions as $name => $tables) { + $partition = new PartitionSplit($name, $tables); + $builder->addPartition($partition); + } + return $builder; + } else { + return $builder; + } } /** diff --git a/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php b/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php new file mode 100644 index 0000000000000..3a5aa2f3e0e13 --- /dev/null +++ b/lib/private/DB/QueryBuilder/Partitioned/InvalidPartitionedQueryException.php @@ -0,0 +1,79 @@ +select("mount_point", "mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("root_id", "fileid")); + * ``` + * will not work, as the query builder doesn't know that the `mimetype` column belongs to the "filecache partition". + * Instead, you need to do + * ``` + * $query->select("mount_point", "f.mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid")); + * ``` + * + * 2. The "ON" condition for the join can only perform a comparison between both sides of the join once. 
+ * + * For example: + * ``` + * $query->select("mount_point", "mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->andX($query->expr()->eq("m.root_id", "f.fileid"), $query->expr()->eq("m.storage_id", "f.storage"))); + * ``` + * will not work. + * + * 3. An "OR" expression in the "WHERE" cannot mention both sides of the join; this does not apply to "AND" expressions. + * + * For example: + * ``` + * $query->select("mount_point", "mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid")) + * ->where($query->expr()->orX( + * $query->expr()->eq("m.user_id", $query->createNamedParameter("test")), + * $query->expr()->eq("f.name", $query->createNamedParameter("test")), + * )); + * ``` + * will not work, but + * ``` + * $query->select("mount_point", "mimetype") + * ->from("mounts", "m") + * ->innerJoin("m", "filecache", "f", $query->expr()->eq("m.root_id", "f.fileid")) + * ->where($query->expr()->andX( + * $query->expr()->eq("m.user_id", $query->createNamedParameter("test")), + * $query->expr()->eq("f.name", $query->createNamedParameter("test")), + * )); + * ``` + * will. + * + * 4. Queries that join cross-partition cannot use positional parameters, only named parameters are allowed. + * 5. The "ON" condition of a join cannot contain an "OR" expression. + * 6. Right-joins are not allowed. + * 7. Update, delete and insert statements aren't allowed to contain cross-partition joins. + * 8. Queries that "GROUP BY" a column from the joined partition are not allowed. + * 9. Any `join` call needs to be made before any `where` call. + * 10. Queries that join cross-partition with an "INNER JOIN" or "LEFT JOIN" with a condition on the left side + * cannot use "LIMIT" or "OFFSET" in queries. + * + * The part of the query running on the sharded table has some additional limitations, + * see the `InvalidShardedQueryException` documentation for more information. 
+ */ +class InvalidPartitionedQueryException extends \Exception { + +} diff --git a/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php new file mode 100644 index 0000000000000..54d913251d477 --- /dev/null +++ b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php @@ -0,0 +1,173 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OC\DB\QueryBuilder\Partitioned; + +use OC\DB\QueryBuilder\CompositeExpression; +use OC\DB\QueryBuilder\QueryFunction; +use OCP\DB\QueryBuilder\IQueryFunction; + +/** + * Utility class for working with join conditions + */ +class JoinCondition { + public function __construct( + public string|IQueryFunction $fromColumn, + public ?string $fromAlias, + public string|IQueryFunction $toColumn, + public ?string $toAlias, + public array $fromConditions, + public array $toConditions, + ) { + if (is_string($this->fromColumn) && str_starts_with($this->fromColumn, '(')) { + $this->fromColumn = new QueryFunction($this->fromColumn); + } + if (is_string($this->toColumn) && str_starts_with($this->toColumn, '(')) { + $this->toColumn = new QueryFunction($this->toColumn); + } + } + + /** + * @param JoinCondition[] $conditions + * @return JoinCondition + */ + public static function merge(array $conditions): JoinCondition { + $fromColumn = ""; + $toColumn = ""; + $fromAlias = null; + $toAlias = null; + $fromConditions = []; + $toConditions = []; + foreach ($conditions as $condition) { + if (($condition->fromColumn && $fromColumn) ||($condition->toColumn && $toColumn)) { + throw new InvalidPartitionedQueryException("Can't join from {$condition->fromColumn} to {$condition->toColumn} as it already join froms {$fromColumn} to {$toColumn}"); + } + if ($condition->fromColumn) { + $fromColumn = $condition->fromColumn; + } + if ($condition->toColumn) { + $toColumn = $condition->toColumn; + } + if ($condition->fromAlias) { + $fromAlias = $condition->fromAlias; + } + if 
($condition->toAlias) { + $toAlias = $condition->toAlias; + } + $fromConditions = array_merge($fromConditions, $condition->fromConditions); + $toConditions = array_merge($toConditions, $condition->toConditions); + } + return new JoinCondition($fromColumn, $fromAlias, $toColumn, $toAlias, $fromConditions, $toConditions); + } + + /** + * @param null|string|CompositeExpression $condition + * @param string $join + * @param string $alias + * @param string $fromAlias + * @return JoinCondition + * @throws InvalidPartitionedQueryException + */ + public static function parse($condition, string $join, string $alias, string $fromAlias): JoinCondition { + if ($condition === null) { + throw new InvalidPartitionedQueryException("Can't join on $join without a condition"); + } + + $result = self::parseSubCondition($condition, $join, $alias, $fromAlias); + if (!$result->fromColumn || !$result->toColumn) { + throw new InvalidPartitionedQueryException("No join condition found from $fromAlias to $alias"); + } + return $result; + } + + private static function parseSubCondition($condition, string $join, string $alias, string $fromAlias): JoinCondition { + if ($condition instanceof CompositeExpression) { + if ($condition->getType() === CompositeExpression::TYPE_OR) { + throw new InvalidPartitionedQueryException("Cannot join on $join with an OR expression"); + } + return self::merge(array_map(function ($subCondition) use ($join, $alias, $fromAlias) { + return self::parseSubCondition($subCondition, $join, $alias, $fromAlias); + }, $condition->getParts())); + } + + $condition = (string)$condition; + $isSubCondition = self::isExtraCondition($condition); + if ($isSubCondition) { + if (self::mentionsAlias($condition, $fromAlias)) { + return new JoinCondition("", null, "", null, [$condition], []); + } else { + return new JoinCondition("", null, "", null, [], [$condition]); + } + } + + $condition = str_replace('`', '', $condition); + + // expect a condition in the form of 'alias1.column1 = 
alias2.column2' + if (!str_contains($condition, ' = ')) { + throw new InvalidPartitionedQueryException("Can only join on $join with an `eq` condition"); + } + $parts = explode(' = ', $condition, 2); + $parts = array_map(function (string $part) { + return self::clearConditionPart($part); + }, $parts); + + if (!self::isSingleCondition($parts[0]) || !self::isSingleCondition($parts[1])) { + throw new InvalidPartitionedQueryException("Can only join on $join with a single condition"); + } + + + if (self::mentionsAlias($parts[0], $fromAlias)) { + return new JoinCondition($parts[0], self::getAliasForPart($parts[0]), $parts[1], self::getAliasForPart($parts[1]), [], []); + } elseif (self::mentionsAlias($parts[1], $fromAlias)) { + return new JoinCondition($parts[1], self::getAliasForPart($parts[1]), $parts[0], self::getAliasForPart($parts[0]), [], []); + } else { + throw new InvalidPartitionedQueryException("join condition for $join needs to explicitly refer to the table by alias"); + } + } + + private static function isSingleCondition(string $condition): bool { + return !(str_contains($condition, ' OR ') || str_contains($condition, ' AND ')); + } + + private static function getAliasForPart(string $part): ?string { + if (str_contains($part, ' ')) { + return uniqid('join_alias_'); + } else { + return null; + } + } + + private static function clearConditionPart(string $part): string { + if (str_starts_with($part, 'CAST(')) { + // pgsql/mysql cast + $part = substr($part, strlen('CAST(')); + [$part] = explode(' AS ', $part); + } elseif (str_starts_with($part, 'to_number(to_char(')) { + // oracle cast to int + $part = substr($part, strlen('to_number(to_char('), -2); + } elseif (str_starts_with($part, 'to_number(to_char(')) { + // oracle cast to string + $part = substr($part, strlen('to_char('), -1); + } + return $part; + } + + /** + * Check that a condition is an extra limit on the from/to part, and not the join condition + * + * This is done by checking that only one of the 
/**
 * Check whether the condition contains the alias immediately followed by a dot.
 *
 * This is a plain substring match on `<alias>.`.
 */
private static function mentionsAlias(string $condition, string $alias): bool {
	$qualifiedPrefix = $alias . '.';
	return str_contains($condition, $qualifiedPrefix);
}
/**
 * Information about a database partition, containing the tables in the partition and any active alias
 */
class PartitionSplit {
	/** @var array<string, string> map of alias => table name */
	public array $aliases = [];

	/**
	 * @param string $name name of the partition
	 * @param string[] $tables tables that belong to the partition
	 */
	public function __construct(
		public string $name,
		public array $tables,
	) {
	}

	/**
	 * Register an alias for a table; ignored when the table is not part of this partition.
	 */
	public function addAlias(string $table, string $alias): void {
		if ($this->containsTable($table)) {
			$this->aliases[$alias] = $table;
		}
	}

	/**
	 * Add a table to the partition unless it is already listed.
	 */
	public function addTable(string $table): void {
		if (!$this->containsTable($table)) {
			$this->tables[] = $table;
		}
	}

	public function containsTable(string $table): bool {
		return in_array($table, $this->tables, true);
	}

	public function containsAlias(string $alias): bool {
		return array_key_exists($alias, $this->aliases);
	}

	/**
	 * All names this partition can be referred to by: every alias followed by every table.
	 *
	 * Both inputs are lists, so they have to be concatenated; the array union
	 * operator (`+`) would discard every table whose numeric index is already
	 * taken by an alias.
	 *
	 * @return array
	 */
	private function getTablesAndAliases(): array {
		return array_merge(array_keys($this->aliases), $this->tables);
	}

	/**
	 * Check if a query predicate mentions a table or alias from this partition
	 *
	 * The predicate is searched for the quoted form `` `name`.` ``.
	 *
	 * @param string $predicate
	 * @return bool
	 */
	public function checkPredicateForTable(string $predicate): bool {
		foreach ($this->getTablesAndAliases() as $name) {
			if (str_contains($predicate, "`$name`.`")) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Check if an unquoted column reference starts with `<table-or-alias>.` for this partition.
	 */
	public function isColumnInPartition(string $column): bool {
		foreach ($this->getTablesAndAliases() as $name) {
			if (str_starts_with($column, "$name.")) {
				return true;
			}
		}
		return false;
	}
}
/**
 * Make sure a column is part of the pending select list.
 *
 * This is mainly used so that the rows returned from both sides of a partition contain the columns of the join predicate.
 *
 * The name that is looked for is the alias when one is given, otherwise the column; anything up to the first dot is dropped.
 * Nothing is added when a pending select is exactly that name, is `*`, or ends in `.<name>`.
 *
 * @param string|IQueryFunction $column
 * @param string|null $alias
 * @return void
 */
private function ensureSelect(string|IQueryFunction $column, ?string $alias = null): void {
	$wanted = $alias ?: $column;
	if (str_contains($wanted, '.')) {
		$wanted = explode('.', $wanted)[1];
	}

	foreach ($this->selects as $pending) {
		$alreadyCovered = $pending['select'] === $wanted
			|| $pending['select'] === '*'
			|| str_ends_with($pending['select'], '.' . $wanted);
		if ($alreadyCovered) {
			return;
		}
	}

	if (!$alias) {
		$this->addSelect($column);
		return;
	}
	$this->selectAlias($column, $alias);
}
/**
 * Find the first registered partition that knows the given name, either as a table or as an alias.
 *
 * @param string $table table name or alias
 * @return PartitionSplit|null null when no registered partition matches
 */
private function getPartition(string $table): ?PartitionSplit {
	foreach ($this->partitions as $candidate) {
		if (!$candidate->containsTable($table) && !$candidate->containsAlias($table)) {
			continue;
		}
		return $candidate;
	}
	return null;
}
$joinCondition->toColumn, + $joinMode + ); + $this->splitQueries[$partition->name]->query->from($join, $alias); + $this->ensureSelect($joinCondition->fromColumn, $joinCondition->fromAlias); + $this->ensureSelect($joinCondition->toColumn, $joinCondition->toAlias); + } else { + $query = $this->splitQueries[$partition->name]->query; + if ($partition->containsAlias($fromAlias)) { + $query->innerJoin($fromAlias, $join, $alias, $condition); + } else { + throw new InvalidPartitionedQueryException("Can't join across partition boundaries more than once"); + } + } + $this->splitQueries[$partition->name]->query->andWhere(...$joinCondition->toConditions); + parent::andWhere(...$joinCondition->fromConditions); + return $this; + } elseif ($fromPartition && $fromPartition !== $partition) { + // join from partition, to the main db + + $joinCondition = JoinCondition::parse($condition, $join, $alias, $fromAlias); + if (str_starts_with($fromPartition->name, 'from_')) { + $partitionName = $fromPartition->name; + } else { + $partitionName = 'from_' . $fromPartition->name; + } + + if (!isset($this->splitQueries[$partitionName])) { + $newPartition = new PartitionSplit($partitionName, [$join]); + $newPartition->addAlias($join, $alias); + $this->partitions[] = $newPartition; + + $this->splitQueries[$partitionName] = new PartitionQuery( + $this->newQuery(), + $joinCondition->fromAlias ?? $joinCondition->fromColumn, $joinCondition->toAlias ?? 
/**
 * Expand every "AND" composite in a list of predicates into its parts, recursively.
 *
 * Predicates that are not an AND composite are kept as they are, in their original order.
 *
 * @param array $predicates
 * @return array
 */
private function flattenPredicates(array $predicates): array {
	$flat = [];
	foreach ($predicates as $item) {
		$isAndGroup = $item instanceof CompositeExpression
			&& $item->getType() === CompositeExpression::TYPE_AND;
		if (!$isAndGroup) {
			$flat[] = $item;
			continue;
		}
		// append the (already flattened) parts of the AND group
		foreach ($this->flattenPredicates($item->getParts()) as $nested) {
			$flat[] = $nested;
		}
	}
	return $flat;
}
/**
 * Find the first registered partition whose table or alias is mentioned in the predicate.
 *
 * As a side effect, remembers whether any inspected predicate contains a `?`,
 * so executeQuery() can reject positional parameters in partitioned queries.
 *
 * @param string $predicate the predicate, cast to its SQL string form
 * @return PartitionSplit|null null when no registered partition is mentioned
 */
private function getPartitionForPredicate(string $predicate): ?PartitionSplit {
	// this check does not depend on the partition, so it is done once and
	// also when no partition has been registered yet
	if (str_contains($predicate, '?')) {
		$this->hasPositionalParameter = true;
	}

	foreach ($this->partitions as $partition) {
		if ($partition->checkPredicateForTable($predicate)) {
			return $partition;
		}
	}
	return null;
}
/**
 * Execute the main query and, when the query was split, wrap the result so the split queries get merged in.
 *
 * Before executing:
 *  - the collected selects are distributed over the main and split queries
 *  - the parameters and parameter types of this builder are copied to every split query
 *
 * @param IDBConnection|null $connection
 * @return IResult a PartitionedResult when there are split queries, the plain result otherwise
 * @throws InvalidPartitionedQueryException when a split query is combined with positional parameters,
 *         or when a limit or offset is set while any split query has a join mode other than JOIN_MODE_LEFT
 */
public function executeQuery(?IDBConnection $connection = null): IResult {
	$this->applySelects();
	if ($this->splitQueries && $this->hasPositionalParameter) {
		throw new InvalidPartitionedQueryException("Partitioned queries aren't allowed to use positional arguments");
	}
	foreach ($this->splitQueries as $split) {
		$split->query->setParameters($this->getParameters(), $this->getParameterTypes());
	}
	if (count($this->splitQueries) > 0) {
		$hasNonLeftJoins = array_reduce($this->splitQueries, function (bool $hasNonLeftJoins, PartitionQuery $query) {
			return $hasNonLeftJoins || $query->joinMode !== PartitionQuery::JOIN_MODE_LEFT;
		}, false);
		if ($hasNonLeftJoins) {
			if (is_int($this->limit)) {
				throw new InvalidPartitionedQueryException("Limit is not allowed in partitioned queries");
			}
			if (is_int($this->offset)) {
				throw new InvalidPartitionedQueryException("Offset is not allowed in partitioned queries");
			}
		}
	}

	$result = parent::executeQuery($connection);
	if (count($this->splitQueries) > 0) {
		return new PartitionedResult($this->splitQueries, $result);
	} else {
		return $result;
	}
}
/**
 * Load all rows of the wrapped result once and merge every split query into them.
 *
 * Later calls return immediately; the merged rows and their count are kept on the instance.
 */
private function fetchRows(): void {
	if ($this->fetched) {
		return;
	}

	// flag first, so the work below is never attempted twice
	$this->fetched = true;
	$this->rows = $this->result->fetchAll();

	// each part receives the rows produced so far and returns the merged set
	foreach ($this->splitOfParts as $splitPart) {
		$this->rows = $splitPart->mergeWith($this->rows);
	}

	$this->count = count($this->rows);
}
/**
 * Tests for parsing join conditions, run once per database platform.
 */
class JoinConditionTest extends TestCase {
	/**
	 * One data set per database platform constant.
	 *
	 * Static, so the provider keeps working with PHPUnit versions that require static data providers.
	 *
	 * @return array<int, array{0: string}>
	 */
	public static function platformProvider(): array {
		return [
			[IDBConnection::PLATFORM_SQLITE],
			[IDBConnection::PLATFORM_POSTGRES],
			[IDBConnection::PLATFORM_MYSQL],
			[IDBConnection::PLATFORM_ORACLE],
		];
	}

	/**
	 * Create a query builder on top of a mocked connection that reports the given platform.
	 */
	private function getBuilder(string $platform): IQueryBuilder {
		$connection = $this->createMock(ConnectionAdapter::class);
		$connection->method('getDatabaseProvider')->willReturn($platform);
		return new QueryBuilder(
			$connection,
			$this->createMock(SystemConfig::class),
			$this->createMock(LoggerInterface::class)
		);
	}

	/**
	 * An AND of one column-to-column comparison and two parameter comparisons:
	 * the column comparison becomes the join columns, the rest become conditions on the joined table.
	 *
	 * @dataProvider platformProvider
	 */
	public function testParseCondition(string $platform): void {
		$query = $this->getBuilder($platform);
		$param1 = $query->createNamedParameter('files');
		$param2 = $query->createNamedParameter('test');
		$condition = $query->expr()->andX(
			$query->expr()->eq('tagmap.categoryid', 'tag.id'),
			$query->expr()->eq('tag.type', $param1),
			$query->expr()->eq('tag.uid', $param2)
		);
		$parsed = JoinCondition::parse($condition, 'vcategory', 'tag', 'tagmap');
		$this->assertEquals('tagmap.categoryid', $parsed->fromColumn);
		$this->assertEquals('tag.id', $parsed->toColumn);
		$this->assertEquals([], $parsed->fromConditions);
		$this->assertEquals([
			$query->expr()->eq('tag.type', $param1),
			$query->expr()->eq('tag.uid', $param2),
		], $parsed->toConditions);
	}

	/**
	 * A cast around the from-column has to be stripped when parsing.
	 *
	 * @dataProvider platformProvider
	 */
	public function testParseCastCondition(string $platform): void {
		$query = $this->getBuilder($platform);

		$condition = $query->expr()->eq($query->expr()->castColumn('m.objectid', IQueryBuilder::PARAM_INT), 'f.fileid');
		$parsed = JoinCondition::parse($condition, 'filecache', 'f', 'm');
		$this->assertEquals('m.objectid', $parsed->fromColumn);
		$this->assertEquals('f.fileid', $parsed->toColumn);
		$this->assertEquals([], $parsed->fromConditions);
	}
}
/**
 * Remove the rows created by setupFileCache() from the tables used in these tests.
 *
 * Runs one DELETE per table, each on a fresh query builder.
 */
private function cleanupDb() {
	// storages: only rows with a numeric_id above 1000000 (the fixture uses 1001001)
	$query = $this->getQueryBuilder();
	$query->delete('storages')
		->where($query->expr()->gt('numeric_id', $query->createNamedParameter(1000000, IQueryBuilder::PARAM_INT)));
	$query->executeStatement();

	// filecache: only rows with a storage above 1000000, on every shard
	$query = $this->getQueryBuilder();
	$query->delete('filecache')
		->where($query->expr()->gt('storage', $query->createNamedParameter(1000000, IQueryBuilder::PARAM_INT)))
		->runAcrossAllShards();
	$query->executeStatement();

	// filecache_extended: no WHERE clause, so every row is deleted, on every shard
	// NOTE(review): presumably acceptable because this only runs against a test database - confirm
	$query = $this->getQueryBuilder();
	$query->delete('filecache_extended')
		->runAcrossAllShards();
	$query->executeStatement();

	// mounts: only rows whose user_id matches the LIKE pattern 'partitioned_%'
	$query = $this->getQueryBuilder();
	$query->delete('mounts')
		->where($query->expr()->like('user_id', $query->createNamedParameter('partitioned_%')));
	$query->executeStatement();
}
/**
 * Join from a table in the main database (mounts) to a table in the partition (filecache).
 *
 * Expects one split query next to the main query and a single merged row.
 */
public function testSimplePartitionedQuery() {
	$builder = $this->getQueryBuilder();
	$builder->addPartition(new PartitionSplit('filecache', ['filecache']));

	// query borrowed from UserMountCache
	$query = $builder->select('storage_id', 'root_id', 'user_id', 'mount_point', 'mount_id', 'f.path', 'mount_provider_class')
		->from('mounts', 'm')
		->innerJoin('m', 'filecache', 'f', $builder->expr()->eq('m.root_id', 'f.fileid'))
		->where($builder->expr()->eq('storage_id', $builder->createNamedParameter(1001001, IQueryBuilder::PARAM_INT)));

	$query->andWhere($builder->expr()->eq('user_id', $builder->createNamedParameter('partitioned_test')));

	$this->assertEquals(2, $query->getPartitionCount());

	$results = $query->executeQuery()->fetchAll();
	$this->assertCount(1, $results);
	// assertEquals takes the expected value first, so failure messages read correctly
	$this->assertEquals('partitioned_test', $results[0]['user_id']);
	$this->assertEquals('/mount/point', $results[0]['mount_point']);
	$this->assertEquals('test', $results[0]['mount_provider_class']);
	$this->assertEquals('file1', $results[0]['path']);
}
/**
 * Join in the other direction: the query starts from a table in the partition (filecache)
 * and joins a table that is not part of it (mounts).
 *
 * Expects one split query next to the main query and a single merged row.
 */
public function testPartitionedQueryFromSplit() {
	$builder = $this->getQueryBuilder();
	$builder->addPartition(new PartitionSplit('filecache', ['filecache']));

	$query = $builder->select('storage', 'm.root_id', 'm.user_id', 'm.mount_point', 'm.mount_id', 'path', 'm.mount_provider_class')
		->from('filecache', 'f')
		->innerJoin('f', 'mounts', 'm', $builder->expr()->eq('m.root_id', 'f.fileid'));
	$query->where($builder->expr()->eq('storage', $builder->createNamedParameter(1001001, IQueryBuilder::PARAM_INT)));

	$query->andWhere($builder->expr()->eq('m.user_id', $builder->createNamedParameter('partitioned_test')));

	$this->assertEquals(2, $query->getPartitionCount());

	$results = $query->executeQuery()->fetchAll();
	$this->assertCount(1, $results);
	// assertEquals takes the expected value first, so failure messages read correctly
	$this->assertEquals('partitioned_test', $results[0]['user_id']);
	$this->assertEquals('/mount/point', $results[0]['mount_point']);
	$this->assertEquals('test', $results[0]['mount_provider_class']);
	$this->assertEquals('file1', $results[0]['path']);
}
query borrowed from UserMountCache + $query = $builder->select('storage_id', 'root_id', 'user_id', 'mount_point', 'mount_id', 'f.path', 'mount_provider_class') + ->selectAlias('s.id', 'storage_string_id') + ->from('mounts', 'm') + ->innerJoin('m', 'filecache', 'f', $builder->expr()->eq('m.root_id', 'f.fileid')) + ->innerJoin('f', 'storages', 's', $builder->expr()->eq('f.storage', 's.numeric_id')) + ->where($builder->expr()->eq('storage_id', $builder->createNamedParameter(1001001, IQueryBuilder::PARAM_INT))); + + $query->andWhere($builder->expr()->eq('user_id', $builder->createNamedParameter('partitioned_test'))); + + $this->assertEquals(3, $query->getPartitionCount()); + + $results = $query->executeQuery()->fetchAll(); + $this->assertCount(1, $results); + $this->assertEquals($results[0]['user_id'], 'partitioned_test'); + $this->assertEquals($results[0]['mount_point'], '/mount/point'); + $this->assertEquals($results[0]['mount_provider_class'], 'test'); + $this->assertEquals($results[0]['path'], 'file1'); + $this->assertEquals($results[0]['storage_string_id'], 'test1'); + } +} From 82d7eaf80a545d69f99134de5cc08fcf7bd700e7 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Wed, 31 Jul 2024 18:41:11 +0200 Subject: [PATCH 05/19] feat: implement distributing partitioned queries over multiple shards Signed-off-by: Robin Appelman --- lib/composer/composer/autoload_classmap.php | 10 + lib/composer/composer/autoload_static.php | 10 + lib/private/DB/Connection.php | 74 +++- lib/private/DB/ConnectionAdapter.php | 10 + lib/private/DB/ConnectionFactory.php | 19 +- .../DB/QueryBuilder/ExtendedQueryBuilder.php | 14 + .../Partitioned/JoinCondition.php | 2 +- .../Partitioned/PartitionQuery.php | 8 +- .../Partitioned/PartitionedQueryBuilder.php | 33 +- lib/private/DB/QueryBuilder/QueryBuilder.php | 2 +- .../Sharded/AutoIncrementHandler.php | 152 +++++++ .../Sharded/CrossShardMoveHelper.php | 162 +++++++ .../QueryBuilder/Sharded/HashShardMapper.php | 21 + 
.../Sharded/InvalidShardedQueryException.php | 29 ++ .../Sharded/RoundRobinShardMapper.php | 20 + .../Sharded/ShardConnectionManager.php | 43 ++ .../QueryBuilder/Sharded/ShardDefinition.php | 66 +++ .../QueryBuilder/Sharded/ShardQueryRunner.php | 197 +++++++++ .../Sharded/ShardedQueryBuilder.php | 403 ++++++++++++++++++ lib/private/DB/SchemaWrapper.php | 3 + lib/private/Files/Cache/Cache.php | 96 ++++- lib/private/Server.php | 2 +- lib/public/DB/QueryBuilder/IQueryBuilder.php | 27 ++ .../DB/QueryBuilder/Sharded/IShardMapper.php | 25 ++ lib/public/IDBConnection.php | 19 + tests/lib/DB/ConnectionFactoryTest.php | 4 +- .../PartitionedQueryBuilderTest.php | 9 +- .../Sharded/SharedQueryBuilderTest.php | 125 ++++++ tests/lib/Files/Cache/CacheTest.php | 17 + tests/lib/TestCase.php | 1 + 30 files changed, 1575 insertions(+), 28 deletions(-) create mode 100644 lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/RoundRobinShardMapper.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php create mode 100644 lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php create mode 100644 lib/public/DB/QueryBuilder/Sharded/IShardMapper.php create mode 100644 tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php diff --git a/lib/composer/composer/autoload_classmap.php b/lib/composer/composer/autoload_classmap.php index dfb0449687179..4094e647204b5 100644 --- a/lib/composer/composer/autoload_classmap.php +++ b/lib/composer/composer/autoload_classmap.php @@ -247,6 +247,7 @@ 
'OCP\\DB\\QueryBuilder\\IParameter' => $baseDir . '/lib/public/DB/QueryBuilder/IParameter.php', 'OCP\\DB\\QueryBuilder\\IQueryBuilder' => $baseDir . '/lib/public/DB/QueryBuilder/IQueryBuilder.php', 'OCP\\DB\\QueryBuilder\\IQueryFunction' => $baseDir . '/lib/public/DB/QueryBuilder/IQueryFunction.php', + 'OCP\\DB\\QueryBuilder\\Sharded\\IShardMapper' => $baseDir . '/lib/public/DB/QueryBuilder/Sharded/IShardMapper.php', 'OCP\\DB\\Types' => $baseDir . '/lib/public/DB/Types.php', 'OCP\\Dashboard\\IAPIWidget' => $baseDir . '/lib/public/Dashboard/IAPIWidget.php', 'OCP\\Dashboard\\IAPIWidgetV2' => $baseDir . '/lib/public/Dashboard/IAPIWidgetV2.php', @@ -1418,6 +1419,15 @@ 'OC\\DB\\QueryBuilder\\QueryBuilder' => $baseDir . '/lib/private/DB/QueryBuilder/QueryBuilder.php', 'OC\\DB\\QueryBuilder\\QueryFunction' => $baseDir . '/lib/private/DB/QueryBuilder/QueryFunction.php', 'OC\\DB\\QueryBuilder\\QuoteHelper' => $baseDir . '/lib/private/DB/QueryBuilder/QuoteHelper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\AutoIncrementHandler' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php', + 'OC\\DB\\QueryBuilder\\Sharded\\CrossShardMoveHelper' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\HashShardMapper' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\InvalidShardedQueryException' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php', + 'OC\\DB\\QueryBuilder\\Sharded\\RoundRobinShardMapper' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/RoundRobinShardMapper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardConnectionManager' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardDefinition' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardQueryRunner' => $baseDir . 
'/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardedQueryBuilder' => $baseDir . '/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php', 'OC\\DB\\ResultAdapter' => $baseDir . '/lib/private/DB/ResultAdapter.php', 'OC\\DB\\SQLiteMigrator' => $baseDir . '/lib/private/DB/SQLiteMigrator.php', 'OC\\DB\\SQLiteSessionInit' => $baseDir . '/lib/private/DB/SQLiteSessionInit.php', diff --git a/lib/composer/composer/autoload_static.php b/lib/composer/composer/autoload_static.php index 84e1c0a1188a6..24608983d2565 100644 --- a/lib/composer/composer/autoload_static.php +++ b/lib/composer/composer/autoload_static.php @@ -280,6 +280,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OCP\\DB\\QueryBuilder\\IParameter' => __DIR__ . '/../../..' . '/lib/public/DB/QueryBuilder/IParameter.php', 'OCP\\DB\\QueryBuilder\\IQueryBuilder' => __DIR__ . '/../../..' . '/lib/public/DB/QueryBuilder/IQueryBuilder.php', 'OCP\\DB\\QueryBuilder\\IQueryFunction' => __DIR__ . '/../../..' . '/lib/public/DB/QueryBuilder/IQueryFunction.php', + 'OCP\\DB\\QueryBuilder\\Sharded\\IShardMapper' => __DIR__ . '/../../..' . '/lib/public/DB/QueryBuilder/Sharded/IShardMapper.php', 'OCP\\DB\\Types' => __DIR__ . '/../../..' . '/lib/public/DB/Types.php', 'OCP\\Dashboard\\IAPIWidget' => __DIR__ . '/../../..' . '/lib/public/Dashboard/IAPIWidget.php', 'OCP\\Dashboard\\IAPIWidgetV2' => __DIR__ . '/../../..' . '/lib/public/Dashboard/IAPIWidgetV2.php', @@ -1451,6 +1452,15 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OC\\DB\\QueryBuilder\\QueryBuilder' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/QueryBuilder.php', 'OC\\DB\\QueryBuilder\\QueryFunction' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/QueryFunction.php', 'OC\\DB\\QueryBuilder\\QuoteHelper' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/QuoteHelper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\AutoIncrementHandler' => __DIR__ . '/../../..' . 
'/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php', + 'OC\\DB\\QueryBuilder\\Sharded\\CrossShardMoveHelper' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\HashShardMapper' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\InvalidShardedQueryException' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php', + 'OC\\DB\\QueryBuilder\\Sharded\\RoundRobinShardMapper' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/RoundRobinShardMapper.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardConnectionManager' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardDefinition' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardQueryRunner' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php', + 'OC\\DB\\QueryBuilder\\Sharded\\ShardedQueryBuilder' => __DIR__ . '/../../..' . '/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php', 'OC\\DB\\ResultAdapter' => __DIR__ . '/../../..' . '/lib/private/DB/ResultAdapter.php', 'OC\\DB\\SQLiteMigrator' => __DIR__ . '/../../..' . '/lib/private/DB/SQLiteMigrator.php', 'OC\\DB\\SQLiteSessionInit' => __DIR__ . '/../../..' . 
'/lib/private/DB/SQLiteSessionInit.php', diff --git a/lib/private/DB/Connection.php b/lib/private/DB/Connection.php index 87d21f5b13ef6..0f70e732865d6 100644 --- a/lib/private/DB/Connection.php +++ b/lib/private/DB/Connection.php @@ -23,12 +23,19 @@ use Doctrine\DBAL\Result; use Doctrine\DBAL\Schema\Schema; use Doctrine\DBAL\Statement; -use OC\DB\QueryBuilder\Partitioned\PartitionSplit; use OC\DB\QueryBuilder\Partitioned\PartitionedQueryBuilder; +use OC\DB\QueryBuilder\Partitioned\PartitionSplit; use OC\DB\QueryBuilder\QueryBuilder; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; +use OC\DB\QueryBuilder\Sharded\CrossShardMoveHelper; +use OC\DB\QueryBuilder\Sharded\RoundRobinShardMapper; +use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; +use OC\DB\QueryBuilder\Sharded\ShardDefinition; use OC\SystemConfig; use OCP\DB\QueryBuilder\IQueryBuilder; +use OCP\DB\QueryBuilder\Sharded\IShardMapper; use OCP\Diagnostics\IEventLogger; +use OCP\ICacheFactory; use OCP\IDBConnection; use OCP\IRequestId; use OCP\PreConditionNotMetException; @@ -79,6 +86,10 @@ class Connection extends PrimaryReadReplicaConnection { /** @var array> */ protected array $partitions; + /** @var ShardDefinition[] */ + protected array $shards = []; + protected ShardConnectionManager $shardConnectionManager; + protected AutoIncrementHandler $autoIncrementHandler; /** * Initializes a new instance of the Connection class. @@ -104,6 +115,13 @@ public function __construct( $this->adapter = new $params['adapter']($this); $this->tablePrefix = $params['tablePrefix']; + /** @psalm-suppress InvalidArrayOffset */ + $this->shardConnectionManager = $this->params['shard_connection_manager'] ?? Server::get(ShardConnectionManager::class); + /** @psalm-suppress InvalidArrayOffset */ + $this->autoIncrementHandler = $this->params['auto_increment_handler'] ?? 
new AutoIncrementHandler( + Server::get(ICacheFactory::class), + $this->shardConnectionManager, + ); $this->systemConfig = \OC::$server->getSystemConfig(); $this->clock = Server::get(ClockInterface::class); $this->logger = Server::get(LoggerInterface::class); @@ -122,11 +140,44 @@ public function __construct( $this->_config->setSQLLogger($debugStack); } - $this->partitions = $this->systemConfig->getValue('db.partitions', []); + // todo: only allow specific, pre-defined shard configurations, the current config exists for easy testing setup + $this->shards = array_map(function (array $config) { + $shardMapperClass = $config['mapper'] ?? RoundRobinShardMapper::class; + $shardMapper = Server::get($shardMapperClass); + if (!$shardMapper instanceof IShardMapper) { + throw new \Exception("Invalid shard mapper: $shardMapperClass"); + } + return new ShardDefinition( + $config['table'], + $config['primary_key'], + $config['companion_keys'], + $config['shard_key'], + $shardMapper, + $config['companion_tables'], + $config['shards'] + ); + }, $this->params['sharding'] ?? []); + $this->partitions = array_map(function (ShardDefinition $shard) { + return array_merge([$shard->table], $shard->companionTables); + }, $this->shards); $this->setNestTransactionsWithSavepoints(true); } + /** + * @return IDBConnection[] + */ + public function getShardConnections(): array { + $connections = []; + foreach ($this->shards as $shardDefinition) { + foreach ($shardDefinition->getAllShards() as $shard) { + /** @var ConnectionAdapter $connection */ + $connections[] = $this->shardConnectionManager->getConnection($shardDefinition, $shard); + } + } + return $connections; + } + /** * @throws Exception */ @@ -175,13 +226,19 @@ public function getStats(): array { */ public function getQueryBuilder(): IQueryBuilder { $this->queriesBuilt++; + $builder = new QueryBuilder( new ConnectionAdapter($this), $this->systemConfig, $this->logger ); if (count($this->partitions) > 0) { - $builder = new 
PartitionedQueryBuilder($builder); + $builder = new PartitionedQueryBuilder( + $builder, + $this->shards, + $this->shardConnectionManager, + $this->autoIncrementHandler, + ); foreach ($this->partitions as $name => $tables) { $partition = new PartitionSplit($name, $tables); $builder->addPartition($partition); @@ -703,6 +760,9 @@ public function migrateToSchema(Schema $toSchema, bool $dryRun = false) { return $migrator->generateChangeScript($toSchema); } else { $migrator->migrate($toSchema); + foreach ($this->getShardConnections() as $shardConnection) { + $shardConnection->migrateToSchema($toSchema); + } } } @@ -819,4 +879,12 @@ public function logDatabaseException(\Exception $exception): void { } } } + + public function getShardDefinition(string $name): ?ShardDefinition { + return $this->shards[$name] ?? null; + } + + public function getCrossShardMoveHelper(): CrossShardMoveHelper { + return new CrossShardMoveHelper($this->shardConnectionManager); + } } diff --git a/lib/private/DB/ConnectionAdapter.php b/lib/private/DB/ConnectionAdapter.php index 88083711195d3..2baeda9cfb7c2 100644 --- a/lib/private/DB/ConnectionAdapter.php +++ b/lib/private/DB/ConnectionAdapter.php @@ -12,6 +12,8 @@ use Doctrine\DBAL\Platforms\AbstractPlatform; use Doctrine\DBAL\Schema\Schema; use OC\DB\Exceptions\DbalException; +use OC\DB\QueryBuilder\Sharded\CrossShardMoveHelper; +use OC\DB\QueryBuilder\Sharded\ShardDefinition; use OCP\DB\IPreparedStatement; use OCP\DB\IResult; use OCP\DB\QueryBuilder\IQueryBuilder; @@ -244,4 +246,12 @@ public function getServerVersion(): string { public function logDatabaseException(\Exception $exception) { $this->inner->logDatabaseException($exception); } + + public function getShardDefinition(string $name): ?ShardDefinition { + return $this->inner->getShardDefinition($name); + } + + public function getCrossShardMoveHelper(): CrossShardMoveHelper { + return $this->inner->getCrossShardMoveHelper(); + } } diff --git a/lib/private/DB/ConnectionFactory.php 
b/lib/private/DB/ConnectionFactory.php index dd041f1e41df7..d8764f07753a2 100644 --- a/lib/private/DB/ConnectionFactory.php +++ b/lib/private/DB/ConnectionFactory.php @@ -11,7 +11,11 @@ use Doctrine\DBAL\Configuration; use Doctrine\DBAL\DriverManager; use Doctrine\DBAL\Event\Listeners\OracleSessionInit; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; +use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; use OC\SystemConfig; +use OCP\ICacheFactory; +use OCP\Server; /** * Takes care of creating and configuring Doctrine connections. @@ -54,9 +58,12 @@ class ConnectionFactory { ], ]; + private ShardConnectionManager $shardConnectionManager; + private ICacheFactory $cacheFactory; public function __construct( - private SystemConfig $config + private SystemConfig $config, + ?ICacheFactory $cacheFactory = null, ) { if ($this->config->getValue('mysql.utf8mb4', false)) { $this->defaultConnectionParams['mysql']['charset'] = 'utf8mb4'; @@ -65,6 +72,8 @@ public function __construct( if ($collationOverride) { $this->defaultConnectionParams['mysql']['collation'] = $collationOverride; } + $this->shardConnectionManager = new ShardConnectionManager($this->config, $this); + $this->cacheFactory = $cacheFactory ?? Server::get(ICacheFactory::class); } /** @@ -214,6 +223,14 @@ public function createConnectionParams(string $configPrefix = '', array $additio if ($this->config->getValue('dbpersistent', false)) { $connectionParams['persistent'] = true; } + + $connectionParams['sharding'] = $this->config->getValue('dbsharding', []); + $connectionParams['shard_connection_manager'] = $this->shardConnectionManager; + $connectionParams['auto_increment_handler'] = new AutoIncrementHandler( + $this->cacheFactory, + $this->shardConnectionManager, + ); + $connectionParams = array_merge($connectionParams, $additionalConnectionParams); $replica = $this->config->getValue($configPrefix . 
'dbreplica', $this->config->getValue('dbreplica', [])) ?: [$connectionParams]; diff --git a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php index f96ed76f6bda8..8ed88198c19fc 100644 --- a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php @@ -289,7 +289,21 @@ public function executeStatement(?IDBConnection $connection = null): int { return $this->builder->executeStatement($connection); } + public function hintShardKey(string $column, mixed $value) { + $this->builder->hintShardKey($column, $value); + return $this; + } + + public function runAcrossAllShards() { + $this->builder->runAcrossAllShards(); + return $this; + } + public function getOutputColumns(): array { return $this->builder->getOutputColumns(); } + + public function prefixTableName(string $table): string { + return $this->builder->prefixTableName($table); + } } diff --git a/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php index 54d913251d477..ff4e1da70b966 100644 --- a/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php +++ b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php @@ -44,7 +44,7 @@ public static function merge(array $conditions): JoinCondition { $fromConditions = []; $toConditions = []; foreach ($conditions as $condition) { - if (($condition->fromColumn && $fromColumn) ||($condition->toColumn && $toColumn)) { + if (($condition->fromColumn && $fromColumn) || ($condition->toColumn && $toColumn)) { throw new InvalidPartitionedQueryException("Can't join from {$condition->fromColumn} to {$condition->toColumn} as it already join froms {$fromColumn} to {$toColumn}"); } if ($condition->fromColumn) { diff --git a/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php b/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php index 8504c62d6d169..a5024b478d3d6 100644 --- 
a/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php +++ b/lib/private/DB/QueryBuilder/Partitioned/PartitionQuery.php @@ -14,12 +14,12 @@ * A sub-query from a partitioned join */ class PartitionQuery { - const JOIN_MODE_INNER = 'inner'; - const JOIN_MODE_LEFT = 'left'; + public const JOIN_MODE_INNER = 'inner'; + public const JOIN_MODE_LEFT = 'left'; // left-join where the left side IS NULL - const JOIN_MODE_LEFT_NULL = 'left_null'; + public const JOIN_MODE_LEFT_NULL = 'left_null'; - const JOIN_MODE_RIGHT = 'right'; + public const JOIN_MODE_RIGHT = 'right'; public function __construct( public IQueryBuilder $query, diff --git a/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php b/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php index 1ce2bb67620ca..8fcde0d24ae29 100644 --- a/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php @@ -8,18 +8,15 @@ namespace OC\DB\QueryBuilder\Partitioned; -use OC\DB\ConnectionAdapter; use OC\DB\QueryBuilder\CompositeExpression; -use OC\DB\QueryBuilder\ExtendedQueryBuilder; use OC\DB\QueryBuilder\QuoteHelper; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; use OC\DB\QueryBuilder\Sharded\ShardedQueryBuilder; -use OC\SystemConfig; use OCP\DB\IResult; use OCP\DB\QueryBuilder\IQueryBuilder; use OCP\DB\QueryBuilder\IQueryFunction; use OCP\IDBConnection; -use Psr\Log\LoggerInterface; /** * A special query builder that automatically splits queries that span across multiple database partitions[1]. @@ -38,7 +35,7 @@ * * [1]: A set of tables which can't be queried together with the rest of the tables, such as when sharding is used. 
*/ -class PartitionedQueryBuilder extends ExtendedQueryBuilder { +class PartitionedQueryBuilder extends ShardedQueryBuilder { /** @var array $splitQueries */ private array $splitQueries = []; /** @var list */ @@ -53,14 +50,28 @@ class PartitionedQueryBuilder extends ExtendedQueryBuilder { private ?int $offset = null; public function __construct( - IQueryBuilder $builder, + IQueryBuilder $builder, + array $shardDefinitions, + ShardConnectionManager $shardConnectionManager, + AutoIncrementHandler $autoIncrementHandler, ) { - parent::__construct($builder); + parent::__construct($builder, $shardDefinitions, $shardConnectionManager, $autoIncrementHandler); $this->quoteHelper = new QuoteHelper(); } private function newQuery(): IQueryBuilder { - return $this->builder->getConnection()->getQueryBuilder(); + // get a fresh, non-partitioning query builder + $builder = $this->builder->getConnection()->getQueryBuilder(); + if ($builder instanceof PartitionedQueryBuilder) { + $builder = $builder->builder; + } + + return new ShardedQueryBuilder( + $builder, + $this->shardDefinitions, + $this->shardConnectionManager, + $this->autoIncrementHandler, + ); } // we need to save selects until we know all the table aliases @@ -70,8 +81,8 @@ public function select(...$selects) { return $this; } - public function addSelect(...$selects) { - $selects = array_map(function($select) { + public function addSelect(...$select) { + $select = array_map(function ($select) { return ['select' => $select, 'alias' => null]; }, $select); $this->selects = array_merge($this->selects, $select); @@ -281,7 +292,7 @@ private function splitPredicatesByParts(array $predicates): array { $partitionPredicates = []; foreach ($predicates as $predicate) { - $partition = $this->getPartitionForPredicate((string) $predicate); + $partition = $this->getPartitionForPredicate((string)$predicate); if ($this->mainPartition === $partition) { $partitionPredicates[''][] = $predicate; } elseif ($partition) { diff --git 
a/lib/private/DB/QueryBuilder/QueryBuilder.php b/lib/private/DB/QueryBuilder/QueryBuilder.php index 23dc564e94e72..912d48cd34ece 100644 --- a/lib/private/DB/QueryBuilder/QueryBuilder.php +++ b/lib/private/DB/QueryBuilder/QueryBuilder.php @@ -1329,7 +1329,7 @@ public function getTableName($table) { * @param string $table * @return string */ - public function prefixTableName($table) { + public function prefixTableName(string $table): string { if ($this->automaticTablePrefix === false || str_starts_with($table, '*PREFIX*')) { return $table; } diff --git a/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php new file mode 100644 index 0000000000000..553644def4e1b --- /dev/null +++ b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php @@ -0,0 +1,152 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OC\DB\QueryBuilder\Sharded; + +use OCP\ICacheFactory; +use OCP\IMemcache; +use OCP\IMemcacheTTL; + +/** + * A helper to atomically determine the next auto increment value for a sharded table + * + * Since we can't use the database's auto-increment (since each db doesn't know about the keys in the other shards) + * we need external logic for doing the auto increment + */ +class AutoIncrementHandler { + public const MIN_VALID_KEY = 1000; + public const TTL = 365 * 24 * 60 * 60; + + private ?IMemcache $cache = null; + + public function __construct( + private ICacheFactory $cacheFactory, + private ShardConnectionManager $shardConnectionManager, + ) { + if (PHP_INT_SIZE < 8) { + throw new \Exception("sharding is only supported with 64bit php"); + } + } + + private function getCache(): IMemcache { + if(is_null($this->cache)) { + $cache = $this->cacheFactory->createDistributed("shared_autoincrement"); + if ($cache instanceof IMemcache) { + $this->cache = $cache; + } else { + throw new \Exception('Distributed cache ' . get_class($cache) . 
' is not suitable'); + } + } + return $this->cache; + } + + /** + * Get the next value for the given shard definition + * + * The returned key is unique and incrementing, but not sequential. + * The shard id is encoded in the first byte of the returned value + * + * @param ShardDefinition $shardDefinition + * @return int + * @throws \Exception + */ + public function getNextPrimaryKey(ShardDefinition $shardDefinition, int $shard): int { + $retries = 0; + while ($retries < 5) { + $next = $this->getNextInner($shardDefinition); + if ($next !== null) { + if ($next > ShardDefinition::MAX_PRIMARY_KEY) { + throw new \Exception("Max primary key of " . ShardDefinition::MAX_PRIMARY_KEY . " exceeded"); + } + // we encode the shard the primary key was originally inserted into to allow guessing the shard by primary key later on + return ($next << 8) | $shard; + } else { + $retries++; + } + } + throw new \Exception("Failed to get next primary key"); + } + + /** + * auto increment logic without retry + * + * @param ShardDefinition $shardDefinition + * @return int|null either the next primary key or null if the call needs to be retried + */ + private function getNextInner(ShardDefinition $shardDefinition): ?int { + $cache = $this->getCache(); + // because this function will likely be called concurrently from different requests + // the implementation needs to ensure that the cached value can be cleared, invalidated or re-calculated at any point between our cache calls + // care must be taken that the logic remains fully resilient against race conditions + + // in the ideal case, the last primary key is stored in the cache and we can just do an `inc` + // if that is not the case we find the highest used id in the database increment it, and save it in the cache + + // prevent inc from returning `1` if the key doesn't exist by setting it to a non-numeric value + $cache->add($shardDefinition->table, "empty-placeholder", self::TTL); + $next = $cache->inc($shardDefinition->table); + + if 
($cache instanceof IMemcacheTTL) { + $cache->setTTL($shardDefinition->table, self::TTL); + } + + // the "add + inc" trick above isn't strictly atomic, so as a safety we reject any result that is too small + // to handle the edge case of the stored value disappearing between the add and inc + if (is_int($next) && $next >= self::MIN_VALID_KEY) { + return $next; + } elseif (is_int($next)) { + // we hit the edge case, so invalidate the cached value + if (!$cache->cas($shardDefinition->table, $next, "empty-placeholder")) { + // someone else is changing the value concurrently, give up and retry + return null; + } + } + + // discard the encoded initial shard + $current = $this->getMaxFromDb($shardDefinition) >> 8; + $next = max($current, self::MIN_VALID_KEY) + 1; + if ($cache->cas($shardDefinition->table, "empty-placeholder", $next)) { + return $next; + } + + // another request set the cached value before us, so we should just be able to inc + $next = $cache->inc($shardDefinition->table); + if (is_int($next) && $next >= self::MIN_VALID_KEY) { + return $next; + } elseif(is_int($next)) { + // key got cleared, invalidate and retry + $cache->cas($shardDefinition->table, $next, "empty-placeholder"); + return null; + } else { + // cleanup any non-numeric value other than the placeholder if that got stored somehow + $cache->ncad($shardDefinition->table, "empty-placeholder"); + // retry + return null; + } + } + + /** + * Get the maximum primary key value from the shards + */ + private function getMaxFromDb(ShardDefinition $shardDefinition): int { + $max = 0; + foreach ($shardDefinition->getAllShards() as $shard) { + $connection = $this->shardConnectionManager->getConnection($shardDefinition, $shard); + $query = $connection->getQueryBuilder(); + $query->select($shardDefinition->primaryKey) + ->from($shardDefinition->table) + ->orderBy($shardDefinition->primaryKey, "DESC") + ->setMaxResults(1); + $result = $query->executeQuery()->fetchOne(); + if ($result) { + $max = max($max, 
$result); + } + } + return $max; + } +} diff --git a/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php b/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php new file mode 100644 index 0000000000000..ffc95e4e54c05 --- /dev/null +++ b/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php @@ -0,0 +1,162 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OC\DB\QueryBuilder\Sharded; + +use OCP\DB\QueryBuilder\IQueryBuilder; +use OCP\IDBConnection; + +/** + * Utility methods for implementing logic that moves data across shards + */ +class CrossShardMoveHelper { + public function __construct( + private ShardConnectionManager $connectionManager + ) { + } + + public function getConnection(ShardDefinition $shardDefinition, int $shardKey): IDBConnection { + return $this->connectionManager->getConnection($shardDefinition, $shardDefinition->getShardForKey($shardKey)); + } + + /** + * Update the shard key of a set of rows, moving them to a different shard if needed + * + * @param ShardDefinition $shardDefinition + * @param string $table + * @param string $shardColumn + * @param int $sourceShardKey + * @param int $targetShardKey + * @param string $primaryColumn + * @param int[] $primaryKeys + * @return void + */ + public function moveCrossShards(ShardDefinition $shardDefinition, string $table, string $shardColumn, int $sourceShardKey, int $targetShardKey, string $primaryColumn, array $primaryKeys): void { + $sourceShard = $shardDefinition->getShardForKey($sourceShardKey); + $targetShard = $shardDefinition->getShardForKey($targetShardKey); + $sourceConnection = $this->connectionManager->getConnection($shardDefinition, $sourceShard); + if ($sourceShard === $targetShard) { + $this->updateItems($sourceConnection, $table, $shardColumn, $targetShardKey, $primaryColumn, $primaryKeys); + + return; + } + $targetConnection = $this->connectionManager->getConnection($shardDefinition, $targetShard); + + $sourceItems = 
$this->loadItems($sourceConnection, $table, $primaryColumn, $primaryKeys); + foreach ($sourceItems as &$sourceItem) { + $sourceItem[$shardColumn] = $targetShardKey; + } + unset($sourceItem); + if (!$sourceItems) { + return; + } + + $sourceConnection->beginTransaction(); + $targetConnection->beginTransaction(); + try { + $this->saveItems($targetConnection, $table, $sourceItems); + $this->deleteItems($sourceConnection, $table, $primaryColumn, $primaryKeys); + + $targetConnection->commit(); + $sourceConnection->commit(); + } catch (\Exception $e) { + $sourceConnection->rollback(); + $targetConnection->rollback(); + throw $e; + } + } + + /** + * Load rows from a table to move + * + * The primary keys are queried in chunks of 1000 and the chunk results are merged. + * + * @param IDBConnection $connection + * @param string $table + * @param string $primaryColumn + * @param int[] $primaryKeys + * @return array[] + */ + public function loadItems(IDBConnection $connection, string $table, string $primaryColumn, array $primaryKeys): array { + $query = $connection->getQueryBuilder(); + $query->select('*') + ->from($table) + ->where($query->expr()->in($primaryColumn, $query->createParameter("keys"))); + + $chunks = array_chunk($primaryKeys, 1000); + + $results = []; + foreach ($chunks as $chunk) { + $query->setParameter("keys", $chunk, IQueryBuilder::PARAM_INT_ARRAY); + $results = array_merge($results, $query->executeQuery()->fetchAll()); + } + + return $results; + } + + /** + * Save modified rows + * + * The insert statement is built from the columns of the first item and executed once per item, + * integer values are bound with an explicit integer parameter type. + * + * @param IDBConnection $connection + * @param string $table + * @param array[] $items + * @return void + */ + public function saveItems(IDBConnection $connection, string $table, array $items): void { + if (count($items) === 0) { + return; + } + $query = $connection->getQueryBuilder(); + $query->insert($table); + foreach ($items[0] as $column => $value) { + $query->setValue($column, $query->createParameter($column)); + } + + foreach ($items as $item) { + foreach ($item as $column => $value) { + if (is_int($value)) { + $query->setParameter($column, $value, IQueryBuilder::PARAM_INT); + } else { 
+ $query->setParameter($column, $value); + } + } + $query->executeStatement(); + } + } + + /** + * Set the shard key column to a new value for the given rows + * + * @param IDBConnection $connection + * @param string $table + * @param string $shardColumn + * @param int $targetShardKey + * @param string $primaryColumn + * @param int[] $primaryKeys + * @return void + */ + public function updateItems(IDBConnection $connection, string $table, string $shardColumn, int $targetShardKey, string $primaryColumn, array $primaryKeys): void { + $query = $connection->getQueryBuilder(); + $query->update($table) + ->set($shardColumn, $query->createNamedParameter($targetShardKey, IQueryBuilder::PARAM_INT)) + ->where($query->expr()->in($primaryColumn, $query->createNamedParameter($primaryKeys, IQueryBuilder::PARAM_INT_ARRAY))); + $query->executeStatement(); + } + + /** + * Delete the given rows, the primary keys are processed in chunks of 1000 + * + * @param IDBConnection $connection + * @param string $table + * @param string $primaryColumn + * @param int[] $primaryKeys + * @return void + */ + public function deleteItems(IDBConnection $connection, string $table, string $primaryColumn, array $primaryKeys): void { + $query = $connection->getQueryBuilder(); + $query->delete($table) + ->where($query->expr()->in($primaryColumn, $query->createParameter("keys"))); + $chunks = array_chunk($primaryKeys, 1000); + + foreach ($chunks as $chunk) { + $query->setParameter("keys", $chunk, IQueryBuilder::PARAM_INT_ARRAY); + $query->executeStatement(); + } + } +} diff --git a/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php b/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php new file mode 100644 index 0000000000000..af778489a2dfd --- /dev/null +++ b/lib/private/DB/QueryBuilder/Sharded/HashShardMapper.php @@ -0,0 +1,21 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OC\DB\QueryBuilder\Sharded; + +use OCP\DB\QueryBuilder\Sharded\IShardMapper; + +/** + * Map string key to an int-range by hashing the key + */ +class HashShardMapper implements IShardMapper { + public function getShardForKey(int $key, int $count): int { + $int = unpack('L', substr(md5((string)$key, 
true), 0, 4))[1]; + return $int % $count; + } +} diff --git a/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php b/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php new file mode 100644 index 0000000000000..733a6acaf9da2 --- /dev/null +++ b/lib/private/DB/QueryBuilder/Sharded/InvalidShardedQueryException.php @@ -0,0 +1,29 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OC\DB\QueryBuilder\Sharded; + +use OCP\DB\QueryBuilder\Sharded\IShardMapper; + +/** + * Map an integer key to a shard by taking the key modulo the number of shards + */ +class RoundRobinShardMapper implements IShardMapper { + public function getShardForKey(int $key, int $count): int { + return $key % $count; + } +} diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php b/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php new file mode 100644 index 0000000000000..87cac58bc57ca --- /dev/null +++ b/lib/private/DB/QueryBuilder/Sharded/ShardConnectionManager.php @@ -0,0 +1,43 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OC\DB\QueryBuilder\Sharded; + +use OC\DB\ConnectionAdapter; +use OC\DB\ConnectionFactory; +use OC\SystemConfig; +use OCP\IDBConnection; + +/** + * Keeps track of the db connections to the various shards + */ +class ShardConnectionManager { + /** @var array */ + private array $connections = []; + + public function __construct( + private SystemConfig $config, + private ConnectionFactory $factory, + ) { + } + + public function getConnection(ShardDefinition $shardDefinition, int $shard): IDBConnection { + $connectionKey = $shardDefinition->table . '_' . 
/**
 * Configuration for a shard setup
 */
class ShardDefinition {
	// we reserve the bottom byte of the primary key for the initial shard, so the total shard count is limited to what we can fit there
	public const MAX_SHARDS = 256;

	public const PRIMARY_KEY_MASK = 0x7F_FF_FF_FF_FF_FF_FF_00;
	public const PRIMARY_KEY_SHARD_MASK = 0x00_00_00_00_00_00_00_FF;
	// since we reserve 1 byte for the shard index, only the remaining bits of a positive integer
	// are left as primary key space (55 bits with a 64-bit PHP_INT_MAX)
	public const MAX_PRIMARY_KEY = PHP_INT_MAX >> 8;

	/**
	 * @param string $table main sharded table
	 * @param string $primaryKey primary key column of `$table`
	 * @param string[] $companionKeys additional columns that are treated like the primary key, see `isKey()`
	 * @param string $shardKey column holding the shard key
	 * @param IShardMapper $shardMapper maps a shard key to a shard index
	 * @param string[] $companionTables further tables that belong to this shard definition, see `hasTable()`
	 * @param array $shards per-shard configuration, the array keys are used as shard indexes
	 * @throws \Exception when more than `MAX_SHARDS` shards are configured
	 */
	public function __construct(
		public string $table,
		public string $primaryKey,
		public array $companionKeys,
		public string $shardKey,
		public IShardMapper $shardMapper,
		public array $companionTables = [],
		public array $shards = [],
	) {
		// the shard index has to fit in one byte (0-255), so exactly MAX_SHARDS shards are still valid
		if (count($this->shards) > self::MAX_SHARDS) {
			throw new \Exception("Only allowed maximum of " . self::MAX_SHARDS . " shards allowed");
		}
	}

	/**
	 * Check whether a table is either the main table or one of the companion tables
	 */
	public function hasTable(string $table): bool {
		if ($this->table === $table) {
			return true;
		}
		return in_array($table, $this->companionTables, true);
	}

	/**
	 * Get the shard index for a shard key, as decided by the configured shard mapper
	 */
	public function getShardForKey(int $key): int {
		return $this->shardMapper->getShardForKey($key, count($this->shards));
	}

	/**
	 * Get the indexes of all configured shards
	 *
	 * @return array keys of the `$shards` configuration
	 */
	public function getAllShards(): array {
		return array_keys($this->shards);
	}

	/**
	 * Check whether a column is the primary key or one of the companion keys
	 */
	public function isKey(string $column): bool {
		return $column === $this->primaryKey || in_array($column, $this->companionKeys, true);
	}
}
/**
 * Execute a SELECT statement across the configured shards
 *
 * When the query targets exactly one shard it is executed there directly. Otherwise the rows of every
 * queried shard are merged and sorting, limit and offset are emulated on the merged rows.
 *
 * @param IQueryBuilder $query
 * @param bool $allShards
 * @param int[] $shardKeys
 * @param int[] $primaryKeys
 * @param array{column: string, order: string}[]|null $sortList
 * @param int|null $limit
 * @param int|null $offset
 * @return IResult
 */
public function executeQuery(
	IQueryBuilder $query,
	bool $allShards,
	array $shardKeys,
	array $primaryKeys,
	?array $sortList = null,
	?int $limit = null,
	?int $offset = null,
): IResult {
	$shards = $this->getShards($allShards, $shardKeys);
	if ($shards && count($shards) === 1) {
		// trivial case, the database can apply the offset itself.
		// The offset is handed to us separately because the sharded builder may not have forwarded it to `$query`
		if ($offset !== null) {
			$query->setFirstResult($offset);
		}
		return $query->executeQuery($this->shardConnectionManager->getConnection($this->shardDefinition, $shards[0]));
	}

	// we have to emulate limit and offset, so we select offset+limit from all shards to ensure we have enough rows
	// and then filter them down after we merged the results
	if ($limit !== null && $offset !== null) {
		$query->setMaxResults($limit + $offset);
	}

	$results = [];
	if ($shards) {
		// we know exactly what shards we need to query
		foreach ($shards as $shard) {
			$shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard);
			$subResult = $query->executeQuery($shardConnection);
			$results = array_merge($results, $subResult->fetchAll());
			$subResult->closeCursor();
		}
	} else {
		// we don't know for sure what shards we need to query,
		// we first try the shards that are "likely" to have the rows we want, based on the shard that the row was
		// originally created in. If we then still haven't found all rows we try the rest of the shards
		$likelyShards = $this->getLikelyShards($primaryKeys);
		$unlikelyShards = array_diff($this->shardDefinition->getAllShards(), $likelyShards);
		$expectedRows = count($primaryKeys);

		foreach (array_merge($likelyShards, $unlikelyShards) as $shard) {
			$shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard);
			$subResult = $query->executeQuery($shardConnection);
			$results = array_merge($results, $subResult->fetchAll());
			$subResult->closeCursor();

			// compare against everything found so far, the rows can be spread over several shards.
			// Without any known primary key there is nothing to count against, so every shard is queried
			if ($expectedRows > 0 && count($results) >= $expectedRows) {
				// we have all the rows we're looking for
				break;
			}
		}
	}

	if ($sortList) {
		usort($results, static function ($a, $b) use ($sortList): int {
			foreach ($sortList as $sort) {
				$valueA = $a[$sort['column']] ?? null;
				$valueB = $b[$sort['column']] ?? null;
				$cmp = $valueA <=> $valueB;
				if ($cmp === 0) {
					continue;
				}
				return $sort['order'] === "DESC" ? -$cmp : $cmp;
			}
			// equal on every sort column
			return 0;
		});
	}

	if ($limit !== null && $offset !== null) {
		$results = array_slice($results, $offset, $limit);
	} elseif ($limit !== null) {
		$results = array_slice($results, 0, $limit);
	} elseif ($offset !== null) {
		$results = array_slice($results, $offset);
	}

	return new ArrayResult($results);
}
/**
 * A special query builder that automatically distributes queries over multiple database shards.
 *
 * This relies on `PartitionedQueryBuilder` to split the parts of a query that touch sharded tables from the parts
 * that touch non-sharded tables, so a query built here should touch either only sharded tables or only non-sharded tables.
 *
 * Most of the logic in this class is concerned with extracting either the shard key (e.g. "storage") or primary key (e.g. "fileid")
 * from the query. The logic for actually running the query across the shards is mostly delegated to `ShardQueryRunner`.
 */
class ShardedQueryBuilder extends ExtendedQueryBuilder {
	/** @var array shard key values or parameter placeholders collected from the query */
	private array $shardKeys = [];
	/** @var array primary/companion key values or parameter placeholders collected from the query */
	private array $primaryKeys = [];
	private ?ShardDefinition $shardDefinition = null;
	/** @var bool Run the query across all shards */
	private bool $allShards = false;
	private ?string $insertTable = null;
	private mixed $lastInsertId = null;
	private ?IDBConnection $lastInsertConnection = null;
	/** @var mixed New value (or parameter placeholder) for the shard key column of an UPDATE, null when it isn't changed */
	private mixed $updateShardKey = null;
	private ?int $limit = null;
	private ?int $offset = null;
	/** @var array{column: string, order: string}[] */
	private array $sortList = [];
	private string $mainTable = '';

	public function __construct(
		IQueryBuilder $builder,
		protected array $shardDefinitions,
		protected ShardConnectionManager $shardConnectionManager,
		protected AutoIncrementHandler $autoIncrementHandler,
	) {
		parent::__construct($builder);
	}

	/**
	 * Get the resolved, de-duplicated shard key values found in the query
	 */
	public function getShardKeys(): array {
		return $this->getKeyValues($this->shardKeys);
	}

	/**
	 * Get the resolved, de-duplicated primary (and companion) key values found in the query
	 */
	public function getPrimaryKeys(): array {
		return $this->getKeyValues($this->primaryKeys);
	}

	private function getKeyValues(array $keys): array {
		$values = [];
		foreach ($keys as $key) {
			$values = array_merge($values, $this->getKeyValue($key));
		}
		return array_values(array_unique($values));
	}

	/**
	 * Resolve a collected key to its value(s)
	 *
	 * `:name` placeholders (also when wrapped in a `Parameter`) are looked up in the bound query parameters,
	 * any other non-null value is used as is.
	 */
	private function getKeyValue($value): array {
		if ($value instanceof Parameter) {
			$value = (string)$value;
		}
		if (is_string($value) && str_starts_with($value, ':')) {
			$param = $this->getParameter(substr($value, 1));
			if (is_array($param)) {
				return $param;
			} else {
				return [$param];
			}
		} elseif ($value !== null) {
			return [$value];
		} else {
			return [];
		}
	}

	public function where(...$predicates) {
		return $this->andWhere(...$predicates);
	}

	public function andWhere(...$where) {
		if ($where) {
			foreach ($where as $predicate) {
				$this->tryLoadShardKey($predicate);
			}
			parent::andWhere(...$where);
		}
		return $this;
	}

	/**
	 * Collect shard, primary and companion keys from a predicate, if a shard definition is active
	 */
	private function tryLoadShardKey($predicate): void {
		if (!$this->shardDefinition) {
			return;
		}
		// the collected keys are plain lists: the `+` array union would silently drop new entries
		// whose numeric index already exists, so they have to be appended with array_merge
		if ($keys = $this->tryExtractShardKeys($predicate, $this->shardDefinition->shardKey)) {
			$this->shardKeys = array_merge($this->shardKeys, $keys);
		}
		if ($keys = $this->tryExtractShardKeys($predicate, $this->shardDefinition->primaryKey)) {
			$this->primaryKeys = array_merge($this->primaryKeys, $keys);
		}
		foreach ($this->shardDefinition->companionKeys as $companionKey) {
			if ($keys = $this->tryExtractShardKeys($predicate, $companionKey)) {
				$this->primaryKeys = array_merge($this->primaryKeys, $keys);
			}
		}
	}

	/**
	 * Extract the right hand side of `column = x` or `column IN (x)` comparisons on the given column
	 *
	 * @param mixed $predicate a composite expression or anything that can be cast to a string
	 * @param string $column unquoted column name to look for
	 * @return string[]
	 */
	private function tryExtractShardKeys($predicate, string $column): array {
		if ($predicate instanceof CompositeExpression) {
			$values = [];
			foreach ($predicate->getParts() as $part) {
				$partValues = $this->tryExtractShardKeys($part, $column);
				// for OR expressions, we can only rely on the predicate if all parts contain the comparison
				if ($predicate->getType() === CompositeExpression::TYPE_OR && !$partValues) {
					return [];
				}
				$values = array_merge($values, $partValues);
			}
			return $values;
		}
		$predicate = (string)$predicate;
		// expect a condition in the form of 'alias1.column1 = placeholder' or 'alias1.column1 in placeholder'
		if (substr_count($predicate, ' ') > 2) {
			return [];
		}
		if (str_contains($predicate, ' = ')) {
			$parts = explode(' = ', $predicate);
			if ($parts[0] === "`{$column}`" || str_ends_with($parts[0], "`.`{$column}`")) {
				return [$parts[1]];
			} else {
				return [];
			}
		}

		if (str_contains($predicate, ' IN ')) {
			$parts = explode(' IN ', $predicate);
			if ($parts[0] === "`{$column}`" || str_ends_with($parts[0], "`.`{$column}`")) {
				return [trim(trim($parts[1], '('), ')')];
			} else {
				return [];
			}
		}

		return [];
	}

	public function set($key, $value) {
		if ($this->shardDefinition && $key === $this->shardDefinition->shardKey) {
			// remember the new shard key so `validate()` can reject updates that would move rows to another shard.
			// Only values `getKeyValue()` can resolve are tracked
			if ($value instanceof Parameter || is_string($value) || is_int($value)) {
				$this->updateShardKey = $value;
			}
		}
		return parent::set($key, $value);
	}

	public function setValue($column, $value) {
		if ($this->shardDefinition) {
			if ($this->shardDefinition->isKey($column)) {
				$this->primaryKeys[] = $value;
			}
			if ($column === $this->shardDefinition->shardKey) {
				$this->shardKeys[] = $value;
			}
		}
		return parent::setValue($column, $value);
	}

	public function values(array $values) {
		foreach ($values as $column => $value) {
			$this->setValue($column, $value);
		}
		return $this;
	}

	/**
	 * Remember the main table of the query and pick the shard definition that covers it, if any
	 */
	private function actOnTable(string $table): void {
		$this->mainTable = $table;
		foreach ($this->shardDefinitions as $shardDefinition) {
			if ($shardDefinition->hasTable($table)) {
				$this->shardDefinition = $shardDefinition;
			}
		}
	}

	public function from($from, $alias = null) {
		if (is_string($from) && $from) {
			$this->actOnTable($from);
		}
		return parent::from($from, $alias);
	}

	public function update($update = null, $alias = null) {
		if (is_string($update) && $update) {
			$this->actOnTable($update);
		}
		return parent::update($update, $alias);
	}

	public function insert($insert = null) {
		if (is_string($insert) && $insert) {
			$this->insertTable = $insert;
			$this->actOnTable($insert);
		}
		return parent::insert($insert);
	}

	public function delete($delete = null, $alias = null) {
		if (is_string($delete) && $delete) {
			$this->actOnTable($delete);
		}
		return parent::delete($delete, $alias);
	}

	/**
	 * @throws InvalidShardedQueryException when a sharded query joins on itself or on a table outside its shard definition
	 */
	private function checkJoin(string $table): void {
		if ($this->shardDefinition) {
			if ($table === $this->mainTable) {
				throw new InvalidShardedQueryException("Sharded query on {$this->mainTable} isn't allowed to join on itself");
			}
			if (!$this->shardDefinition->hasTable($table)) {
				// this generally shouldn't happen as the partitioning logic should prevent this
				// but the check is here just in case
				throw new InvalidShardedQueryException("Sharded query on {$this->shardDefinition->table} isn't allowed to join on $table");
			}
		}
	}

	public function innerJoin($fromAlias, $join, $alias, $condition = null) {
		$this->checkJoin($join);
		return parent::innerJoin($fromAlias, $join, $alias, $condition);
	}

	public function leftJoin($fromAlias, $join, $alias, $condition = null) {
		$this->checkJoin($join);
		return parent::leftJoin($fromAlias, $join, $alias, $condition);
	}

	public function rightJoin($fromAlias, $join, $alias, $condition = null) {
		if ($this->shardDefinition) {
			throw new InvalidShardedQueryException("Sharded query on {$this->shardDefinition->table} isn't allowed to right join");
		}
		return parent::rightJoin($fromAlias, $join, $alias, $condition);
	}

	public function join($fromAlias, $join, $alias, $condition = null) {
		return $this->innerJoin($fromAlias, $join, $alias, $condition);
	}

	public function setMaxResults($maxResults) {
		if ($maxResults > 0) {
			$this->limit = (int)$maxResults;
		}
		return parent::setMaxResults($maxResults);
	}

	public function setFirstResult($firstResult) {
		if ($firstResult > 0) {
			$this->offset = (int)$firstResult;
		}
		if ($this->shardDefinition && count($this->shardDefinition->shards) > 1) {
			// we have to emulate offset
			return $this;
		} else {
			return parent::setFirstResult($firstResult);
		}
	}

	public function addOrderBy($sort, $order = null) {
		$this->registerOrder((string)$sort, (string)($order ?? "ASC"));
		// must add to the existing ordering, `orderBy` would replace the columns registered before
		return parent::addOrderBy($sort, $order);
	}

	public function orderBy($sort, $order = null) {
		$this->sortList = [];
		$this->registerOrder((string)$sort, (string)($order ?? "ASC"));
		return parent::orderBy($sort, $order);
	}

	/**
	 * Record a sort column so the ordering can be re-applied to merged rows
	 */
	private function registerOrder(string $column, string $order): void {
		// handle `mime + 0` and similar by just sorting on the first part of the expression
		[$column] = explode(' ', $column);
		$column = trim($column, '`');
		$this->sortList[] = [
			'column' => $column,
			'order' => strtoupper($order),
		];
	}

	public function hintShardKey(string $column, mixed $value) {
		if ($this->shardDefinition?->isKey($column)) {
			$this->primaryKeys[] = $value;
		}
		if ($column === $this->shardDefinition?->shardKey) {
			$this->shardKeys[] = $value;
		}
		return $this;
	}

	public function runAcrossAllShards() {
		$this->allShards = true;
		return $this;
	}

	/**
	 * Check that the query can be routed to its shard(s)
	 *
	 * @throws InvalidShardedQueryException
	 */
	public function validate(): void {
		if ($this->shardDefinition && $this->insertTable) {
			if ($this->allShards) {
				throw new InvalidShardedQueryException("Can't insert across all shards");
			}
			if (empty($this->getShardKeys())) {
				throw new InvalidShardedQueryException("Can't insert without shard key");
			}
		}
		if ($this->shardDefinition && !$this->allShards) {
			if (empty($this->getShardKeys()) && empty($this->getPrimaryKeys())) {
				throw new InvalidShardedQueryException("No shard key or primary key set for query");
			}
		}
		if ($this->shardDefinition && $this->updateShardKey !== null) {
			$newShardKey = $this->getKeyValue($this->updateShardKey);
			$oldShardKeys = $this->getShardKeys();
			if (count($newShardKey) !== 1) {
				throw new InvalidShardedQueryException("Can't set shard key to an array");
			}
			$newShardKey = current($newShardKey);
			if (empty($oldShardKeys)) {
				throw new InvalidShardedQueryException("Can't update without shard key");
			}
			$oldShards = array_values(array_unique(array_map(function ($shardKey) {
				return $this->shardDefinition->getShardForKey((int)$shardKey);
			}, $oldShardKeys)));
			$newShard = $this->shardDefinition->getShardForKey((int)$newShardKey);
			// the update is only safe when every affected row already lives on the shard of the new key
			if ($oldShards !== [$newShard]) {
				throw new InvalidShardedQueryException("Update statement would move rows to a different shard");
			}
		}
	}

	public function executeQuery(?IDBConnection $connection = null): IResult {
		$this->validate();
		if ($this->shardDefinition) {
			$runner = new ShardQueryRunner($this->shardConnectionManager, $this->shardDefinition);
			return $runner->executeQuery($this->builder, $this->allShards, $this->getShardKeys(), $this->getPrimaryKeys(), $this->sortList, $this->limit, $this->offset);
		}
		return parent::executeQuery($connection);
	}

	public function executeStatement(?IDBConnection $connection = null): int {
		$this->validate();
		if ($this->shardDefinition) {
			$runner = new ShardQueryRunner($this->shardConnectionManager, $this->shardDefinition);
			if ($this->insertTable) {
				$shards = $runner->getShards($this->allShards, $this->getShardKeys());
				if (!$shards) {
					throw new InvalidShardedQueryException("Can't insert without shard key");
				}
				$count = 0;
				foreach ($shards as $shard) {
					$shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard);
					// rows inserted into the main table without an explicit key get a generated primary key
					if (!$this->primaryKeys && $this->shardDefinition->table === $this->insertTable) {
						$id = $this->autoIncrementHandler->getNextPrimaryKey($this->shardDefinition, $shard);
						parent::setValue($this->shardDefinition->primaryKey, $this->createParameter('__generated_primary_key'));
						$this->setParameter('__generated_primary_key', $id, self::PARAM_INT);
						$this->lastInsertId = $id;
					}
					$count += parent::executeStatement($shardConnection);

					$this->lastInsertConnection = $shardConnection;
				}
				return $count;
			} else {
				return $runner->executeStatement($this->builder, $this->allShards, $this->getShardKeys(), $this->getPrimaryKeys());
			}
		}
		return parent::executeStatement($connection);
	}

	public function getLastInsertId(): int {
		if ($this->lastInsertId) {
			return $this->lastInsertId;
		}
		if ($this->lastInsertConnection) {
			$table = $this->builder->prefixTableName($this->insertTable);
			return $this->lastInsertConnection->lastInsertId($table);
		} else {
			return parent::getLastInsertId();
		}
	}
}
$sourcePath, $targetPath) { throw new \Exception('Invalid source storage path: ' . $sourcePath); } + $shardDefinition = $this->connection->getShardDefinition('filecache'); + if ( + $shardDefinition && + $shardDefinition->getShardForKey($sourceCache->getNumericStorageId()) !== $shardDefinition->getShardForKey($this->getNumericStorageId()) + ) { + $this->moveFromStorageSharded($shardDefinition, $sourceCache, $sourceData, $targetPath); + return; + } + $sourceId = $sourceData['fileid']; $newParentId = $this->getParentId($targetPath); @@ -673,7 +685,7 @@ public function moveFromCache(ICache $sourceCache, $sourcePath, $targetPath) { $childChunks = array_chunk($childIds, 1000); - $query = $this->connection->getQueryBuilder(); + $query = $this->getQueryBuilder(); $fun = $query->func(); $newPathFunction = $fun->concat( @@ -681,12 +693,15 @@ public function moveFromCache(ICache $sourceCache, $sourcePath, $targetPath) { $fun->substring('path', $query->createNamedParameter($sourceLength + 1, IQueryBuilder::PARAM_INT))// +1 for the leading slash ); $query->update('filecache') - ->set('storage', $query->createNamedParameter($targetStorageId, IQueryBuilder::PARAM_INT)) ->set('path_hash', $fun->md5($newPathFunction)) ->set('path', $newPathFunction) - ->where($query->expr()->eq('storage', $query->createNamedParameter($sourceStorageId, IQueryBuilder::PARAM_INT))) + ->whereStorageId($sourceStorageId) ->andWhere($query->expr()->in('fileid', $query->createParameter('files'))); + if ($sourceStorageId !== $targetStorageId) { + $query->set('storage', $query->createNamedParameter($targetStorageId), IQueryBuilder::PARAM_INT); + } + // when moving from an encrypted storage to a non-encrypted storage remove the `encrypted` mark if ($sourceCache->hasEncryptionWrapper() && !$this->hasEncryptionWrapper()) { $query->set('encrypted', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)); @@ -728,13 +743,17 @@ public function moveFromCache(ICache $sourceCache, $sourcePath, $targetPath) { 
/**
 * Move a cache entry, and for folders everything `getChildIds()` returns for it, from a storage on another shard into this cache.
 *
 * The rows are loaded from the source shard, rewritten for the target path and storage, saved to the target shard
 * and then deleted from the source shard. Each side runs in its own transaction; the target is committed last.
 *
 * @param ShardDefinition $shardDefinition
 * @param ICache $sourceCache
 * @param ICacheEntry $sourceEntry
 * @param string $targetPath
 * @throws \Exception rethrown after rolling back when saving, deleting or committing the source fails
 */
private function moveFromStorageSharded(ShardDefinition $shardDefinition, ICache $sourceCache, ICacheEntry $sourceEntry, $targetPath) {
	$sourceStorageId = $sourceCache->getNumericStorageId();
	$targetStorageId = $this->getNumericStorageId();
	$sourcePath = $sourceEntry->getPath();

	if ($sourceEntry->getMimeType() === ICacheEntry::DIRECTORY_MIMETYPE) {
		$fileIds = $this->getChildIds($sourceStorageId, $sourcePath);
	} else {
		$fileIds = [];
	}
	$fileIds[] = $sourceEntry->getId();

	$helper = $this->connection->getCrossShardMoveHelper();

	$sourceConnection = $helper->getConnection($shardDefinition, $sourceStorageId);
	$targetConnection = $helper->getConnection($shardDefinition, $targetStorageId);

	$cacheItems = $helper->loadItems($sourceConnection, "filecache", "fileid", $fileIds);
	$extendedItems = $helper->loadItems($sourceConnection, "filecache_extended", "fileid", $fileIds);
	$metadataItems = $helper->loadItems($sourceConnection, "files_metadata", "file_id", $fileIds);

	// when moving from an encrypted storage to a non-encrypted storage remove the `encrypted` mark
	$removeEncryptedFlag = ($sourceCache instanceof Cache && $sourceCache->hasEncryptionWrapper()) && !$this->hasEncryptionWrapper();

	$sourcePathLength = strlen($sourcePath);
	// rewrite by index instead of by reference, so no reference into `$cacheItems` outlives the loop
	foreach ($cacheItems as $index => $cacheItem) {
		if ($cacheItem['path'] === $sourcePath) {
			// the moved entry itself: it gets the new path, parent and name
			$cacheItem['path'] = $targetPath;
			$cacheItem['parent'] = $this->getParentId($targetPath);
			$cacheItem['name'] = basename($cacheItem['path']);
		} else {
			$cacheItem['path'] = $targetPath . '/' . substr($cacheItem['path'], $sourcePathLength + 1); // +1 for the leading slash
		}
		$cacheItem['path_hash'] = md5($cacheItem['path']);
		$cacheItem['storage'] = $targetStorageId;
		if ($removeEncryptedFlag) {
			$cacheItem['encrypted'] = 0;
		}
		$cacheItems[$index] = $cacheItem;
	}

	$targetConnection->beginTransaction();

	try {
		$helper->saveItems($targetConnection, "filecache", $cacheItems);
		$helper->saveItems($targetConnection, "filecache_extended", $extendedItems);
		$helper->saveItems($targetConnection, "files_metadata", $metadataItems);
	} catch (\Exception $e) {
		$targetConnection->rollback();
		throw $e;
	}

	$sourceConnection->beginTransaction();

	try {
		$helper->deleteItems($sourceConnection, "filecache", "fileid", $fileIds);
		$helper->deleteItems($sourceConnection, "filecache_extended", "fileid", $fileIds);
		$helper->deleteItems($sourceConnection, "files_metadata", "file_id", $fileIds);
	} catch (\Exception $e) {
		$targetConnection->rollback();
		$sourceConnection->rollBack();
		throw $e;
	}

	try {
		$sourceConnection->commit();
	} catch (\Exception $e) {
		// the source rows were not removed, so the copies on the target are discarded again
		$targetConnection->rollback();
		throw $e;
	}
	$targetConnection->commit();
}
+ * + * @return $this + * @since 30.0.0 + */ + public function runAcrossAllShards(); + /** * Get a list of column names that are expected in the query output * diff --git a/lib/public/DB/QueryBuilder/Sharded/IShardMapper.php b/lib/public/DB/QueryBuilder/Sharded/IShardMapper.php new file mode 100644 index 0000000000000..fa00fb68719cd --- /dev/null +++ b/lib/public/DB/QueryBuilder/Sharded/IShardMapper.php @@ -0,0 +1,25 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace OCP\DB\QueryBuilder\Sharded; + +/** + * Implementation of logic of mapping shard keys to shards. + * @since 30.0.0 + */ +interface IShardMapper { + /** + * Get the shard number for a given shard key and total shard count + * + * @param int $key + * @param int $count + * @return int + * @since 30.0.0 + */ + public function getShardForKey(int $key, int $count): int; +} diff --git a/lib/public/IDBConnection.php b/lib/public/IDBConnection.php index 09bd1a564cd46..7ef46ed0609f9 100644 --- a/lib/public/IDBConnection.php +++ b/lib/public/IDBConnection.php @@ -11,6 +11,8 @@ namespace OCP; use Doctrine\DBAL\Schema\Schema; +use OC\DB\QueryBuilder\Sharded\CrossShardMoveHelper; +use OC\DB\QueryBuilder\Sharded\ShardDefinition; use OCP\DB\Exception; use OCP\DB\IPreparedStatement; use OCP\DB\IResult; @@ -345,4 +347,21 @@ public function migrateToSchema(Schema $toSchema): void; * @return self::PLATFORM_MYSQL|self::PLATFORM_ORACLE|self::PLATFORM_POSTGRES|self::PLATFORM_SQLITE */ public function getDatabaseProvider(): string; + + /** + * Get the shard definition by name, if configured + * + * @param string $name + * @return ShardDefinition|null + * @since 30.0.0 + */ + public function getShardDefinition(string $name): ?ShardDefinition; + + /** + * Get a helper class for implementing cross-shard moves + * + * @return CrossShardMoveHelper + * @since 30.0.0 + */ + public function getCrossShardMoveHelper(): CrossShardMoveHelper; } diff --git a/tests/lib/DB/ConnectionFactoryTest.php 
b/tests/lib/DB/ConnectionFactoryTest.php index 208f78f4512e9..d889021f2b4ee 100644 --- a/tests/lib/DB/ConnectionFactoryTest.php +++ b/tests/lib/DB/ConnectionFactoryTest.php @@ -8,6 +8,7 @@ use OC\DB\ConnectionFactory; use OC\SystemConfig; +use OCP\ICacheFactory; use Test\TestCase; class ConnectionFactoryTest extends TestCase { @@ -33,7 +34,8 @@ public function splitHostFromPortAndSocketData() { public function testSplitHostFromPortAndSocket($host, array $expected) { /** @var SystemConfig $config */ $config = $this->createMock(SystemConfig::class); - $factory = new ConnectionFactory($config); + $cacheFactory = $this->createMock(ICacheFactory::class); + $factory = new ConnectionFactory($config, $cacheFactory); $this->assertEquals($expected, self::invokePrivate($factory, 'splitHostFromPortAndSocket', [$host])); } diff --git a/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php b/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php index a893891a96992..0397ce687767b 100644 --- a/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php +++ b/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php @@ -9,6 +9,9 @@ namespace Test\DB\QueryBuilder\Partitioned; use OC\DB\QueryBuilder\Partitioned\PartitionedQueryBuilder; +use OC\DB\QueryBuilder\Partitioned\PartitionSplit; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; +use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; use OCP\DB\QueryBuilder\IQueryBuilder; use OCP\IDBConnection; use OCP\Server; @@ -19,9 +22,13 @@ */ class PartitionedQueryBuilderTest extends TestCase { private IDBConnection $connection; + private ShardConnectionManager $shardConnectionManager; + private AutoIncrementHandler $autoIncrementHandler; protected function setUp(): void { $this->connection = Server::get(IDBConnection::class); + $this->shardConnectionManager = Server::get(ShardConnectionManager::class); + $this->autoIncrementHandler = Server::get(AutoIncrementHandler::class); 
$this->setupFileCache(); } @@ -37,7 +44,7 @@ private function getQueryBuilder(): PartitionedQueryBuilder { if ($builder instanceof PartitionedQueryBuilder) { return $builder; } else { - return new PartitionedQueryBuilder($builder); + return new PartitionedQueryBuilder($builder, [], $this->shardConnectionManager, $this->autoIncrementHandler); } } diff --git a/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php b/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php new file mode 100644 index 0000000000000..83261729e5e56 --- /dev/null +++ b/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php @@ -0,0 +1,125 @@ + + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +namespace Test\DB\QueryBuilder\Sharded; + +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; +use OC\DB\QueryBuilder\Sharded\InvalidShardedQueryException; +use OC\DB\QueryBuilder\Sharded\RoundRobinShardMapper; +use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; +use OC\DB\QueryBuilder\Sharded\ShardDefinition; +use OC\DB\QueryBuilder\Sharded\ShardedQueryBuilder; +use OCP\DB\QueryBuilder\IQueryBuilder; +use OCP\IDBConnection; +use OCP\Server; +use Test\TestCase; + +/** + * @group DB + */ +class SharedQueryBuilderTest extends TestCase { + private IDBConnection $connection; + private AutoIncrementHandler $autoIncrementHandler; + + protected function setUp(): void { + $this->connection = Server::get(IDBConnection::class); + $this->autoIncrementHandler = Server::get(AutoIncrementHandler::class); + } + + + private function getQueryBuilder(string $table, string $shardColumn, string $primaryColumn, array $companionTables = []): ShardedQueryBuilder { + return new ShardedQueryBuilder( + $this->connection->getQueryBuilder(), + [ + new ShardDefinition($table, $primaryColumn, [], $shardColumn, new RoundRobinShardMapper(), $companionTables, []), + ], + $this->createMock(ShardConnectionManager::class), + $this->autoIncrementHandler, + ); + } + + public function testGetShardKeySingleParam() 
{ + $query = $this->getQueryBuilder('filecache', 'storage', 'fileid'); + $query->select('fileid', 'path') + ->from('filecache') + ->where($query->expr()->eq('storage', $query->createNamedParameter(10, IQueryBuilder::PARAM_INT))); + + $this->assertEquals([], $query->getPrimaryKeys()); + $this->assertEquals([10], $query->getShardKeys()); + } + + public function testGetPrimaryKeyParam() { + $query = $this->getQueryBuilder('filecache', 'storage', 'fileid'); + $query->select('fileid', 'path') + ->from('filecache') + ->where($query->expr()->in('fileid', $query->createNamedParameter([10, 11], IQueryBuilder::PARAM_INT))); + + $this->assertEquals([10, 11], $query->getPrimaryKeys()); + $this->assertEquals([], $query->getShardKeys()); + } + + public function testValidateWithShardKey() { + $query = $this->getQueryBuilder('filecache', 'storage', 'fileid'); + $query->select('fileid', 'path') + ->from('filecache') + ->where($query->expr()->eq('storage', $query->createNamedParameter(10))); + + $query->validate(); + $this->assertTrue(true); + } + + public function testValidateWithPrimaryKey() { + $query = $this->getQueryBuilder('filecache', 'storage', 'fileid'); + $query->select('fileid', 'path') + ->from('filecache') + ->where($query->expr()->in('fileid', $query->createNamedParameter([10, 11], IQueryBuilder::PARAM_INT))); + + $query->validate(); + $this->assertTrue(true); + } + + public function testValidateWithNoKey() { + $query = $this->getQueryBuilder('filecache', 'storage', 'fileid'); + $query->select('fileid', 'path') + ->from('filecache') + ->where($query->expr()->lt('size', $query->createNamedParameter(0))); + + $this->expectException(InvalidShardedQueryException::class); + $query->validate(); + $this->fail("exception expected"); + } + + public function testValidateNonSharedTable() { + $query = $this->getQueryBuilder('filecache', 'storage', 'fileid'); + $query->select('configvalue') + ->from('appconfig') + ->where($query->expr()->eq('configkey', 
$query->createNamedParameter('test'))); + + $query->validate(); + $this->assertTrue(true); + } + + public function testGetShardKeyMultipleSingleParam() { + $query = $this->getQueryBuilder('filecache', 'storage', 'fileid'); + $query->select('fileid', 'path') + ->from('filecache') + ->where($query->expr()->andX( + $query->expr()->gt('mtime', $query->createNamedParameter(0), IQueryBuilder::PARAM_INT), + $query->expr()->orX( + $query->expr()->eq('storage', $query->createNamedParameter(10, IQueryBuilder::PARAM_INT)), + $query->expr()->andX( + $query->expr()->eq('storage', $query->createNamedParameter(11, IQueryBuilder::PARAM_INT)), + $query->expr()->like('path', $query->createNamedParameter("foo/%")) + ) + ) + )); + + $this->assertEquals([], $query->getPrimaryKeys()); + $this->assertEquals([10, 11], $query->getShardKeys()); + } +} diff --git a/tests/lib/Files/Cache/CacheTest.php b/tests/lib/Files/Cache/CacheTest.php index a36607eb9659e..4afc1a0d8c9b3 100644 --- a/tests/lib/Files/Cache/CacheTest.php +++ b/tests/lib/Files/Cache/CacheTest.php @@ -483,6 +483,23 @@ public function testMove($sourceFolder, $targetFolder, $children) { } } + public function testMoveFromCache() { + $data = ['size' => 100, 'mtime' => 50, 'mimetype' => 'foo/bar']; + $folderData = ['size' => 100, 'mtime' => 50, 'mimetype' => 'httpd/unix-directory']; + + $this->cache2->put("folder", $folderData); + $this->cache2->put("folder/sub", $data); + + + $this->cache->moveFromCache($this->cache2, "folder", "targetfolder"); + + $this->assertFalse($this->cache2->inCache("folder")); + $this->assertFalse($this->cache2->inCache("folder/sub")); + + $this->assertTrue($this->cache->inCache("targetfolder")); + $this->assertTrue($this->cache->inCache("targetfolder/sub")); + } + public function testGetIncomplete() { $file1 = 'folder1'; $file2 = 'folder2'; diff --git a/tests/lib/TestCase.php b/tests/lib/TestCase.php index 8c97c184c6fda..1dbe69ab6c43f 100644 --- a/tests/lib/TestCase.php +++ b/tests/lib/TestCase.php @@ 
-342,6 +342,7 @@ protected static function tearDownAfterClassCleanStorages(IQueryBuilder $queryBu */ protected static function tearDownAfterClassCleanFileCache(IQueryBuilder $queryBuilder) { $queryBuilder->delete('filecache') + ->runAcrossAllShards() ->execute(); } From ecf1cc2c3cdebb3abeb14e377ef504cacf93ca7b Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 16 Jul 2024 17:48:03 +0200 Subject: [PATCH 06/19] test: mark share test cleanup as running across all shards Signed-off-by: Robin Appelman --- apps/files_sharing/tests/TestCase.php | 2 +- tests/lib/Repair/CleanTagsTest.php | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/files_sharing/tests/TestCase.php b/apps/files_sharing/tests/TestCase.php index 4aaeec1f76587..3fe2993e189d1 100644 --- a/apps/files_sharing/tests/TestCase.php +++ b/apps/files_sharing/tests/TestCase.php @@ -118,7 +118,7 @@ protected function tearDown(): void { $qb->execute(); $qb = \OC::$server->getDatabaseConnection()->getQueryBuilder(); - $qb->delete('filecache'); + $qb->delete('filecache')->runAcrossAllShards(); $qb->execute(); parent::tearDown(); diff --git a/tests/lib/Repair/CleanTagsTest.php b/tests/lib/Repair/CleanTagsTest.php index 586ae24a0aab1..5e7b82d219889 100644 --- a/tests/lib/Repair/CleanTagsTest.php +++ b/tests/lib/Repair/CleanTagsTest.php @@ -65,6 +65,7 @@ protected function cleanUpTables() { ->execute(); $qb->delete('filecache') + ->runAcrossAllShards() ->execute(); } @@ -176,6 +177,7 @@ protected function getFileID() { $fileName = $this->getUniqueID('TestRepairCleanTags', 12); $qb->insert('filecache') ->values([ + 'storage' => $qb->createNamedParameter(1, IQueryBuilder::PARAM_INT), 'path' => $qb->createNamedParameter($fileName), 'path_hash' => $qb->createNamedParameter(md5($fileName)), ]) @@ -183,6 +185,7 @@ protected function getFileID() { $fileName = $this->getUniqueID('TestRepairCleanTags', 12); $qb->insert('filecache') ->values([ + 'storage' => $qb->createNamedParameter(1, 
IQueryBuilder::PARAM_INT), 'path' => $qb->createNamedParameter($fileName), 'path_hash' => $qb->createNamedParameter(md5($fileName)), ]) From 3e5193957b7fa0fc913ca470d66818325569c0ef Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 18 Jul 2024 17:16:01 +0200 Subject: [PATCH 07/19] fix: only allow pre-defined shards Signed-off-by: Robin Appelman --- lib/private/DB/Connection.php | 38 +++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/lib/private/DB/Connection.php b/lib/private/DB/Connection.php index 0f70e732865d6..027ee3be97672 100644 --- a/lib/private/DB/Connection.php +++ b/lib/private/DB/Connection.php @@ -91,6 +91,21 @@ class Connection extends PrimaryReadReplicaConnection { protected ShardConnectionManager $shardConnectionManager; protected AutoIncrementHandler $autoIncrementHandler; + public const SHARD_PRESETS = [ + 'filecache' => [ + 'companion_keys' => [ + 'file_id', + ], + 'companion_tables' => [ + 'filecache_extended', + 'files_metadata', + ], + 'primary_key' => 'fileid', + 'shard_key' => 'storage', + 'table' => 'filecache', + ], + ]; + /** * Initializes a new instance of the Connection class. * @@ -140,23 +155,30 @@ public function __construct( $this->_config->setSQLLogger($debugStack); } - // todo: only allow specific, pre-defined shard configurations, the current config exists for easy testing setup - $this->shards = array_map(function (array $config) { + /** @var array $shardConfig */ + $shardConfig = $this->params['sharding'] ?? []; + $shardNames = array_keys($shardConfig); + $this->shards = array_map(function (array $config, string $name) { + if (!isset(self::SHARD_PRESETS[$name])) { + throw new \Exception("Shard preset $name not found"); + } + $shardMapperClass = $config['mapper'] ?? 
RoundRobinShardMapper::class; $shardMapper = Server::get($shardMapperClass); if (!$shardMapper instanceof IShardMapper) { throw new \Exception("Invalid shard mapper: $shardMapperClass"); } return new ShardDefinition( - $config['table'], - $config['primary_key'], - $config['companion_keys'], - $config['shard_key'], + self::SHARD_PRESETS[$name]['table'], + self::SHARD_PRESETS[$name]['primary_key'], + self::SHARD_PRESETS[$name]['companion_keys'], + self::SHARD_PRESETS[$name]['shard_key'], $shardMapper, - $config['companion_tables'], + self::SHARD_PRESETS[$name]['companion_tables'], $config['shards'] ); - }, $this->params['sharding']); + }, $shardConfig, $shardNames); + $this->shards = array_combine($shardNames, $this->shards); $this->partitions = array_map(function (ShardDefinition $shard) { return array_merge([$shard->table], $shard->companionTables); }, $this->shards); From 22f76fca59dc9cb0681ce5d9de1c9ef0486b45df Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 18 Jul 2024 17:22:02 +0200 Subject: [PATCH 08/19] test: run sharding tests in ci Signed-off-by: Robin Appelman --- .github/workflows/phpunit-mysql-sharding.yml | 184 +++++++++++++++++++ tests/preseed-config.php | 21 +++ 2 files changed, 205 insertions(+) create mode 100644 .github/workflows/phpunit-mysql-sharding.yml diff --git a/.github/workflows/phpunit-mysql-sharding.yml b/.github/workflows/phpunit-mysql-sharding.yml new file mode 100644 index 0000000000000..40727b385c425 --- /dev/null +++ b/.github/workflows/phpunit-mysql-sharding.yml @@ -0,0 +1,184 @@ +# This workflow is provided via the organization template repository +# +# https://github.com/nextcloud/.github +# https://docs.github.com/en/actions/learn-github-actions/sharing-workflows-with-your-organization +# +# SPDX-FileCopyrightText: 2022-2024 Nextcloud GmbH and Nextcloud contributors +# SPDX-License-Identifier: MIT + +name: PHPUnit sharding + +on: + pull_request: + schedule: + - cron: "5 2 * * *" + +permissions: + contents: read + 
+concurrency: + group: phpunit-mysql-sharding-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest-low + + outputs: + src: ${{ steps.changes.outputs.src }} + + steps: + - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2 + id: changes + continue-on-error: true + with: + filters: | + src: + - '.github/workflows/**' + - '3rdparty/**' + - '**/appinfo/**' + - '**/lib/**' + - '**/templates/**' + - '**/tests/**' + - 'vendor/**' + - 'vendor-bin/**' + - '.php-cs-fixer.dist.php' + - 'composer.json' + - 'composer.lock' + - '**.php' + + phpunit-mysql: + runs-on: ubuntu-latest + + needs: changes + if: needs.changes.outputs.src != 'false' + + strategy: + matrix: + php-versions: ['8.1'] + mysql-versions: ['8.4'] + + name: Sharding - MySQL ${{ matrix.mysql-versions }} (PHP ${{ matrix.php-versions }}) - database tests + + services: + cache: + image: ghcr.io/nextcloud/continuous-integration-redis:latest + ports: + - 6379:6379/tcp + options: --health-cmd="redis-cli ping" --health-interval=10s --health-timeout=5s --health-retries=3 + + mysql: + image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest + ports: + - 4444:3306/tcp + env: + MYSQL_ROOT_PASSWORD: rootpassword + MYSQL_USER: oc_autotest + MYSQL_PASSWORD: nextcloud + MYSQL_DATABASE: oc_autotest + options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10 + shard1: + image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest + ports: + - 5001:3306/tcp + env: + MYSQL_ROOT_PASSWORD: rootpassword + MYSQL_USER: oc_autotest + MYSQL_PASSWORD: nextcloud + MYSQL_DATABASE: nextcloud + options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10 + shard2: + image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest + ports: + - 5002:3306/tcp + env: + MYSQL_ROOT_PASSWORD: 
rootpassword + MYSQL_USER: oc_autotest + MYSQL_PASSWORD: nextcloud + MYSQL_DATABASE: nextcloud + options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10 + shard3: + image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest + ports: + - 5003:3306/tcp + env: + MYSQL_ROOT_PASSWORD: rootpassword + MYSQL_USER: oc_autotest + MYSQL_PASSWORD: nextcloud + MYSQL_DATABASE: nextcloud + options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10 + shard4: + image: ghcr.io/nextcloud/continuous-integration-mysql-${{ matrix.mysql-versions }}:latest + ports: + - 5004:3306/tcp + env: + MYSQL_ROOT_PASSWORD: rootpassword + MYSQL_USER: oc_autotest + MYSQL_PASSWORD: nextcloud + MYSQL_DATABASE: nextcloud + options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 10 + + steps: + - name: Checkout server + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 + with: + submodules: true + + - name: Set up php ${{ matrix.php-versions }} + uses: shivammathur/setup-php@2e947f1f6932d141d076ca441d0e1e881775e95b #v2.31.0 + with: + php-version: ${{ matrix.php-versions }} + # https://docs.nextcloud.com/server/stable/admin_manual/installation/source_installation.html#prerequisites-for-manual-installation + extensions: bz2, ctype, curl, dom, fileinfo, gd, iconv, intl, json, libxml, mbstring, openssl, pcntl, posix, redis, session, simplexml, xmlreader, xmlwriter, zip, zlib, mysql, pdo_mysql + coverage: ${{ matrix.coverage && 'xdebug' || 'none' }} + ini-file: development + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up dependencies + run: composer i + + - name: Enable ONLY_FULL_GROUP_BY MySQL option + run: | + echo "SET GLOBAL sql_mode=(SELECT CONCAT(@@sql_mode,',ONLY_FULL_GROUP_BY'));" | mysql -h 127.0.0.1 -P 4444 -u root -prootpassword + echo "SELECT @@sql_mode;" | mysql -h 127.0.0.1 -P 4444 -u root -prootpassword 
+ + - name: Set up Nextcloud + env: + DB_PORT: 4444 + SHARDING: 1 + run: | + mkdir data + cp tests/redis.config.php config/ + cp tests/preseed-config.php config/config.php + ./occ maintenance:install --verbose --database=mysql --database-name=nextcloud --database-host=127.0.0.1 --database-port=$DB_PORT --database-user=root --database-pass=rootpassword --admin-user admin --admin-pass admin + php -f tests/enable_all.php | grep -i -C9999 error && echo "Error during app setup" && exit 1 || exit 0 + + - name: PHPUnit + run: composer run test:db ${{ matrix.coverage && ' -- --coverage-clover ./clover.db.xml' || '' }} + + - name: Upload db code coverage + if: ${{ !cancelled() && matrix.coverage }} + uses: codecov/codecov-action@v4.1.1 + with: + files: ./clover.db.xml + flags: phpunit-mysql + + - name: Print logs + if: always() + run: | + cat data/nextcloud.log + + summary: + permissions: + contents: none + runs-on: ubuntu-latest-low + needs: [changes, phpunit-mysql] + + if: always() + + name: phpunit-mysql-summary + + steps: + - name: Summary status + run: if ${{ needs.changes.outputs.src != 'false' && needs.phpunit-mysql.result != 'success' }}; then exit 1; fi diff --git a/tests/preseed-config.php b/tests/preseed-config.php index 3739335676abb..7c341278027ce 100644 --- a/tests/preseed-config.php +++ b/tests/preseed-config.php @@ -78,3 +78,24 @@ ] ]; } + +if (getenv('SHARDING') == '1') { + $CONFIG['dbsharding'] = [ + "filecache" => [ + "shards" => [ + [ + "port" => 5001, + ], + [ + "port" => 5002, + ], + [ + "port" => 5003, + ], + [ + "port" => 5004, + ], + ] + ] + ]; +} From 693ee5ea0f322983ed11b181b74fa01789b4c410 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Fri, 19 Jul 2024 16:04:47 +0200 Subject: [PATCH 09/19] fix: hint storage id in more places Signed-off-by: Robin Appelman --- lib/private/Files/Cache/Cache.php | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/private/Files/Cache/Cache.php b/lib/private/Files/Cache/Cache.php index 
622381dc507d3..b18b576a7a145 100644 --- a/lib/private/Files/Cache/Cache.php +++ b/lib/private/Files/Cache/Cache.php @@ -371,6 +371,7 @@ public function update($id, array $data) { $query = $this->getQueryBuilder(); $query->update('filecache_extended') ->whereFileId($id) + ->hintShardKey('storage', $this->getNumericStorageId()) ->andWhere($query->expr()->orX(...array_map(function ($key, $value) use ($query) { return $query->expr()->orX( $query->expr()->neq($key, $query->createNamedParameter($value)), @@ -523,7 +524,8 @@ public function remove($file) { $query = $this->getQueryBuilder(); $query->delete('filecache_extended') - ->whereFileId($entry->getId()); + ->whereFileId($entry->getId()) + ->hintShardKey('storage', $this->getNumericStorageId()); $query->execute(); if ($entry->getMimeType() == FileInfo::MIMETYPE_FOLDER) { @@ -567,7 +569,8 @@ private function removeChildren(ICacheEntry $entry) { $query = $this->getQueryBuilder(); $query->delete('filecache_extended') - ->where($query->expr()->in('fileid', $query->createParameter('childIds'))); + ->where($query->expr()->in('fileid', $query->createParameter('childIds'))) + ->hintShardKey('storage', $this->getNumericStorageId()); foreach (array_chunk($childIds, 1000) as $childIdChunk) { $query->setParameter('childIds', $childIdChunk, IQueryBuilder::PARAM_INT_ARRAY); From 0e40fa4fa5cbd243dd4a047cdb574bff124b72b6 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Fri, 19 Jul 2024 19:30:53 +0200 Subject: [PATCH 10/19] fix: run mimetype repair query across all shards Signed-off-by: Robin Appelman --- lib/private/Repair/RepairMimeTypes.php | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/private/Repair/RepairMimeTypes.php b/lib/private/Repair/RepairMimeTypes.php index 2eece761c8da7..ce13151b58123 100644 --- a/lib/private/Repair/RepairMimeTypes.php +++ b/lib/private/Repair/RepairMimeTypes.php @@ -58,6 +58,7 @@ private function updateMimetypes($updatedMimetypes): IResult|int|null { $update = 
$this->connection->getQueryBuilder(); $update->update('filecache') + ->runAcrossAllShards() ->set('mimetype', $update->createParameter('mimetype')) ->where($update->expr()->neq('mimetype', $update->createParameter('mimetype'), IQueryBuilder::PARAM_INT)) ->andWhere($update->expr()->neq('mimetype', $update->createParameter('folder'), IQueryBuilder::PARAM_INT)) From 1b6d76aa1b475a72e601aed4b9c7a272f3e38f21 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Fri, 19 Jul 2024 19:31:10 +0200 Subject: [PATCH 11/19] test: fix share provider tests for sharding Signed-off-by: Robin Appelman --- tests/lib/Share20/DefaultShareProviderTest.php | 10 +++++----- tests/lib/Share20/ShareByMailProviderTest.php | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/lib/Share20/DefaultShareProviderTest.php b/tests/lib/Share20/DefaultShareProviderTest.php index 017deeef114b0..e498d497427cc 100644 --- a/tests/lib/Share20/DefaultShareProviderTest.php +++ b/tests/lib/Share20/DefaultShareProviderTest.php @@ -111,7 +111,7 @@ protected function setUp(): void { protected function tearDown(): void { $this->dbConn->getQueryBuilder()->delete('share')->execute(); - $this->dbConn->getQueryBuilder()->delete('filecache')->execute(); + $this->dbConn->getQueryBuilder()->delete('filecache')->runAcrossAllShards()->execute(); $this->dbConn->getQueryBuilder()->delete('storages')->execute(); } @@ -924,10 +924,10 @@ private function createTestFileEntry($path, $storage = 1) { $qb = $this->dbConn->getQueryBuilder(); $qb->insert('filecache') ->values([ - 'storage' => $qb->expr()->literal($storage), - 'path' => $qb->expr()->literal($path), - 'path_hash' => $qb->expr()->literal(md5($path)), - 'name' => $qb->expr()->literal(basename($path)), + 'storage' => $qb->createNamedParameter($storage, IQueryBuilder::PARAM_INT), + 'path' => $qb->createNamedParameter($path), + 'path_hash' => $qb->createNamedParameter(md5($path)), + 'name' => $qb->createNamedParameter(basename($path)), ]); 
$this->assertEquals(1, $qb->execute()); return $qb->getLastInsertId(); diff --git a/tests/lib/Share20/ShareByMailProviderTest.php b/tests/lib/Share20/ShareByMailProviderTest.php index c3fe0dab79052..7e3eee106e82b 100644 --- a/tests/lib/Share20/ShareByMailProviderTest.php +++ b/tests/lib/Share20/ShareByMailProviderTest.php @@ -125,7 +125,7 @@ protected function setUp(): void { protected function tearDown(): void { $this->dbConn->getQueryBuilder()->delete('share')->execute(); - $this->dbConn->getQueryBuilder()->delete('filecache')->execute(); + $this->dbConn->getQueryBuilder()->delete('filecache')->runAcrossAllShards()->execute(); $this->dbConn->getQueryBuilder()->delete('storages')->execute(); } From 550072342b44f77140d661276d1eaa8e4de0de7c Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 25 Jul 2024 18:46:47 +0200 Subject: [PATCH 12/19] fix: make background scan job compatible with sharding Signed-off-by: Robin Appelman --- apps/files/lib/BackgroundJob/ScanFiles.php | 70 ++++++++++++++++++---- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/apps/files/lib/BackgroundJob/ScanFiles.php b/apps/files/lib/BackgroundJob/ScanFiles.php index 97122e738e2c9..42beb70aaf5d6 100644 --- a/apps/files/lib/BackgroundJob/ScanFiles.php +++ b/apps/files/lib/BackgroundJob/ScanFiles.php @@ -33,11 +33,11 @@ class ScanFiles extends TimedJob { public const USERS_PER_SESSION = 500; public function __construct( - IConfig $config, + IConfig $config, IEventDispatcher $dispatcher, - LoggerInterface $logger, - IDBConnection $connection, - ITimeFactory $time + LoggerInterface $logger, + IDBConnection $connection, + ITimeFactory $time ) { parent::__construct($time); // Run once per 10 minutes @@ -70,15 +70,61 @@ protected function runScanner(string $user): void { * @return string|false */ private function getUserToScan() { + if ($this->connection->getShardDefinition("filecache")) { + // for sharded filecache, the "LIMIT" from the normal query doesn't work + + // first we try 
it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted. + // we also ask for up to 10 results from different storages to increase the odds of finding a result that is mounted + $query = $this->connection->getQueryBuilder(); + $query->select('m.user_id') + ->from('filecache', 'f') + ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage')) + ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) + ->setMaxResults(10) + ->groupBy("f.storage") + ->runAcrossAllShards(); + + $result = $query->executeQuery(); + while ($res = $result->fetch()) { + if ($res['user_id']) { + return $res['user_id']; + } + } + + // as a fallback, we try a slower approach where we find all mounted storages first + // this is essentially doing the inner join manually + $storages = $this->getAllMountedStorages(); + + $query = $this->connection->getQueryBuilder(); + $query->select('m.user_id') + ->from('filecache', 'f') + ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage')) + ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->in('f.storage', $query->createNamedParameter($storages, IQueryBuilder::PARAM_INT_ARRAY))) + ->setMaxResults(1) + ->runAcrossAllShards(); + return $query->executeQuery()->fetchOne(); + } else { + $query = $this->connection->getQueryBuilder(); + $query->select('m.user_id') + ->from('filecache', 'f') + ->innerJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage')) + ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, 
IQueryBuilder::PARAM_INT))) + ->setMaxResults(1) + ->runAcrossAllShards(); + + return $query->executeQuery()->fetchOne(); + } + } + + private function getAllMountedStorages(): array { $query = $this->connection->getQueryBuilder(); - $query->select('user_id') - ->from('filecache', 'f') - ->innerJoin('f', 'mounts', 'm', $query->expr()->eq('storage_id', 'storage')) - ->where($query->expr()->lt('size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) - ->andWhere($query->expr()->gt('parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) - ->setMaxResults(1); - - return $query->executeQuery()->fetchOne(); + $query->selectDistinct('storage_id') + ->from('mounts'); + return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); } /** From ddecae894a456a61dd15886acdf4a90c4d477e96 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Wed, 31 Jul 2024 17:17:54 +0200 Subject: [PATCH 13/19] fix: adjust systemtag orphan cleanup query to work with sharding Signed-off-by: Robin Appelman --- apps/files/lib/BackgroundJob/DeleteOrphanedItems.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php b/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php index 0d6a632db1ac5..32fb569a3d462 100644 --- a/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php +++ b/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php @@ -59,8 +59,8 @@ protected function cleanUp($table, $idCol, $typeCol) { $query->select('t1.' . $idCol) ->from($table, 't1') ->where($query->expr()->eq($typeCol, $query->expr()->literal('files'))) - ->andWhere($query->expr()->isNull('t2.fileid')) ->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid')) + ->andWhere($query->expr()->isNull('t2.fileid')) ->groupBy('t1.' . 
$idCol) ->setMaxResults(self::CHUNK_SIZE); From dc5f0f5fb73f9848ce0fa3f797ac4c817802d33f Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Tue, 6 Aug 2024 14:46:50 +0200 Subject: [PATCH 14/19] fix: fix share cleanup for deleted groups with sharding Signed-off-by: Robin Appelman --- lib/private/Share20/DefaultShareProvider.php | 28 +++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/lib/private/Share20/DefaultShareProvider.php b/lib/private/Share20/DefaultShareProvider.php index 970b2840b4206..7ed4aeefe8f1c 100644 --- a/lib/private/Share20/DefaultShareProvider.php +++ b/lib/private/Share20/DefaultShareProvider.php @@ -1203,10 +1203,14 @@ public function groupDeleted($gid) { if (!empty($ids)) { $chunks = array_chunk($ids, 100); + + $qb = $this->dbConn->getQueryBuilder(); + $qb->delete('share') + ->where($qb->expr()->eq('share_type', $qb->createNamedParameter(IShare::TYPE_USERGROUP))) + ->andWhere($qb->expr()->in('parent', $qb->createParameter('parents'))); + foreach ($chunks as $chunk) { - $qb->delete('share') - ->where($qb->expr()->eq('share_type', $qb->createNamedParameter(IShare::TYPE_USERGROUP))) - ->andWhere($qb->expr()->in('parent', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_INT_ARRAY))); + $qb->setParameter('parents', $chunk, IQueryBuilder::PARAM_INT_ARRAY); $qb->execute(); } } @@ -1247,14 +1251,18 @@ public function userDeletedFromGroup($uid, $gid) { if (!empty($ids)) { $chunks = array_chunk($ids, 100); + + /* + * Delete all special shares with this user for the found group shares + */ + $qb = $this->dbConn->getQueryBuilder(); + $qb->delete('share') + ->where($qb->expr()->eq('share_type', $qb->createNamedParameter(IShare::TYPE_USERGROUP))) + ->andWhere($qb->expr()->eq('share_with', $qb->createNamedParameter($uid))) + ->andWhere($qb->expr()->in('parent', $qb->createParameter('parents'))); + foreach ($chunks as $chunk) { - /* - * Delete all special shares with this users for the found group shares - */ - 
$qb->delete('share') - ->where($qb->expr()->eq('share_type', $qb->createNamedParameter(IShare::TYPE_USERGROUP))) - ->andWhere($qb->expr()->eq('share_with', $qb->createNamedParameter($uid))) - ->andWhere($qb->expr()->in('parent', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_INT_ARRAY))); + $qb->setParameter('parents', $chunk, IQueryBuilder::PARAM_INT_ARRAY); $qb->executeStatement(); } } From b2645590cef45a99ee7c7d549f65b3432b81e422 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 15 Aug 2024 18:04:55 +0200 Subject: [PATCH 15/19] fix: implement sharding compatible cleanup for various bits Signed-off-by: Robin Appelman --- .../lib/BackgroundJob/DeleteOrphanedItems.php | 86 ++++++++++++++---- .../files/lib/Command/DeleteOrphanedFiles.php | 88 +++++++++++++------ .../tests/Command/DeleteOrphanedFilesTest.php | 14 ++- .../lib/DeleteOrphanedSharesJob.php | 41 +++++++++ .../DB/QueryBuilder/ExtendedQueryBuilder.php | 4 +- .../Sharded/ShardedQueryBuilder.php | 6 +- lib/private/Preview/BackgroundCleanupJob.php | 57 ++++++++++++ lib/private/Repair/CleanTags.php | 17 ++-- lib/public/DB/QueryBuilder/IQueryBuilder.php | 2 +- .../lib/Preview/BackgroundCleanupJobTest.php | 8 ++ tests/lib/Repair/CleanTagsTest.php | 9 -- 11 files changed, 264 insertions(+), 68 deletions(-) diff --git a/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php b/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php index 32fb569a3d462..b1a795b775cc9 100644 --- a/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php +++ b/apps/files/lib/BackgroundJob/DeleteOrphanedItems.php @@ -52,34 +52,86 @@ public function run($argument) { * @param string $typeCol * @return int Number of deleted entries */ - protected function cleanUp($table, $idCol, $typeCol) { + protected function cleanUp(string $table, string $idCol, string $typeCol): int { $deletedEntries = 0; - $query = $this->connection->getQueryBuilder(); - $query->select('t1.' . 
$idCol) - ->from($table, 't1') - ->where($query->expr()->eq($typeCol, $query->expr()->literal('files'))) - ->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid')) - ->andWhere($query->expr()->isNull('t2.fileid')) - ->groupBy('t1.' . $idCol) - ->setMaxResults(self::CHUNK_SIZE); - $deleteQuery = $this->connection->getQueryBuilder(); $deleteQuery->delete($table) - ->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid'))); + ->where($deleteQuery->expr()->eq($idCol, $deleteQuery->createParameter('objectid'))); + + if ($this->connection->getShardDefinition('filecache')) { + $sourceIdChunks = $this->getItemIds($table, $idCol, $typeCol, 1000); + foreach ($sourceIdChunks as $sourceIdChunk) { + $deletedSources = $this->findMissingSources($sourceIdChunk); + $deleteQuery->setParameter('objectid', $deletedSources, IQueryBuilder::PARAM_INT_ARRAY); + $deletedEntries += $deleteQuery->executeStatement(); + } + } else { + $query = $this->connection->getQueryBuilder(); + $query->select('t1.' . $idCol) + ->from($table, 't1') + ->where($query->expr()->eq($typeCol, $query->expr()->literal('files'))) + ->leftJoin('t1', 'filecache', 't2', $query->expr()->eq($query->expr()->castColumn('t1.' . $idCol, IQueryBuilder::PARAM_INT), 't2.fileid')) + ->andWhere($query->expr()->isNull('t2.fileid')) + ->groupBy('t1.' . 
$idCol) + ->setMaxResults(self::CHUNK_SIZE); + + $deleteQuery = $this->connection->getQueryBuilder(); + $deleteQuery->delete($table) + ->where($deleteQuery->expr()->in($idCol, $deleteQuery->createParameter('objectid'))); - $deletedInLastChunk = self::CHUNK_SIZE; - while ($deletedInLastChunk === self::CHUNK_SIZE) { - $chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); - $deletedInLastChunk = count($chunk); + $deletedInLastChunk = self::CHUNK_SIZE; + while ($deletedInLastChunk === self::CHUNK_SIZE) { + $chunk = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + $deletedInLastChunk = count($chunk); - $deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY); - $deletedEntries += $deleteQuery->executeStatement(); + $deleteQuery->setParameter('objectid', $chunk, IQueryBuilder::PARAM_INT_ARRAY); + $deletedEntries += $deleteQuery->executeStatement(); + } } return $deletedEntries; } + /** + * @param string $table + * @param string $idCol + * @param string $typeCol + * @param int $chunkSize + * @return \Iterator + * @throws \OCP\DB\Exception + */ + private function getItemIds(string $table, string $idCol, string $typeCol, int $chunkSize): \Iterator { + $query = $this->connection->getQueryBuilder(); + $query->select($idCol) + ->from($table) + ->where($query->expr()->eq($typeCol, $query->expr()->literal('files'))) + ->groupBy($idCol) + ->andWhere($query->expr()->gt($idCol, $query->createParameter('min_id'))) + ->setMaxResults($chunkSize); + + $minId = 0; + while (true) { + $query->setParameter('min_id', $minId); + $rows = $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + if (count($rows) > 0) { + $minId = $rows[count($rows) - 1]; + yield $rows; + } else { + break; + } + } + } + + private function findMissingSources(array $ids): array { + $qb = $this->connection->getQueryBuilder(); + $qb->select('fileid') + ->from('filecache') + ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY))); + 
$found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + return array_diff($ids, $found); + } + /** * Deleting orphaned system tag mappings * diff --git a/apps/files/lib/Command/DeleteOrphanedFiles.php b/apps/files/lib/Command/DeleteOrphanedFiles.php index b7101c072586f..8f93242b255b9 100644 --- a/apps/files/lib/Command/DeleteOrphanedFiles.php +++ b/apps/files/lib/Command/DeleteOrphanedFiles.php @@ -35,17 +35,18 @@ protected function configure(): void { public function execute(InputInterface $input, OutputInterface $output): int { $deletedEntries = 0; + $fileIdsByStorage = []; - $query = $this->connection->getQueryBuilder(); - $query->select('fc.fileid') - ->from('filecache', 'fc') - ->where($query->expr()->isNull('s.numeric_id')) - ->leftJoin('fc', 'storages', 's', $query->expr()->eq('fc.storage', 's.numeric_id')) - ->setMaxResults(self::CHUNK_SIZE); + $deletedStorages = array_diff($this->getReferencedStorages(), $this->getExistingStorages()); + + $deleteExtended = !$input->getOption('skip-filecache-extended'); + if ($deleteExtended) { + $fileIdsByStorage = $this->getFileIdsForStorages($deletedStorages); + } $deleteQuery = $this->connection->getQueryBuilder(); $deleteQuery->delete('filecache') - ->where($deleteQuery->expr()->eq('fileid', $deleteQuery->createParameter('objectid'))); + ->where($deleteQuery->expr()->in('storage', $deleteQuery->createParameter('storage_ids'))); $deletedInLastChunk = self::CHUNK_SIZE; while ($deletedInLastChunk === self::CHUNK_SIZE) { @@ -61,8 +62,8 @@ public function execute(InputInterface $input, OutputInterface $output): int { $output->writeln("$deletedEntries orphaned file cache entries deleted"); - if (!$input->getOption('skip-filecache-extended')) { - $deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended(); + if ($deleteExtended) { + $deletedFileCacheExtended = $this->cleanupOrphanedFileCacheExtended($fileIdsByStorage); $output->writeln("$deletedFileCacheExtended orphaned file cache extended entries deleted"); 
} @@ -72,28 +73,63 @@ public function execute(InputInterface $input, OutputInterface $output): int { return self::SUCCESS; } - private function cleanupOrphanedFileCacheExtended(): int { - $deletedEntries = 0; - + private function getReferencedStorages(): array { $query = $this->connection->getQueryBuilder(); - $query->select('fce.fileid') - ->from('filecache_extended', 'fce') - ->leftJoin('fce', 'filecache', 'fc', $query->expr()->eq('fce.fileid', 'fc.fileid')) - ->where($query->expr()->isNull('fc.fileid')) - ->setMaxResults(self::CHUNK_SIZE); + $query->select('storage') + ->from('filecache') + ->groupBy('storage') + ->runAcrossAllShards(); + return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + } - $deleteQuery = $this->connection->getQueryBuilder(); - $deleteQuery->delete('filecache_extended') - ->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('idsToDelete'))); + private function getExistingStorages(): array { + $query = $this->connection->getQueryBuilder(); + $query->select('numeric_id') + ->from('storages') + ->groupBy('numeric_id'); + return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + } - $result = $query->executeQuery(); - while ($result->rowCount() > 0) { - $idsToDelete = $result->fetchAll(\PDO::FETCH_COLUMN); + /** + * @param int[] $storageIds + * @return array + */ + private function getFileIdsForStorages(array $storageIds): array { + $query = $this->connection->getQueryBuilder(); + $query->select('storage', 'fileid') + ->from('filecache') + ->where($query->expr()->in('storage', $query->createParameter('storage_ids'))); + + $result = []; + $storageIdChunks = array_chunk($storageIds, self::CHUNK_SIZE); + foreach ($storageIdChunks as $storageIdChunk) { + $query->setParameter('storage_ids', $storageIdChunk, IQueryBuilder::PARAM_INT_ARRAY); + $chunk = $query->executeQuery()->fetchAll(); + foreach ($chunk as $row) { + $result[$row['storage']][] = $row['fileid']; + } + } + return $result; + } - 
$deleteQuery->setParameter('idsToDelete', $idsToDelete, IQueryBuilder::PARAM_INT_ARRAY); - $deletedEntries += $deleteQuery->executeStatement(); + /** + * @param array $fileIdsByStorage + * @return int + */ + private function cleanupOrphanedFileCacheExtended(array $fileIdsByStorage): int { + $deletedEntries = 0; - $result = $query->executeQuery(); + $deleteQuery = $this->connection->getQueryBuilder(); + $deleteQuery->delete('filecache_extended') + ->where($deleteQuery->expr()->in('fileid', $deleteQuery->createParameter('file_ids'))); + + foreach ($fileIdsByStorage as $storageId => $fileIds) { + $deleteQuery->hintShardKey('storage', $storageId, true); + $fileChunks = array_chunk($fileIds, self::CHUNK_SIZE); + foreach ($fileChunks as $fileChunk) { + $deleteQuery->setParameter('file_ids', $fileChunk, IQueryBuilder::PARAM_INT_ARRAY); + $deletedEntries += $deleteQuery->executeStatement(); + } } return $deletedEntries; diff --git a/apps/files/tests/Command/DeleteOrphanedFilesTest.php b/apps/files/tests/Command/DeleteOrphanedFilesTest.php index e52f9e1e1305a..ed9a1866d2616 100644 --- a/apps/files/tests/Command/DeleteOrphanedFilesTest.php +++ b/apps/files/tests/Command/DeleteOrphanedFilesTest.php @@ -64,13 +64,19 @@ protected function tearDown(): void { } protected function getFile($fileId) { - $stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*filecache` WHERE `fileid` = ?', [$fileId]); - return $stmt->fetchAll(); + $query = $this->connection->getQueryBuilder(); + $query->select('*') + ->from('filecache') + ->where($query->expr()->eq('fileid', $query->createNamedParameter($fileId))); + return $query->executeQuery()->fetchAll(); } protected function getMounts($storageId) { - $stmt = $this->connection->executeQuery('SELECT * FROM `*PREFIX*mounts` WHERE `storage_id` = ?', [$storageId]); - return $stmt->fetchAll(); + $query = $this->connection->getQueryBuilder(); + $query->select('*') + ->from('mounts') + ->where($query->expr()->eq('storage_id', 
$query->createNamedParameter($storageId))); + return $query->executeQuery()->fetchAll(); } /** diff --git a/apps/files_sharing/lib/DeleteOrphanedSharesJob.php b/apps/files_sharing/lib/DeleteOrphanedSharesJob.php index 9a052b3d126bc..bcbdbaba53165 100644 --- a/apps/files_sharing/lib/DeleteOrphanedSharesJob.php +++ b/apps/files_sharing/lib/DeleteOrphanedSharesJob.php @@ -55,6 +55,11 @@ public function __construct( * @param array $argument unused argument */ public function run($argument) { + if ($this->db->getShardDefinition('filecache')) { + $this->shardingCleanup(); + return; + } + $qbSelect = $this->db->getQueryBuilder(); $qbSelect->select('id') ->from('share', 's') @@ -96,4 +101,40 @@ public function run($argument) { }, $this->db); } while ($deleted >= self::CHUNK_SIZE && $this->time->getTime() <= $cutOff); } + + private function shardingCleanup(): void { + $qb = $this->db->getQueryBuilder(); + $qb->selectDistinct('file_source') + ->from('share', 's'); + $sourceFiles = $qb->executeQuery()->fetchAll(PDO::FETCH_COLUMN); + + $deleteQb = $this->db->getQueryBuilder(); + $deleteQb->delete('share') + ->where( + $deleteQb->expr()->in('file_source', $deleteQb->createParameter('ids'), IQueryBuilder::PARAM_INT_ARRAY) + ); + + $chunks = array_chunk($sourceFiles, self::CHUNK_SIZE); + foreach ($chunks as $chunk) { + $deletedFiles = $this->findMissingSources($chunk); + $this->atomic(function () use ($deletedFiles, $deleteQb) { + $deleteQb->setParameter('ids', $deletedFiles, IQueryBuilder::PARAM_INT_ARRAY); + $deleted = $deleteQb->executeStatement(); + $this->logger->debug("{deleted} orphaned share(s) deleted", [ + 'app' => 'DeleteOrphanedSharesJob', + 'deleted' => $deleted, + ]); + return $deleted; + }, $this->db); + } + } + + private function findMissingSources(array $ids): array { + $qb = $this->db->getQueryBuilder(); + $qb->select('fileid') + ->from('filecache') + ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY))); + $found = 
$qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + return array_diff($ids, $found); + } } diff --git a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php index 8ed88198c19fc..c40cadfbdb5f6 100644 --- a/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/ExtendedQueryBuilder.php @@ -289,8 +289,8 @@ public function executeStatement(?IDBConnection $connection = null): int { return $this->builder->executeStatement($connection); } - public function hintShardKey(string $column, mixed $value) { - $this->builder->hintShardKey($column, $value); + public function hintShardKey(string $column, mixed $value, bool $overwrite = false) { + $this->builder->hintShardKey($column, $value, $overwrite); return $this; } diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php index 6496453a1a609..650e414096e65 100644 --- a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php @@ -296,7 +296,11 @@ private function registerOrder(string $column, string $order): void { ]; } - public function hintShardKey(string $column, mixed $value) { + public function hintShardKey(string $column, mixed $value, bool $overwrite = false) { + if ($overwrite) { + $this->primaryKeys = []; + $this->shardKeys = []; + } if ($this->shardDefinition?->isKey($column)) { $this->primaryKeys[] = $value; } diff --git a/lib/private/Preview/BackgroundCleanupJob.php b/lib/private/Preview/BackgroundCleanupJob.php index deadcd007b1af..acf7bf22f5259 100644 --- a/lib/private/Preview/BackgroundCleanupJob.php +++ b/lib/private/Preview/BackgroundCleanupJob.php @@ -16,6 +16,7 @@ use OCP\Files\NotFoundException; use OCP\Files\NotPermittedException; use OCP\IDBConnection; +use function Symfony\Component\Translation\t; class BackgroundCleanupJob extends TimedJob { /** @var IDBConnection */ @@ 
-64,6 +65,11 @@ private function getDeletedFiles(): \Iterator { } private function getOldPreviewLocations(): \Iterator { + if ($this->connection->getShardDefinition('filecache')) { + // sharding is new enough that we don't need to support this + return; + } + $qb = $this->connection->getQueryBuilder(); $qb->select('a.name') ->from('filecache', 'a') @@ -106,6 +112,15 @@ private function getNewPreviewLocations(): \Iterator { return []; } + if ($this->connection->getShardDefinition('filecache')) { + $chunks = $this->getAllPreviewIds($data['path'], 1000); + foreach ($chunks as $chunk) { + yield from $this->findMissingSources($chunk); + } + + return; + } + /* * This lovely like is the result of the way the new previews are stored * We take the md5 of the name (fileid) and split the first 7 chars. That way @@ -155,4 +170,46 @@ private function getNewPreviewLocations(): \Iterator { $cursor->closeCursor(); } + + private function getAllPreviewIds(string $previewRoot, int $chunkSize): \Iterator { + // See `getNewPreviewLocations` for some more info about the logic here + $like = $this->connection->escapeLikeParameter($previewRoot). 
'/_/_/_/_/_/_/_/%'; + + $qb = $this->connection->getQueryBuilder(); + $qb->select('name', 'fileid') + ->from('filecache') + ->where( + $qb->expr()->andX( + $qb->expr()->eq('storage', $qb->createNamedParameter($this->previewFolder->getStorageId())), + $qb->expr()->like('path', $qb->createNamedParameter($like)), + $qb->expr()->eq('mimetype', $qb->createNamedParameter($this->mimeTypeLoader->getId('httpd/unix-directory'))), + $qb->expr()->gt('fileid', $qb->createParameter('min_id')), + ) + ) + ->orderBy('fileid', 'ASC') + ->setMaxResults($chunkSize); + + $minId = 0; + while (true) { + $qb->setParameter('min_id', $minId); + $rows = $qb->executeQuery()->fetchAll(); + if (count($rows) > 0) { + $minId = $rows[count($rows) - 1]['fileid']; + yield array_map(function ($row) { + return (int)$row['name']; + }, $rows); + } else { + break; + } + } + } + + private function findMissingSources(array $ids): array { + $qb = $this->connection->getQueryBuilder(); + $qb->select('fileid') + ->from('filecache') + ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY))); + $found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN); + return array_diff($ids, $found); + } } diff --git a/lib/private/Repair/CleanTags.php b/lib/private/Repair/CleanTags.php index f2fc8156f29de..b7960a1def1b6 100644 --- a/lib/private/Repair/CleanTags.php +++ b/lib/private/Repair/CleanTags.php @@ -107,7 +107,7 @@ protected function deleteOrphanFileEntries(IOutput $output) { $output, '%d tags for delete files have been removed.', 'vcategory_to_object', 'objid', - 'filecache', 'fileid', 'path_hash' + 'filecache', 'fileid', 'fileid' ); } @@ -169,16 +169,17 @@ protected function deleteOrphanEntries(IOutput $output, $repairInfo, $deleteTabl $orphanItems[] = (int) $row[$deleteId]; } + $deleteQuery = $this->connection->getQueryBuilder(); + $deleteQuery->delete($deleteTable) + ->where( + $deleteQuery->expr()->eq('type', $deleteQuery->expr()->literal('files')) + ) + 
->andWhere($deleteQuery->expr()->in($deleteId, $deleteQuery->createParameter('ids'))); if (!empty($orphanItems)) { $orphanItemsBatch = array_chunk($orphanItems, 200); foreach ($orphanItemsBatch as $items) { - $qb->delete($deleteTable) - ->where( - $qb->expr()->eq('type', $qb->expr()->literal('files')) - ) - ->andWhere($qb->expr()->in($deleteId, $qb->createParameter('ids'))); - $qb->setParameter('ids', $items, IQueryBuilder::PARAM_INT_ARRAY); - $qb->execute(); + $deleteQuery->setParameter('ids', $items, IQueryBuilder::PARAM_INT_ARRAY); + $deleteQuery->executeStatement(); } } diff --git a/lib/public/DB/QueryBuilder/IQueryBuilder.php b/lib/public/DB/QueryBuilder/IQueryBuilder.php index 048de26c22a1e..b673c5ef6ec77 100644 --- a/lib/public/DB/QueryBuilder/IQueryBuilder.php +++ b/lib/public/DB/QueryBuilder/IQueryBuilder.php @@ -1036,7 +1036,7 @@ public function getColumnName($column, $tableAlias = ''); * @return $this * @since 30.0.0 */ - public function hintShardKey(string $column, mixed $value); + public function hintShardKey(string $column, mixed $value, bool $overwrite = false); /** * Set the query to run across all shards if sharding is enabled. 
diff --git a/tests/lib/Preview/BackgroundCleanupJobTest.php b/tests/lib/Preview/BackgroundCleanupJobTest.php index c07ec42b36b34..ccd5dba69cf05 100644 --- a/tests/lib/Preview/BackgroundCleanupJobTest.php +++ b/tests/lib/Preview/BackgroundCleanupJobTest.php @@ -146,6 +146,10 @@ public function testCleanupSystemCron() { } public function testCleanupAjax() { + if ($this->connection->getShardDefinition('filecache')) { + $this->markTestSkipped("ajax cron is not supported for sharded setups"); + return; + } $files = $this->setup11Previews(); $fileIds = array_map(function (File $f) { return $f->getId(); @@ -174,6 +178,10 @@ public function testCleanupAjax() { } public function testOldPreviews() { + if ($this->connection->getShardDefinition('filecache')) { + $this->markTestSkipped("old previews are not supported for sharded setups"); + return; + } $appdata = \OC::$server->getAppDataDir('preview'); $f1 = $appdata->newFolder('123456781'); diff --git a/tests/lib/Repair/CleanTagsTest.php b/tests/lib/Repair/CleanTagsTest.php index 5e7b82d219889..80c0b2c9770a4 100644 --- a/tests/lib/Repair/CleanTagsTest.php +++ b/tests/lib/Repair/CleanTagsTest.php @@ -194,13 +194,4 @@ protected function getFileID() { $this->createdFile = (int) $this->getLastInsertID('filecache', 'fileid'); return $this->createdFile; } - - /** - * @param $tableName - * @param $idName - * @return int - */ - protected function getLastInsertID($tableName, $idName) { - return $this->connection->lastInsertId("*PREFIX*$tableName"); - } } From 57ffbb7391513835d5d29482546f41e9a88f2a57 Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Wed, 21 Aug 2024 17:18:08 +0200 Subject: [PATCH 16/19] fix: make preload custom properties sharding compatible Signed-off-by: Robin Appelman --- apps/dav/lib/DAV/CustomPropertiesBackend.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/apps/dav/lib/DAV/CustomPropertiesBackend.php b/apps/dav/lib/DAV/CustomPropertiesBackend.php index
c3a547ab07d7e..dde97cabf375d 100644 --- a/apps/dav/lib/DAV/CustomPropertiesBackend.php +++ b/apps/dav/lib/DAV/CustomPropertiesBackend.php @@ -364,16 +364,16 @@ private function getPublishedProperties(string $path, array $requestedProperties private function cacheDirectory(string $path, Directory $node): void { $prefix = ltrim($path . '/', '/'); $query = $this->connection->getQueryBuilder(); - $query->select('name', 'propertypath', 'propertyname', 'propertyvalue', 'valuetype') + $query->select('name', 'p.propertypath', 'p.propertyname', 'p.propertyvalue', 'p.valuetype') ->from('filecache', 'f') - ->leftJoin('f', 'properties', 'p', $query->expr()->andX( - $query->expr()->eq('propertypath', $query->func()->concat( - $query->createNamedParameter($prefix), - 'name' - )), - $query->expr()->eq('userid', $query->createNamedParameter($this->user->getUID())) - )) - ->where($query->expr()->eq('parent', $query->createNamedParameter($node->getInternalFileId(), IQueryBuilder::PARAM_INT))); + ->hintShardKey('storage', $node->getNode()->getMountPoint()->getNumericStorageId()) + ->leftJoin('f', 'properties', 'p', $query->expr()->eq('p.propertypath', $query->func()->concat( + $query->createNamedParameter($prefix), + 'f.name' + )), + ) + ->where($query->expr()->eq('parent', $query->createNamedParameter($node->getInternalFileId(), IQueryBuilder::PARAM_INT))) + ->andWhere($query->expr()->eq('p.userid', $query->createNamedParameter($this->user->getUID()))); $result = $query->executeQuery(); $propsByPath = []; From e2bff39f736fabe8e150f1593ed2ea2c6cc2ea6b Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 22 Aug 2024 14:34:37 +0200 Subject: [PATCH 17/19] fix: mark systemconfig value as not being tainted because they are implicitly trusted Signed-off-by: Robin Appelman --- lib/private/SystemConfig.php | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/private/SystemConfig.php b/lib/private/SystemConfig.php index f817e327b194b..ed77526c29c68 
100644 --- a/lib/private/SystemConfig.php +++ b/lib/private/SystemConfig.php @@ -114,6 +114,24 @@ public function __construct( ) { } + /** + * Since system config is admin controlled, we can tell psalm to ignore any taint + * + * @psalm-taint-escape sql + * @psalm-taint-escape html + * @psalm-taint-escape ldap + * @psalm-taint-escape callable + * @psalm-taint-escape file + * @psalm-taint-escape ssrf + * @psalm-taint-escape cookie + * @psalm-taint-escape header + * @psalm-taint-escape has_quotes + * @psalm-pure + */ + public static function trustSystemConfig(mixed $value): mixed { + return $value; + } + /** * Lists all available config keys * @return array an array of key names @@ -150,7 +168,7 @@ public function setValues(array $configs) { * @return mixed the value or $default */ public function getValue($key, $default = '') { - return $this->config->getValue($key, $default); + return $this->trustSystemConfig($this->config->getValue($key, $default)); } /** From e5a8f996bde57c2245432bee22f966b522469f4d Mon Sep 17 00:00:00 2001 From: Louis Chemineau Date: Wed, 28 Aug 2024 10:44:18 +0200 Subject: [PATCH 18/19] chore: Apply php:cs recommendations Signed-off-by: Louis Chemineau [skip ci] --- apps/files/lib/BackgroundJob/ScanFiles.php | 4 ++-- .../lib/DeleteOrphanedSharesJob.php | 2 +- lib/private/DB/ArrayResult.php | 4 ++-- .../Partitioned/JoinCondition.php | 8 ++++---- .../Partitioned/PartitionedQueryBuilder.php | 4 ++-- .../Sharded/AutoIncrementHandler.php | 20 +++++++++---------- .../Sharded/CrossShardMoveHelper.php | 8 ++++---- .../QueryBuilder/Sharded/ShardDefinition.php | 2 +- .../QueryBuilder/Sharded/ShardQueryRunner.php | 4 ++-- .../Sharded/ShardedQueryBuilder.php | 10 +++++----- lib/private/Files/Cache/Cache.php | 18 ++++++++--------- .../Partitioned/JoinConditionTest.php | 2 +- .../Sharded/SharedQueryBuilderTest.php | 4 ++-- tests/lib/Files/Cache/CacheTest.php | 14 ++++++------- .../lib/Preview/BackgroundCleanupJobTest.php | 4 ++-- tests/preseed-config.php | 
12 +++++------ 16 files changed, 60 insertions(+), 60 deletions(-) diff --git a/apps/files/lib/BackgroundJob/ScanFiles.php b/apps/files/lib/BackgroundJob/ScanFiles.php index 42beb70aaf5d6..b7e6e8db10ecb 100644 --- a/apps/files/lib/BackgroundJob/ScanFiles.php +++ b/apps/files/lib/BackgroundJob/ScanFiles.php @@ -70,7 +70,7 @@ protected function runScanner(string $user): void { * @return string|false */ private function getUserToScan() { - if ($this->connection->getShardDefinition("filecache")) { + if ($this->connection->getShardDefinition('filecache')) { // for sharded filecache, the "LIMIT" from the normal query doesn't work // first we try it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted. @@ -82,7 +82,7 @@ private function getUserToScan() { ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT))) ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT))) ->setMaxResults(10) - ->groupBy("f.storage") + ->groupBy('f.storage') ->runAcrossAllShards(); $result = $query->executeQuery(); diff --git a/apps/files_sharing/lib/DeleteOrphanedSharesJob.php b/apps/files_sharing/lib/DeleteOrphanedSharesJob.php index bcbdbaba53165..be082f990bce2 100644 --- a/apps/files_sharing/lib/DeleteOrphanedSharesJob.php +++ b/apps/files_sharing/lib/DeleteOrphanedSharesJob.php @@ -120,7 +120,7 @@ private function shardingCleanup(): void { $this->atomic(function () use ($deletedFiles, $deleteQb) { $deleteQb->setParameter('ids', $deletedFiles, IQueryBuilder::PARAM_INT_ARRAY); $deleted = $deleteQb->executeStatement(); - $this->logger->debug("{deleted} orphaned share(s) deleted", [ + $this->logger->debug('{deleted} orphaned share(s) deleted', [ 'app' => 'DeleteOrphanedSharesJob', 'deleted' => $deleted, ]); diff --git a/lib/private/DB/ArrayResult.php b/lib/private/DB/ArrayResult.php index 5d094533a3f39..b567ad23d577f 100644 --- a/lib/private/DB/ArrayResult.php +++ 
b/lib/private/DB/ArrayResult.php @@ -37,7 +37,7 @@ public function fetch(int $fetchMode = PDO::FETCH_ASSOC) { PDO::FETCH_ASSOC => $row, PDO::FETCH_NUM => array_values($row), PDO::FETCH_COLUMN => current($row), - default => throw new \InvalidArgumentException("Fetch mode not supported for array result"), + default => throw new \InvalidArgumentException('Fetch mode not supported for array result'), }; } @@ -51,7 +51,7 @@ public function fetchAll(int $fetchMode = PDO::FETCH_ASSOC): array { PDO::FETCH_COLUMN => array_map(function ($row) { return current($row); }, $this->rows), - default => throw new \InvalidArgumentException("Fetch mode not supported for array result"), + default => throw new \InvalidArgumentException('Fetch mode not supported for array result'), }; } diff --git a/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php index ff4e1da70b966..a08858d1d6bd6 100644 --- a/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php +++ b/lib/private/DB/QueryBuilder/Partitioned/JoinCondition.php @@ -37,8 +37,8 @@ public function __construct( * @return JoinCondition */ public static function merge(array $conditions): JoinCondition { - $fromColumn = ""; - $toColumn = ""; + $fromColumn = ''; + $toColumn = ''; $fromAlias = null; $toAlias = null; $fromConditions = []; @@ -99,9 +99,9 @@ private static function parseSubCondition($condition, string $join, string $alia $isSubCondition = self::isExtraCondition($condition); if ($isSubCondition) { if (self::mentionsAlias($condition, $fromAlias)) { - return new JoinCondition("", null, "", null, [$condition], []); + return new JoinCondition('', null, '', null, [$condition], []); } else { - return new JoinCondition("", null, "", null, [], [$condition]); + return new JoinCondition('', null, '', null, [], [$condition]); } } diff --git a/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php 
b/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php index 8fcde0d24ae29..175b7c1a42eb7 100644 --- a/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php @@ -391,10 +391,10 @@ public function executeQuery(?IDBConnection $connection = null): IResult { }, false); if ($hasNonLeftJoins) { if (is_int($this->limit)) { - throw new InvalidPartitionedQueryException("Limit is not allowed in partitioned queries"); + throw new InvalidPartitionedQueryException('Limit is not allowed in partitioned queries'); } if (is_int($this->offset)) { - throw new InvalidPartitionedQueryException("Offset is not allowed in partitioned queries"); + throw new InvalidPartitionedQueryException('Offset is not allowed in partitioned queries'); } } } diff --git a/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php index 553644def4e1b..d40934669d741 100644 --- a/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php +++ b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php @@ -29,13 +29,13 @@ public function __construct( private ShardConnectionManager $shardConnectionManager, ) { if (PHP_INT_SIZE < 8) { - throw new \Exception("sharding is only supported with 64bit php"); + throw new \Exception('sharding is only supported with 64bit php'); } } private function getCache(): IMemcache { if(is_null($this->cache)) { - $cache = $this->cacheFactory->createDistributed("shared_autoincrement"); + $cache = $this->cacheFactory->createDistributed('shared_autoincrement'); if ($cache instanceof IMemcache) { $this->cache = $cache; } else { @@ -61,7 +61,7 @@ public function getNextPrimaryKey(ShardDefinition $shardDefinition, int $shard): $next = $this->getNextInner($shardDefinition); if ($next !== null) { if ($next > ShardDefinition::MAX_PRIMARY_KEY) { - throw new \Exception("Max primary key of " . 
ShardDefinition::MAX_PRIMARY_KEY . " exceeded"); + throw new \Exception('Max primary key of ' . ShardDefinition::MAX_PRIMARY_KEY . ' exceeded'); } // we encode the shard the primary key was originally inserted into to allow guessing the shard by primary key later on return ($next << 8) | $shard; @@ -69,7 +69,7 @@ public function getNextPrimaryKey(ShardDefinition $shardDefinition, int $shard): $retries++; } } - throw new \Exception("Failed to get next primary key"); + throw new \Exception('Failed to get next primary key'); } /** @@ -88,7 +88,7 @@ private function getNextInner(ShardDefinition $shardDefinition): ?int { // if that is not the case we find the highest used id in the database increment it, and save it in the cache // prevent inc from returning `1` if the key doesn't exist by setting it to a non-numeric value - $cache->add($shardDefinition->table, "empty-placeholder", self::TTL); + $cache->add($shardDefinition->table, 'empty-placeholder', self::TTL); $next = $cache->inc($shardDefinition->table); if ($cache instanceof IMemcacheTTL) { @@ -101,7 +101,7 @@ private function getNextInner(ShardDefinition $shardDefinition): ?int { return $next; } elseif (is_int($next)) { // we hit the edge case, so invalidate the cached value - if (!$cache->cas($shardDefinition->table, $next, "empty-placeholder")) { + if (!$cache->cas($shardDefinition->table, $next, 'empty-placeholder')) { // someone else is changing the value concurrently, give up and retry return null; } @@ -110,7 +110,7 @@ private function getNextInner(ShardDefinition $shardDefinition): ?int { // discard the encoded initial shard $current = $this->getMaxFromDb($shardDefinition) >> 8; $next = max($current, self::MIN_VALID_KEY) + 1; - if ($cache->cas($shardDefinition->table, "empty-placeholder", $next)) { + if ($cache->cas($shardDefinition->table, 'empty-placeholder', $next)) { return $next; } @@ -120,11 +120,11 @@ private function getNextInner(ShardDefinition $shardDefinition): ?int { return $next; } 
elseif(is_int($next)) { // key got cleared, invalidate and retry - $cache->cas($shardDefinition->table, $next, "empty-placeholder"); + $cache->cas($shardDefinition->table, $next, 'empty-placeholder'); return null; } else { // cleanup any non-numeric value other than the placeholder if that got stored somehow - $cache->ncad($shardDefinition->table, "empty-placeholder"); + $cache->ncad($shardDefinition->table, 'empty-placeholder'); // retry return null; } @@ -140,7 +140,7 @@ private function getMaxFromDb(ShardDefinition $shardDefinition): int { $query = $connection->getQueryBuilder(); $query->select($shardDefinition->primaryKey) ->from($shardDefinition->table) - ->orderBy($shardDefinition->primaryKey, "DESC") + ->orderBy($shardDefinition->primaryKey, 'DESC') ->setMaxResults(1); $result = $query->executeQuery()->fetchOne(); if ($result) { diff --git a/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php b/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php index ffc95e4e54c05..45f24e326858c 100644 --- a/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php +++ b/lib/private/DB/QueryBuilder/Sharded/CrossShardMoveHelper.php @@ -83,13 +83,13 @@ public function loadItems(IDBConnection $connection, string $table, string $prim $query = $connection->getQueryBuilder(); $query->select('*') ->from($table) - ->where($query->expr()->in($primaryColumn, $query->createParameter("keys"))); + ->where($query->expr()->in($primaryColumn, $query->createParameter('keys'))); $chunks = array_chunk($primaryKeys, 1000); $results = []; foreach ($chunks as $chunk) { - $query->setParameter("keys", $chunk, IQueryBuilder::PARAM_INT_ARRAY); + $query->setParameter('keys', $chunk, IQueryBuilder::PARAM_INT_ARRAY); $results = array_merge($results, $query->execute()->fetchAll()); } @@ -151,11 +151,11 @@ public function updateItems(IDBConnection $connection, string $table, string $sh public function deleteItems(IDBConnection $connection, string $table, string $primaryColumn, array 
$primaryKeys): void { $query = $connection->getQueryBuilder(); $query->delete($table) - ->where($query->expr()->in($primaryColumn, $query->createParameter("keys"))); + ->where($query->expr()->in($primaryColumn, $query->createParameter('keys'))); $chunks = array_chunk($primaryKeys, 1000); foreach ($chunks as $chunk) { - $query->setParameter("keys", $chunk, IQueryBuilder::PARAM_INT_ARRAY); + $query->setParameter('keys', $chunk, IQueryBuilder::PARAM_INT_ARRAY); $query->executeStatement(); } } diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php b/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php index 5661ca079e1c1..ebccbb639a624 100644 --- a/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php +++ b/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php @@ -41,7 +41,7 @@ public function __construct( public array $shards = [], ) { if (count($this->shards) >= self::MAX_SHARDS) { - throw new \Exception("Only allowed maximum of " . self::MAX_SHARDS . " shards allowed"); + throw new \Exception('Only allowed maximum of ' . self::MAX_SHARDS . 
' shards allowed'); } } diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php b/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php index 22b86a018b3dc..51cd055e801fa 100644 --- a/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php +++ b/lib/private/DB/QueryBuilder/Sharded/ShardQueryRunner.php @@ -135,7 +135,7 @@ public function executeQuery( if ($cmp === 0) { continue; } - if ($sort['order'] === "DESC") { + if ($sort['order'] === 'DESC') { $cmp = -$cmp; } return $cmp; @@ -166,7 +166,7 @@ public function executeQuery( */ public function executeStatement(IQueryBuilder $query, bool $allShards, array $shardKeys, array $primaryKeys): int { if ($query->getType() === \Doctrine\DBAL\Query\QueryBuilder::INSERT) { - throw new \Exception("insert queries need special handling"); + throw new \Exception('insert queries need special handling'); } $shards = $this->getShards($allShards, $shardKeys); diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php index 650e414096e65..e7bc70ce440c3 100644 --- a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php @@ -36,7 +36,7 @@ class ShardedQueryBuilder extends ExtendedQueryBuilder { private ?int $updateShardKey = null; private ?int $limit = null; private ?int $offset = null; - /** @var array{column: string, order: string}[] */ + /** @var array{column: string, order: string}[] */ private array $sortList = []; private string $mainTable = ''; @@ -276,13 +276,13 @@ public function setFirstResult($firstResult) { } public function addOrderBy($sort, $order = null) { - $this->registerOrder((string) $sort, (string)$order ?? "ASC"); + $this->registerOrder((string)$sort, (string)$order ?? 'ASC'); return parent::orderBy($sort, $order); } public function orderBy($sort, $order = null) { $this->sortList = []; - $this->registerOrder((string) $sort, (string)$order ?? 
"ASC"); + $this->registerOrder((string)$sort, (string)$order ?? 'ASC'); return parent::orderBy($sort, $order); } @@ -329,7 +329,7 @@ public function validate(): void { } if ($this->shardDefinition && !$this->allShards) { if (empty($this->getShardKeys()) && empty($this->getPrimaryKeys())) { - throw new InvalidShardedQueryException("No shard key or primary key set for query"); + throw new InvalidShardedQueryException('No shard key or primary key set for query'); } } if ($this->shardDefinition && $this->updateShardKey) { @@ -347,7 +347,7 @@ public function validate(): void { }, $oldShardKeys))); $newShard = $this->shardDefinition->getShardForKey((int)$newShardKey); if ($oldShards === [$newShard]) { - throw new InvalidShardedQueryException("Update statement would move rows to a different shard"); + throw new InvalidShardedQueryException('Update statement would move rows to a different shard'); } } } diff --git a/lib/private/Files/Cache/Cache.php b/lib/private/Files/Cache/Cache.php index b18b576a7a145..d866c8e6bcb14 100644 --- a/lib/private/Files/Cache/Cache.php +++ b/lib/private/Files/Cache/Cache.php @@ -1220,9 +1220,9 @@ private function moveFromStorageSharded(ShardDefinition $shardDefinition, ICache $sourceConnection = $helper->getConnection($shardDefinition, $sourceCache->getNumericStorageId()); $targetConnection = $helper->getConnection($shardDefinition, $this->getNumericStorageId()); - $cacheItems = $helper->loadItems($sourceConnection, "filecache", "fileid", $fileIds); - $extendedItems = $helper->loadItems($sourceConnection, "filecache_extended", "fileid", $fileIds); - $metadataItems = $helper->loadItems($sourceConnection, "files_metadata", "file_id", $fileIds); + $cacheItems = $helper->loadItems($sourceConnection, 'filecache', 'fileid', $fileIds); + $extendedItems = $helper->loadItems($sourceConnection, 'filecache_extended', 'fileid', $fileIds); + $metadataItems = $helper->loadItems($sourceConnection, 'files_metadata', 'file_id', $fileIds); // when moving from 
an encrypted storage to a non-encrypted storage remove the `encrypted` mark $removeEncryptedFlag = ($sourceCache instanceof Cache && $sourceCache->hasEncryptionWrapper()) && !$this->hasEncryptionWrapper(); @@ -1246,9 +1246,9 @@ private function moveFromStorageSharded(ShardDefinition $shardDefinition, ICache $targetConnection->beginTransaction(); try { - $helper->saveItems($targetConnection, "filecache", $cacheItems); - $helper->saveItems($targetConnection, "filecache_extended", $extendedItems); - $helper->saveItems($targetConnection, "files_metadata", $metadataItems); + $helper->saveItems($targetConnection, 'filecache', $cacheItems); + $helper->saveItems($targetConnection, 'filecache_extended', $extendedItems); + $helper->saveItems($targetConnection, 'files_metadata', $metadataItems); } catch (\Exception $e) { $targetConnection->rollback(); throw $e; @@ -1257,9 +1257,9 @@ private function moveFromStorageSharded(ShardDefinition $shardDefinition, ICache $sourceConnection->beginTransaction(); try { - $helper->deleteItems($sourceConnection, "filecache", "fileid", $fileIds); - $helper->deleteItems($sourceConnection, "filecache_extended", "fileid", $fileIds); - $helper->deleteItems($sourceConnection, "files_metadata", "file_id", $fileIds); + $helper->deleteItems($sourceConnection, 'filecache', 'fileid', $fileIds); + $helper->deleteItems($sourceConnection, 'filecache_extended', 'fileid', $fileIds); + $helper->deleteItems($sourceConnection, 'files_metadata', 'file_id', $fileIds); } catch (\Exception $e) { $targetConnection->rollback(); $sourceConnection->rollBack(); diff --git a/tests/lib/DB/QueryBuilder/Partitioned/JoinConditionTest.php b/tests/lib/DB/QueryBuilder/Partitioned/JoinConditionTest.php index 56a8e5783aa2b..a8ebf11c2c2f4 100644 --- a/tests/lib/DB/QueryBuilder/Partitioned/JoinConditionTest.php +++ b/tests/lib/DB/QueryBuilder/Partitioned/JoinConditionTest.php @@ -47,7 +47,7 @@ private function getBuilder(string $platform): IQueryBuilder { public function 
testParseCondition(string $platform): void { $query = $this->getBuilder($platform); $param1 = $query->createNamedParameter('files'); - $param2 = $query->createNamedParameter("test"); + $param2 = $query->createNamedParameter('test'); $condition = $query->expr()->andX( $query->expr()->eq('tagmap.categoryid', 'tag.id'), $query->expr()->eq('tag.type', $param1), diff --git a/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php b/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php index 83261729e5e56..7af93bbbe590a 100644 --- a/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php +++ b/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php @@ -91,7 +91,7 @@ public function testValidateWithNoKey() { $this->expectException(InvalidShardedQueryException::class); $query->validate(); - $this->fail("exception expected"); + $this->fail('exception expected'); } public function testValidateNonSharedTable() { @@ -114,7 +114,7 @@ public function testGetShardKeyMultipleSingleParam() { $query->expr()->eq('storage', $query->createNamedParameter(10, IQueryBuilder::PARAM_INT)), $query->expr()->andX( $query->expr()->eq('storage', $query->createNamedParameter(11, IQueryBuilder::PARAM_INT)), - $query->expr()->like('path', $query->createNamedParameter("foo/%")) + $query->expr()->like('path', $query->createNamedParameter('foo/%')) ) ) )); diff --git a/tests/lib/Files/Cache/CacheTest.php b/tests/lib/Files/Cache/CacheTest.php index 4afc1a0d8c9b3..12955f662cc00 100644 --- a/tests/lib/Files/Cache/CacheTest.php +++ b/tests/lib/Files/Cache/CacheTest.php @@ -487,17 +487,17 @@ public function testMoveFromCache() { $data = ['size' => 100, 'mtime' => 50, 'mimetype' => 'foo/bar']; $folderData = ['size' => 100, 'mtime' => 50, 'mimetype' => 'httpd/unix-directory']; - $this->cache2->put("folder", $folderData); - $this->cache2->put("folder/sub", $data); + $this->cache2->put('folder', $folderData); + $this->cache2->put('folder/sub', $data); - 
$this->cache->moveFromCache($this->cache2, "folder", "targetfolder"); + $this->cache->moveFromCache($this->cache2, 'folder', 'targetfolder'); - $this->assertFalse($this->cache2->inCache("folder")); - $this->assertFalse($this->cache2->inCache("folder/sub")); + $this->assertFalse($this->cache2->inCache('folder')); + $this->assertFalse($this->cache2->inCache('folder/sub')); - $this->assertTrue($this->cache->inCache("targetfolder")); - $this->assertTrue($this->cache->inCache("targetfolder/sub")); + $this->assertTrue($this->cache->inCache('targetfolder')); + $this->assertTrue($this->cache->inCache('targetfolder/sub')); } public function testGetIncomplete() { diff --git a/tests/lib/Preview/BackgroundCleanupJobTest.php b/tests/lib/Preview/BackgroundCleanupJobTest.php index ccd5dba69cf05..82f1979772800 100644 --- a/tests/lib/Preview/BackgroundCleanupJobTest.php +++ b/tests/lib/Preview/BackgroundCleanupJobTest.php @@ -147,7 +147,7 @@ public function testCleanupSystemCron() { public function testCleanupAjax() { if ($this->connection->getShardDefinition('filecache')) { - $this->markTestSkipped("ajax cron is not supported for sharded setups"); + $this->markTestSkipped('ajax cron is not supported for sharded setups'); return; } $files = $this->setup11Previews(); @@ -179,7 +179,7 @@ public function testCleanupAjax() { public function testOldPreviews() { if ($this->connection->getShardDefinition('filecache')) { - $this->markTestSkipped("old previews are not supported for sharded setups"); + $this->markTestSkipped('old previews are not supported for sharded setups'); return; } $appdata = \OC::$server->getAppDataDir('preview'); diff --git a/tests/preseed-config.php b/tests/preseed-config.php index 7c341278027ce..f9c1593946946 100644 --- a/tests/preseed-config.php +++ b/tests/preseed-config.php @@ -81,19 +81,19 @@ if (getenv('SHARDING') == '1') { $CONFIG['dbsharding'] = [ - "filecache" => [ - "shards" => [ + 'filecache' => [ + 'shards' => [ [ - "port" => 5001, + 'port' => 5001, ], [ 
- "port" => 5002, + 'port' => 5002, ], [ - "port" => 5003, + 'port' => 5003, ], [ - "port" => 5004, + 'port' => 5004, ], ] ] From 140b36fcea956b149b25d1fbd3fc961a20b819eb Mon Sep 17 00:00:00 2001 From: Louis Chemineau Date: Wed, 28 Aug 2024 12:28:50 +0200 Subject: [PATCH 19/19] fix: Backport to 30 Signed-off-by: Louis Chemineau --- apps/files/lib/Command/DeleteOrphanedFiles.php | 14 ++++---------- lib/private/DB/QueryBuilder/QueryBuilder.php | 10 ++++++++++ tests/lib/Repair/CleanTagsTest.php | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/apps/files/lib/Command/DeleteOrphanedFiles.php b/apps/files/lib/Command/DeleteOrphanedFiles.php index 8f93242b255b9..047e26f11eca7 100644 --- a/apps/files/lib/Command/DeleteOrphanedFiles.php +++ b/apps/files/lib/Command/DeleteOrphanedFiles.php @@ -48,16 +48,10 @@ public function execute(InputInterface $input, OutputInterface $output): int { $deleteQuery->delete('filecache') ->where($deleteQuery->expr()->in('storage', $deleteQuery->createParameter('storage_ids'))); - $deletedInLastChunk = self::CHUNK_SIZE; - while ($deletedInLastChunk === self::CHUNK_SIZE) { - $deletedInLastChunk = 0; - $result = $query->execute(); - while ($row = $result->fetch()) { - $deletedInLastChunk++; - $deletedEntries += $deleteQuery->setParameter('objectid', (int) $row['fileid']) - ->execute(); - } - $result->closeCursor(); + $deletedStorageChunks = array_chunk($deletedStorages, self::CHUNK_SIZE); + foreach ($deletedStorageChunks as $deletedStorageChunk) { + $deleteQuery->setParameter('storage_ids', $deletedStorageChunk, IQueryBuilder::PARAM_INT_ARRAY); + $deletedEntries += $deleteQuery->executeStatement(); } $output->writeln("$deletedEntries orphaned file cache entries deleted"); diff --git a/lib/private/DB/QueryBuilder/QueryBuilder.php b/lib/private/DB/QueryBuilder/QueryBuilder.php index 912d48cd34ece..ec2581d7e9a8b 100644 --- a/lib/private/DB/QueryBuilder/QueryBuilder.php +++ b/lib/private/DB/QueryBuilder/QueryBuilder.php @@ 
-1365,4 +1365,14 @@ public function quoteAlias($alias) { return $this->helper->quoteColumnName($alias); } + + public function hintShardKey(string $column, mixed $value, bool $overwrite = false) { + return $this; + } + + public function runAcrossAllShards() { + // noop + return $this; + } + } diff --git a/tests/lib/Repair/CleanTagsTest.php b/tests/lib/Repair/CleanTagsTest.php index 80c0b2c9770a4..11430cc121550 100644 --- a/tests/lib/Repair/CleanTagsTest.php +++ b/tests/lib/Repair/CleanTagsTest.php @@ -142,7 +142,7 @@ protected function addTagCategory($category, $type, $user = 'TestRepairCleanTags ]) ->execute(); - return (int) $this->getLastInsertID('vcategory', 'id'); + return $qb->getLastInsertId(); } /** @@ -191,7 +191,7 @@ protected function getFileID() { ]) ->execute(); - $this->createdFile = (int) $this->getLastInsertID('filecache', 'fileid'); + $this->createdFile = $qb->getLastInsertId(); return $this->createdFile; } }