Skip to content

Commit

Permalink
Version 1.4.0 (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
Toflar authored Dec 20, 2021
1 parent 5e49557 commit 67ede9c
Show file tree
Hide file tree
Showing 12 changed files with 137 additions and 107 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
pull_request: ~
push:
branches:
- main
- '*'
tags:
- '*'
schedule:
Expand Down Expand Up @@ -35,7 +35,7 @@ jobs:
strategy:
fail-fast: false
matrix:
php: [7.2, 7.3, 7.4, 8.0]
php: [7.2, 7.3, 7.4, 8.0, 8.1]
steps:
- name: Setup PHP
uses: shivammathur/setup-php@v2
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
/composer.lock
/phpunit.xml
/.phpunit.result.cache
/.php_cs
/.php_cs.cache
/.php-cs-fixer.php
/.php-cs-fixer.cache
48 changes: 48 additions & 0 deletions .php-cs-fixer.dist.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

// PHP-CS-Fixer configuration: builds and returns the Config object used to
// check the coding style of the library sources and the test suite.

// Current year, interpolated into the copyright range of the file header.
$year = date('Y');

$header = <<<EOF
Escargot
@copyright Copyright (c) 2019 - $year, terminal42 gmbh
@author terminal42 gmbh <[email protected]>
@license MIT
EOF;

// Directories scanned by the fixer.
$sourceDirs = [
    __DIR__.'/src',
    __DIR__.'/tests',
];

$finder = PhpCsFixer\Finder::create()->in($sourceDirs);

// Symfony presets (including the risky variant) plus project-specific rules.
$rules = [
    '@Symfony' => true,
    '@Symfony:risky' => true,
    'array_syntax' => ['syntax' => 'short'],
    'combine_consecutive_unsets' => true,
    'declare_strict_types' => true,
    'general_phpdoc_annotation_remove' => true,
    'header_comment' => ['header' => $header],
    'heredoc_to_nowdoc' => true,
    'no_extra_blank_lines' => true,
    'no_unreachable_default_argument_value' => true,
    'no_useless_else' => true,
    'no_useless_return' => true,
    'no_superfluous_phpdoc_tags' => true,
    'ordered_class_elements' => true,
    'ordered_imports' => true,
    'php_unit_strict' => true,
    'phpdoc_add_missing_param_annotation' => true,
    'phpdoc_order' => true,
    'psr_autoloading' => true,
    'strict_comparison' => true,
    'strict_param' => true,
    'native_function_invocation' => ['include' => ['@compiler_optimized']],
    'void_return' => true,
];

// Risky rules must be explicitly allowed because '@Symfony:risky' and several
// of the individual rules above are flagged as risky by the fixer.
return (new PhpCsFixer\Config())
    ->setRiskyAllowed(true)
    ->setRules($rules)
    ->setFinder($finder)
;
43 changes: 0 additions & 43 deletions .php_cs.dist

This file was deleted.

14 changes: 7 additions & 7 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@
"source": "https://github.com/terminal42/escargot"
},
"require": {
"php": "^7.2 | ^8.0",
"php": "^7.2 || ^8.0",
"ext-simplexml": "*",
"nyholm/psr7": "^1.1",
"psr/http-message": "^1.0",
"psr/log": "^1.1",
"symfony/dom-crawler": "^4.4 | ^5.0",
"symfony/event-dispatcher": "^4.4 | ^5.0",
"symfony/http-client": "^4.4 | ^5.0",
"symfony/dom-crawler": "^4.4 || ^5.0 || ^6.0",
"symfony/event-dispatcher": "^4.4 || ^5.0 || ^6.0",
"symfony/http-client": "^4.4 || ^5.0 || ^6.0",
"webignition/robots-txt-file": "^3.0"
},
"require-dev": {
"doctrine/dbal": "^2.10",
"symfony/finder": "^4.4 | ^5.0",
"symfony/phpunit-bridge": "^5.1.8"
"doctrine/dbal": "^2.13 || ^3.0",
"symfony/finder": "^4.4 || ^5.0 || ^6.0",
"symfony/phpunit-bridge": "^5.1.8 || ^6.0"
},
"config": {
"preferred-install": {
Expand Down
4 changes: 2 additions & 2 deletions src/BaseUriCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,15 @@ public function all(): array
/**
* {@inheritdoc}
*/
public function getIterator()
public function getIterator(): \Traversable
{
return new \ArrayIterator($this->all());
}

/**
* {@inheritdoc}
*/
public function count()
public function count(): int
{
return \count($this->all());
}
Expand Down
2 changes: 1 addition & 1 deletion src/CrawlUri.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public function __toString(): string
(string) $this->getUri(),
$this->getLevel(),
$this->isProcessed() ? 'yes' : 'no',
(string) ($this->getFoundOn() ? $this->getFoundOn() : 'root'),
(string) ($this->getFoundOn() ?: 'root'),
$this->getTags() ? implode(', ', $this->getTags()) : 'none'
);
}
Expand Down
104 changes: 64 additions & 40 deletions src/Queue/DoctrineQueue.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@
namespace Terminal42\Escargot\Queue;

use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Schema\Comparator;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\DBAL\Schema\Synchronizer\SchemaSynchronizer;
use Doctrine\DBAL\Schema\Synchronizer\SingleDatabaseSynchronizer;
use Doctrine\DBAL\Types\Types;
use Psr\Http\Message\UriInterface;
use Terminal42\Escargot\BaseUriCollection;
Expand All @@ -39,17 +38,11 @@ final class DoctrineQueue implements QueueInterface
*/
private $tableName;

/**
* @var SchemaSynchronizer
*/
private $schemaSynchronizer;

public function __construct(Connection $connection, \Closure $jobIdGenerator, ?string $tableName = null, ?SchemaSynchronizer $schemaSynchronizer = null)
public function __construct(Connection $connection, \Closure $jobIdGenerator, ?string $tableName = null)
{
$this->connection = $connection;
$this->jobIdGenerator = $jobIdGenerator;
$this->tableName = $tableName ?? 'escargot';
$this->schemaSynchronizer = $schemaSynchronizer ?? new SingleDatabaseSynchronizer($connection);
}

public function createJobId(BaseUriCollection $baseUris): string
Expand All @@ -69,20 +62,21 @@ public function isJobIdValid(string $jobId): bool
->select('COUNT(job_id) as count')
->from($this->tableName)
->where('job_id = :jobId')
->setParameter(':jobId', $jobId, Types::STRING)
->setParameter('jobId', $jobId, Types::STRING)
->setMaxResults(1);
$stmt = method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();

return (bool) $queryBuilder->execute()->fetchColumn();
return (bool) $stmt->fetchOne();
}

public function deleteJobId(string $jobId): void
{
$queryBuilder = $this->connection->createQueryBuilder()
->delete($this->tableName)
->where('job_id = :jobId')
->setParameter(':jobId', $jobId, Types::STRING);
->setParameter('jobId', $jobId, Types::STRING);

$queryBuilder->execute();
method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();
}

public function getBaseUris(string $jobId): BaseUriCollection
Expand All @@ -94,13 +88,15 @@ public function getBaseUris(string $jobId): BaseUriCollection
->from($this->tableName)
->where('job_id = :jobId')
->andWhere('level = :level')
->setParameter(':jobId', $jobId, Types::STRING)
->setParameter(':level', 0, Types::INTEGER);
->setParameter('jobId', $jobId, Types::STRING)
->setParameter('level', 0, Types::INTEGER);

$stmt = method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();

$uris = $queryBuilder->execute()->fetchAll(\PDO::FETCH_COLUMN);
$rows = $stmt->fetchAllAssociative();

foreach ($uris as $uri) {
$baseUris->add(HttpUriFactory::create($uri));
foreach ($rows as $row) {
$baseUris->add(HttpUriFactory::create($row['uri']));
}

return $baseUris;
Expand All @@ -113,11 +109,13 @@ public function get(string $jobId, UriInterface $uri): ?CrawlUri
->from($this->tableName)
->where('job_id = :jobId')
->andWhere('uri_hash = :uri_hash')
->setParameter(':jobId', $jobId, Types::STRING)
->setParameter(':uri_hash', $this->getUriHash($uri), Types::STRING)
->setParameter('jobId', $jobId, Types::STRING)
->setParameter('uri_hash', $this->getUriHash($uri), Types::STRING)
->setMaxResults(1);

$data = $queryBuilder->execute()->fetch();
$stmt = method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();

$data = $stmt->fetchAssociative();

if (false === $data) {
return null;
Expand All @@ -142,9 +140,9 @@ public function add(string $jobId, CrawlUri $crawlUri): void
'processed' => ':processed',
'tags' => ':tags',
])
->setParameter(':uri', (string) $crawlUri->getUri(), Types::STRING)
->setParameter(':level', (int) $crawlUri->getLevel(), Types::INTEGER)
->setParameter(':foundOn', $crawlUri->getFoundOn(), Types::STRING);
->setParameter('uri', (string) $crawlUri->getUri(), Types::STRING)
->setParameter('level', $crawlUri->getLevel(), Types::INTEGER)
->setParameter('foundOn', $crawlUri->getFoundOn(), Types::STRING);
} else {
$queryBuilder
->update($this->tableName)
Expand All @@ -155,12 +153,12 @@ public function add(string $jobId, CrawlUri $crawlUri): void
}

$queryBuilder
->setParameter(':jobId', $jobId, Types::STRING)
->setParameter(':uri_hash', $this->getUriHash($crawlUri->getUri()), Types::STRING)
->setParameter(':processed', $crawlUri->isProcessed(), Types::BOOLEAN)
->setParameter(':tags', implode(',', $crawlUri->getTags()), Types::TEXT);
->setParameter('jobId', $jobId, Types::STRING)
->setParameter('uri_hash', $this->getUriHash($crawlUri->getUri()), Types::STRING)
->setParameter('processed', $crawlUri->isProcessed(), Types::BOOLEAN)
->setParameter('tags', implode(',', $crawlUri->getTags()), Types::TEXT);

$queryBuilder->execute();
method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();
}

public function getNext(string $jobId, int $skip = 0): ?CrawlUri
Expand All @@ -171,15 +169,16 @@ public function getNext(string $jobId, int $skip = 0): ?CrawlUri
->where('job_id = :jobId')
->andWhere('processed = :processed')
->orderBy('id', 'ASC')
->setParameter(':jobId', $jobId, Types::STRING)
->setParameter(':processed', false, Types::BOOLEAN)
->setParameter('jobId', $jobId, Types::STRING)
->setParameter('processed', false, Types::BOOLEAN)
->setMaxResults(1);

if ($skip > 0) {
$queryBuilder->setFirstResult($skip);
}

$data = $queryBuilder->execute()->fetch();
$stmt = method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();
$data = $stmt->fetchAssociative();

if (false === $data) {
return null;
Expand All @@ -194,10 +193,12 @@ public function countAll(string $jobId): int
->select('COUNT(job_id) as count')
->from($this->tableName)
->where('job_id = :jobId')
->setParameter(':jobId', $jobId, Types::STRING)
->setParameter('jobId', $jobId, Types::STRING)
->setMaxResults(1);

return (int) $queryBuilder->execute()->fetchColumn();
$stmt = method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();

return (int) $stmt->fetchOne();
}

public function countPending(string $jobId): int
Expand All @@ -207,11 +208,13 @@ public function countPending(string $jobId): int
->from($this->tableName)
->where('job_id = :jobId')
->andWhere('processed = :processed')
->setParameter(':jobId', $jobId, Types::STRING)
->setParameter(':processed', false, Types::BOOLEAN)
->setParameter('jobId', $jobId, Types::STRING)
->setParameter('processed', false, Types::BOOLEAN)
->setMaxResults(1);

return (int) $queryBuilder->execute()->fetchColumn();
$stmt = method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();

return (int) $stmt->fetchOne();
}

public function getAll(string $jobId): \Generator
Expand All @@ -221,8 +224,11 @@ public function getAll(string $jobId): \Generator
->from($this->tableName)
->where('job_id = :jobId')
->orderBy('id', 'ASC')
->setParameter(':jobId', $jobId, Types::STRING);
$allData = $queryBuilder->execute()->fetchAll();
->setParameter('jobId', $jobId, Types::STRING);

$stmt = method_exists($queryBuilder, 'executeQuery') ? $queryBuilder->executeQuery() : $queryBuilder->execute();

$allData = $stmt->fetchAllAssociative();

foreach ($allData as $data) {
yield $this->createCrawlUriFromRow($data);
Expand Down Expand Up @@ -267,7 +273,25 @@ public function createSchema(): void
$table->addIndex(['uri']);
$table->addIndex(['processed']);

$this->schemaSynchronizer->createSchema($schema);
$schemaManager = method_exists($this->connection, 'createSchemaManager') ?
$this->connection->createSchemaManager() :
$this->connection->getSchemaManager()
;

if (!$schemaManager->tablesExist($this->tableName)) {
$queries = $this->connection->getDatabasePlatform()->getCreateTableSQL($table);
} else {
$comparator = method_exists($schemaManager, 'createComparator') ?
$schemaManager->createComparator() :
new Comparator()
;
$tableDiff = $comparator->diffTable($schemaManager->listTableDetails($this->tableName), $table);
$queries = $this->connection->getDatabasePlatform()->getAlterTableSQL($tableDiff);
}

foreach ($queries as $query) {
$this->connection->executeQuery($query);
}
}

/**
Expand Down
Loading

0 comments on commit 67ede9c

Please sign in to comment.