Skip to content

Commit 0195768

Browse files
committed
TASK: Improve performance and memory usage during workspace indexing job creation
1 parent e5d1156 commit 0195768

File tree

2 files changed

+45
-8
lines changed

2 files changed

+45
-8
lines changed

Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Command/NodeIndexQueueCommandController.php

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use TYPO3\Flow\Exception;
1717
use TYPO3\Flow\Persistence\PersistenceManagerInterface;
1818
use TYPO3\Jobqueue\Common\Job\JobManager;
19+
use TYPO3\TYPO3CR\Domain\Factory\NodeFactory;
1920
use TYPO3\TYPO3CR\Domain\Model\NodeData;
2021
use TYPO3\TYPO3CR\Domain\Model\NodeInterface;
2122
use TYPO3\TYPO3CR\Domain\Repository\WorkspaceRepository;
@@ -34,6 +35,12 @@ class NodeIndexQueueCommandController extends CommandController {
3435
*/
3536
protected $jobManager;
3637

38+
/**
39+
* @var PersistenceManagerInterface
40+
* @Flow\Inject
41+
*/
42+
protected $persistenceManager;
43+
3744
/**
3845
* @Flow\Inject
3946
* @var NodeTypeMappingBuilder
@@ -65,6 +72,8 @@ class NodeIndexQueueCommandController extends CommandController {
6572
protected $logger;
6673

6774
/**
75+
* Index all nodes by creating a new index and, once everything has completed, switch the index alias.
76+
*
6877
* @param string $workspace
6978
*/
7079
public function buildCommand($workspace = NULL) {
@@ -97,22 +106,26 @@ protected function indexWorkspace($workspaceName, $indexPostfix) {
97106
$offset = 0;
98107
$batchSize = 100;
99108
while (TRUE) {
100-
$result = $this->nodeDataRepository->findAllBySiteAndWorkspace($workspaceName, $offset, $batchSize);
101-
if ($result === array()) {
102-
break;
103-
}
109+
$iterator = $this->nodeDataRepository->findAllBySiteAndWorkspace($workspaceName, $offset, $batchSize);
110+
104111
$jobData = [];
105-
foreach ($result as $data) {
112+
113+
foreach ($this->nodeDataRepository->iterate($iterator) as $data) {
106114
$jobData[] = [
107115
'nodeIdentifier' => $data['nodeIdentifier'],
108116
'dimensions' => $data['dimensions']
109-
110117
];
111118
}
119+
120+
if ($jobData === []) {
121+
break;
122+
}
123+
112124
$indexingJob = new IndexingJob($indexPostfix, $workspaceName, $jobData);
113125
$this->jobManager->queue('Flowpack.ElasticSearch.ContentRepositoryQueueIndexer', $indexingJob);
114126
$this->output('.');
115127
$offset += $batchSize;
128+
$this->persistenceManager->clearState();
116129
}
117130
}
118131

Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Domain/Repository/NodeDataRepository.php

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
namespace Flowpack\ElasticSearch\ContentRepositoryQueueIndexer\Domain\Repository;
33

44
use Doctrine\Common\Persistence\ObjectManager;
5+
use Doctrine\ORM\Internal\Hydration\IterableResult;
56
use Doctrine\ORM\Query;
67
use Doctrine\ORM\QueryBuilder;
78
use TYPO3\Flow\Annotations as Flow;
@@ -25,7 +26,7 @@ class NodeDataRepository extends Repository {
2526
* @param string $workspaceName
2627
* @param integer $firstResult
2728
* @param integer $maxResults
28-
* @return array
29+
* @return IterableResult
2930
*/
3031
public function findAllBySiteAndWorkspace($workspaceName, $firstResult = 0, $maxResults = 1000) {
3132

@@ -42,7 +43,30 @@ public function findAllBySiteAndWorkspace($workspaceName, $firstResult = 0, $max
4243
':removed' => FALSE,
4344
]);
4445

45-
return $queryBuilder->getQuery()->getArrayResult();
46+
return $queryBuilder->getQuery()->iterate();
47+
}
48+
49+
/**
50+
* Iterate over an IterableResult and return a Generator
51+
*
52+
* This method is useful for batch processing huge result sets, as the optional callback can clear the object
53+
* manager and detach the current object on each iteration.
54+
*
55+
* @param IterableResult $iterator
56+
* @param callable $callback
57+
* @return \Generator
58+
*/
59+
public function iterate(IterableResult $iterator, callable $callback = null)
60+
{
61+
$iteration = 0;
62+
foreach ($iterator as $object) {
63+
$object = current($object);
64+
yield $object;
65+
if ($callback !== null) {
66+
call_user_func($callback, $iteration, $object);
67+
}
68+
++$iteration;
69+
}
4670
}
4771

4872
}

0 commit comments

Comments
 (0)