Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
- [FileMoverTask]
- [FileReaderTask](reference/tasks/file_reader_task.md)
- [FileRemoverTask]
- [FileSplitterTask](reference/tasks/file_splitter_task.md)
- [FileWriterTask]
- [FolderBrowserTask](reference/tasks/folder_browser_task.md)
- [InputFileReaderTask](reference/tasks/input_file_reader_task.md)
Expand Down
42 changes: 42 additions & 0 deletions docs/reference/tasks/file_splitter_task.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
FileSplitterTask
=============

Splits a long file into several smaller temporary files, each containing at most `max_lines` lines.

Task reference
--------------

* **Service**: `CleverAge\ProcessBundle\Task\File\FileSplitterTask`
* **Iterable task**

Accepted inputs
---------------

`array`: inputs are merged with task defined options.

Possible outputs
----------------

`string`: absolute path of the produced file

Options
-------

| Code | Type | Required | Default | Description |
|-------------------------|-----------------|:--------:|----------|------------------------------------------|
| `file_path` | `string` | **X** | | Path of the file to read from (absolute) |
| `max_lines` | `int` | | 1000 | Maximum number of lines in each produced file |

Example
-------

```yaml
# Task configuration level
entry:
    service: '@CleverAge\ProcessBundle\Task\File\FileSplitterTask'
    options:
        file_path: '%kernel.project_dir%/var/data/json_stream_reader.json'
        max_lines: 1
```


106 changes: 106 additions & 0 deletions src/Filesystem/SplFile.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
<?php

declare(strict_types=1);

/*
* This file is part of the CleverAge/ProcessBundle package.
*
* Copyright (c) Clever-Age
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace CleverAge\ProcessBundle\Filesystem;

/**
* Wrapper around files to read/write them.
*/
class SplFile
{
    /** Underlying file object; every read and write goes through it. */
    protected \SplFileObject $file;

    /** Cached result of getLineCount(); null until it has been computed once. */
    protected ?int $lineCount = null;

    /** Starts at 1 and is incremented by each successful readLine() and each writeLine(). */
    protected int $lineNumber = 1;

    /**
     * Opens $filename with the given fopen-style mode and applies the SplFileObject flags.
     *
     * @param string          $filename           path of the file to open
     * @param string          $mode               mode passed as-is to \SplFileObject
     * @param array<int>|null $splFileObjectFlags list of \SplFileObject::* flag constants; when null,
     *                                            DROP_NEW_LINE | READ_AHEAD | SKIP_EMPTY is used
     */
    public function __construct(
        string $filename,
        string $mode = 'rb',
        ?array $splFileObjectFlags = null,
    ) {
        $this->file = new \SplFileObject($filename, $mode);

        // Flags are a bitmask: combine them with OR, not with a sum, so that a flag listed
        // twice does not silently turn into a different flag (e.g. 2 + 2 === 4).
        // The default set is useful to skip empty trailing lines (doesn't work well on PHP 8,
        // see readLine() code).
        $flags = null === $splFileObjectFlags
            ? \SplFileObject::DROP_NEW_LINE | \SplFileObject::READ_AHEAD | \SplFileObject::SKIP_EMPTY
            : array_reduce(
                $splFileObjectFlags,
                static fn (int $carry, int $flag): int => $carry | $flag,
                0,
            );

        $this->file->setFlags($flags);
    }

    /**
     * Counts the lines by iterating over the whole file; the result is cached.
     *
     * Warning! This method will rewind the file to the beginning before and after counting the lines!
     * As rewind() is used, the value returned by getLineNumber() is reset to 1 as well.
     */
    public function getLineCount(): int
    {
        if (null === $this->lineCount) {
            $this->rewind();
            $line = 0;
            while (!$this->isEndOfFile()) {
                ++$line;
                $this->file->next();
            }
            $this->rewind();

            $this->lineCount = $line;
        }

        return $this->lineCount;
    }

    /**
     * Returns the internal line counter (1 right after construction or rewind()).
     */
    public function getLineNumber(): int
    {
        return $this->lineNumber;
    }

    /**
     * Tells whether the underlying file object reports end of file.
     */
    public function isEndOfFile(): bool
    {
        return $this->file->eof();
    }

    /**
     * Returns the next line as a string and moves the file pointer forward.
     *
     * Returns null when the end of file is reached or when the read yields an empty string.
     * The line counter is only incremented when a non-empty string is returned.
     *
     * @param int|null $length currently ignored, kept for signature compatibility
     */
    public function readLine(?int $length = null): ?string
    {
        if ($this->isEndOfFile()) {
            return null;
        }

        $rawLine = $this->file->fgets();
        // Fix issue on PHP 8 with empty line at the end, even if SKIP_EMPTY is set
        if ('' === $rawLine) {
            return null;
        }
        ++$this->lineNumber;

        return $rawLine;
    }

    /**
     * Writes $data followed by PHP_EOL and increments the line counter.
     *
     * @return int the line counter after the write
     */
    public function writeLine(string $data): int
    {
        $this->file->fwrite($data.\PHP_EOL);
        ++$this->lineNumber;

        return $this->lineNumber;
    }

    /**
     * Rewinds the file to its beginning and resets the line counter to 1.
     */
    public function rewind(): void
    {
        $this->file->rewind();
        $this->lineNumber = 1;
    }
}
110 changes: 110 additions & 0 deletions src/Task/File/FileSplitterTask.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
<?php

declare(strict_types=1);

/*
* This file is part of the CleverAge/ProcessBundle package.
*
* Copyright (c) Clever-Age
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace CleverAge\ProcessBundle\Task\File;

use CleverAge\ProcessBundle\Filesystem\SplFile;
use CleverAge\ProcessBundle\Model\AbstractConfigurableTask;
use CleverAge\ProcessBundle\Model\IterableTaskInterface;
use CleverAge\ProcessBundle\Model\ProcessState;
use Symfony\Component\OptionsResolver\OptionsResolver;

/**
* Split long file into smaller ones.
*/
class FileSplitterTask extends AbstractConfigurableTask implements IterableTaskInterface
{
    /** Source file being split; null before the first execute() and once the end has been reached. */
    protected ?SplFile $file = null;

    /** @var array<int>|null flags used to open both the source file and the produced files */
    private ?array $splFileObjectFlags = null;

    /** Line count of the source file, computed once when the file is opened. */
    private int $lineCount;

    /**
     * Opens the source file on the first call, then writes the next chunk of at most
     * "max_lines" lines into a temporary file and outputs the path of that file.
     *
     * @throws \InvalidArgumentException when the merged "max_lines" value is not a positive integer
     */
    public function execute(ProcessState $state): void
    {
        $options = $this->getMergedOptions($state);

        // Inputs are merged over the resolved options without going through the resolver again,
        // so the value is checked here: with max_lines < 1 no line would ever be consumed and
        // next() would keep returning true, producing empty files forever.
        $maxLines = $options['max_lines'];
        if (!\is_int($maxLines) || $maxLines < 1) {
            throw new \InvalidArgumentException('The "max_lines" option must be a positive integer.');
        }

        $this->splFileObjectFlags = [\SplFileObject::READ_AHEAD, \SplFileObject::SKIP_EMPTY];
        if (!$this->file instanceof SplFile) {
            $this->file = new SplFile($options['file_path'], 'rb', $this->splFileObjectFlags);
            $this->lineCount = $this->file->getLineCount();
        }

        // Return a temporary file containing a limited number of lines
        $splittedFilename = $this->splitFile($this->file, $maxLines);
        $state->setOutput($splittedFilename);
    }

    /**
     * Moves the internal pointer to the next element,
     * return true if the task has a next element
     * return false if the task has terminated its iteration.
     *
     * When the end is reached the source file reference is dropped, so that a later
     * execute() opens the file again from the start.
     */
    public function next(ProcessState $state): bool
    {
        if (!$this->file instanceof SplFile) {
            return false;
        }

        // Fix issue on PHP 8 with empty line at the end, even if SKIP_EMPTY is set
        $endOfFile = $this->file->isEndOfFile() || $this->file->getLineNumber() > $this->lineCount;
        if ($endOfFile) {
            $this->file = null;
        }

        return !$endOfFile;
    }

    /**
     * Copies up to $maxLines non-empty lines from $file into a new temporary file.
     *
     * @return string path of the temporary file that was written
     */
    protected function splitFile(SplFile $file, int $maxLines): string
    {
        $tmpFilePath = sys_get_temp_dir().\DIRECTORY_SEPARATOR.'php_'.uniqid('process', false).'.tmp';
        $splitFile = new SplFile($tmpFilePath, 'wb', $this->splFileObjectFlags);

        while ($splitFile->getLineNumber() <= $maxLines && !$file->isEndOfFile()) {
            $line = $file->readLine();
            if (null === $line) {
                continue; // Nothing could be read, the loop condition will detect the end of file
            }

            // The source file is opened without DROP_NEW_LINE, so the line still carries its
            // terminator while writeLine() appends PHP_EOL: strip it to avoid doubled line breaks.
            $line = rtrim($line, "\r\n");
            if ('' === $line) {
                continue; // This is probably an empty line, no harm to skip it
            }
            $splitFile->writeLine($line);
        }

        return $tmpFilePath;
    }

    /**
     * Declares "file_path" (required string) and "max_lines" (positive int, defaults to 1000).
     */
    protected function configureOptions(OptionsResolver $resolver): void
    {
        $resolver->setRequired(['file_path']);
        $resolver->setAllowedTypes('file_path', ['string']);
        $resolver->setDefaults([
            'max_lines' => 1000,
        ]);
        $resolver->setAllowedTypes('max_lines', ['int']);
        // Reject 0 and negative values early: they can never produce a non-empty chunk
        $resolver->setAllowedValues('max_lines', static fn (int $value): bool => $value > 0);
    }

    /**
     * Merges the task input over the configured options; a non-array input is ignored.
     *
     * @return array<mixed>
     */
    protected function getMergedOptions(ProcessState $state): array
    {
        /** @var array<mixed> $options */
        $options = $this->getOptions($state);

        /** @var array<mixed>|mixed $input */
        $input = $state->getInput() ?: [];
        if (!\is_array($input)) {
            $input = [];
        }

        return array_merge($options, $input);
    }
}