diff --git a/.gitignore b/.gitignore index 896e906..652b2af 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,5 @@ build composer.lock docs vendor -coverage .phpunit.result.cache +.phpunit.cache diff --git a/.scrutinizer.yml b/.scrutinizer.yml index df16b68..4e48850 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -1,3 +1,11 @@ +build: + nodes: + analysis: + project_setup: + override: true + tests: + override: [php-scrutinizer-run] + filter: excluded_paths: [tests/*] @@ -17,3 +25,7 @@ checks: fix_identation_4spaces: true fix_doc_comments: true +tools: + external_code_coverage: + timeout: 600 + runs: 1 diff --git a/.styleci.yml b/.styleci.yml index 247a09c..ac8d606 100644 --- a/.styleci.yml +++ b/.styleci.yml @@ -1 +1 @@ -preset: psr2 +preset: psr12 diff --git a/.travis.yml b/.travis.yml index 8370411..b2604c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,20 +1,26 @@ language: php php: - - 7.3 - 7.4 + - 8.0 + +cache: + directories: + - $HOME/.composer/cache env: - matrix: - - COMPOSER_FLAGS="--prefer-lowest" - - COMPOSER_FLAGS="" + - XDEBUG_MODE=coverage before_script: - - travis_retry composer self-update - - travis_retry composer update ${COMPOSER_FLAGS} --no-interaction --prefer-source + - travis_retry composer update --no-interaction --prefer-dist script: + - vendor/bin/phpcs --standard=psr2 src/ - vendor/bin/phpunit --coverage-text --coverage-clover=coverage.clover after_script: - - php vendor/bin/ocular code-coverage:upload --format=php-clover coverage.clover + - | + if [[ "$TRAVIS_PHP_VERSION" != '8.0' ]]; then + wget https://scrutinizer-ci.com/ocular.phar + php ocular.phar code-coverage:upload --format=php-clover coverage.clover + fi diff --git a/README.md b/README.md index a42620b..a1de4ce 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,6 @@ [K-mean](http://en.wikipedia.org/wiki/K-means_clustering) clustering algorithm implementation in PHP. 
-Please also see the [FAQ](#faq) - ## Installation You can install the package via composer: @@ -22,8 +20,7 @@ composer require bdelespierre/php-kmeans ```PHP require "vendor/autoload.php"; -// prepare 50 points of 2D space to be clustered -$points = [ +$data = [ [80,55],[86,59],[19,85],[41,47],[57,58], [76,22],[94,60],[13,93],[90,48],[52,54], [62,46],[88,44],[85,24],[63,14],[51,40], @@ -37,30 +34,35 @@ $points = [ ]; // create a 2-dimentions space -$space = new KMeans\Space(2); +$space = new Kmeans\Euclidean\Space(2); + +// prepare the points +$points = new Kmeans\PointCollection($space); -// add points to space -foreach ($points as $i => $coordinates) { - $space->addPoint($coordinates); +foreach ($data as $coordinates) { + $points->attach(new Kmeans\Euclidean\Point($space, $coordinates)); } +// prepare the algorithm +$algorithm = new Kmeans\Euclidean\Algorithm(new Kmeans\RandomInitialization()); + // cluster these 50 points in 3 clusters -$clusters = $space->solve(3); +$clusters = $algorithm->fit($points, 3); // display the cluster centers and attached points foreach ($clusters as $num => $cluster) { - $coordinates = $cluster->getCoordinates(); + $coordinates = $cluster->getCentroid()->getCoordinates(); printf( - "Cluster %s [%d,%d]: %d points\n", + "Cluster #%s [%d,%d] has %d points\n", $num, $coordinates[0], $coordinates[1], - count($cluster) + count($cluster->getPoints()) ); } ``` -**Note:** the example is given with points of a 2D space but it will work with any dimention >1. +**Note:** the example is given with points of a 2D space but it will work with any dimension greater than or equal to 1. ### Testing @@ -89,51 +91,3 @@ If you discover any security related issues, please email benjamin.delespierre@g ## License Lesser General Public License (LGPL). Please see [License File](LICENSE.md) for more information. 
- -## FAQ - -### How to get coordinates of a point/cluster: -```PHP -$x = $point[0]; -$y = $point[1]; - -// or - -list($x,$y) = $point->getCoordinates(); -``` - -### List all points of a space/cluster: - -```PHP -foreach ($cluster as $point) { - printf('[%d,%d]', $point[0], $point[1]); -} -``` - -### Attach data to a point: - -```PHP -$point = $space->addPoint([$x, $y, $z], "user #123"); -``` - -### Retrieve point data: - -```PHP -$data = $space[$point]; // e.g. "user #123" -``` - -### Watch the algorithm run - -Each iteration step can be monitored using a callback function passed to `Kmeans\Space::solve`: - -```PHP -$clusters = $space->solve(3, function($space, $clusters) { - static $iterations = 0; - - printf("Iteration: %d\n", ++$iterations); - - foreach ($clusters as $i => $cluster) { - printf("Cluster %d [%d,%d]: %d points\n", $i, $cluster[0], $cluster[1], count($cluster)); - } -}); -``` diff --git a/composer.json b/composer.json index 137d615..9353d8a 100644 --- a/composer.json +++ b/composer.json @@ -16,14 +16,17 @@ } ], "require": { - "php": "^7.3|^8.0" + "php": "^7.4|^8.0" }, "require-dev": { - "phpunit/phpunit": "^9.3" + "phpunit/phpunit": "^9.5", + "squizlabs/php_codesniffer": "^3.6", + "phpstan/phpstan": "^1.5", + "mockery/mockery": "^1.4" }, "autoload": { - "psr-0": { - "KMeans": "src/" + "psr-4": { + "Kmeans\\": "src/" } }, "autoload-dev": { @@ -33,6 +36,6 @@ }, "scripts": { "test": "vendor/bin/phpunit", - "test-coverage": "vendor/bin/phpunit --coverage-html coverage" + "test-coverage": "vendor/bin/phpunit --coverage-html build/coverage" } } diff --git a/demo.php b/demo.php index 408ad99..3e9c385 100644 --- a/demo.php +++ b/demo.php @@ -2,8 +2,7 @@ require "vendor/autoload.php"; -// prepare 50 points of 2D space to be clustered -$points = [ +$data = [ [80,55],[86,59],[19,85],[41,47],[57,58], [76,22],[94,60],[13,93],[90,48],[52,54], [62,46],[88,44],[85,24],[63,14],[51,40], @@ -17,24 +16,26 @@ ]; // create a 2-dimentions space -$space = new 
KMeans\Space(2); +$space = new Kmeans\Euclidean\Space(2); -// add points to space -foreach ($points as $i => $coordinates) { - $space->addPoint($coordinates); -} +// prepare the points +$points = new Kmeans\PointCollection($space, array_map([$space, 'makePoint'], $data)); + +// prepare the algorithm +$algorithm = new Kmeans\Euclidean\Algorithm(new Kmeans\RandomInitialization()); // cluster these 50 points in 3 clusters -$clusters = $space->solve(3); +$clusters = $algorithm->fit($points, 3); // display the cluster centers and attached points foreach ($clusters as $num => $cluster) { - $coordinates = $cluster->getCoordinates(); + $coordinates = $cluster->getCentroid()->getCoordinates(); + assert(is_int($num)); printf( - "Cluster %s [%d,%d]: %d points\n", + "Cluster #%s [%d,%d] has %d points\n", $num, $coordinates[0], $coordinates[1], - count($cluster) + count($cluster->getPoints()) ); } diff --git a/makefile b/makefile new file mode 100644 index 0000000..fe0064b --- /dev/null +++ b/makefile @@ -0,0 +1,31 @@ + +# ----------------------------------------------------------------------------- +# Code Quality +# ----------------------------------------------------------------------------- + +qa: phplint phpcs phpstan + +QA_PATHS = src/ tests/ +QA_STANDARD = psr12 + +phplint: + find $(QA_PATHS) -name "*.php" -print0 | xargs -0 -n1 -P8 php -l > /dev/null + +phpstan: + vendor/bin/phpstan analyse $(QA_PATHS) + +phpcs: + vendor/bin/phpcs --standard=$(QA_STANDARD) $(QA_PATHS) + +phpcbf: + vendor/bin/phpcbf --standard=$(QA_STANDARD) $(QA_PATHS) + +todolist: + git grep -C2 -p -E '[@]todo' + +# ----------------------------------------------------------------------------- +# Tests +# ----------------------------------------------------------------------------- + +test: + vendor/bin/phpunit --colors diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..191c126 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,5 @@ +parameters: + paths: + - src + - tests + level: 9 
diff --git a/phpunit.xml.dist b/phpunit.xml.dist index b9ba704..c264e4b 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,34 +1,31 @@ - - - - src/ - - - - - - - - - - tests - - - - - + + + + tests/Unit + + + + + + src + + + + + + + diff --git a/src/Algorithm.php b/src/Algorithm.php new file mode 100644 index 0000000..8e83045 --- /dev/null +++ b/src/Algorithm.php @@ -0,0 +1,114 @@ + + */ + private array $iterationCallbacks = []; + + public function __construct(InitializationSchemeInterface $initScheme) + { + $this->initScheme = $initScheme; + } + + public function registerIterationCallback(callable $callback): void + { + $this->iterationCallbacks[] = $callback; + } + + public function fit( + PointCollectionInterface $points, + int $nClusters, + ?int $maxIter = null + ): ClusterCollectionInterface { + $maxIter ??= INF; + + if ($maxIter < 1) { + throw new \UnexpectedValueException( + "Invalid maximum number of iterations: {$maxIter}" + ); + } + + // initialize clusters + $clusters = $this->initScheme->initializeClusters($points, $nClusters); + + // iterate until convergence is reached + do { + $this->invokeIterationCallbacks($clusters); + } while ($this->iterate($clusters) && --$maxIter); + + // clustering is done. 
+ return $clusters; + } + + protected function iterate(ClusterCollectionInterface $clusters): bool + { + /** @var \SplObjectStorage */ + $changed = new \SplObjectStorage(); + + // calculate proximity amongst points and clusters + foreach ($clusters as $cluster) { + foreach ($cluster->getPoints() as $point) { + // find the closest cluster + $closest = $this->getClosestCluster($clusters, $point); + + if ($closest !== $cluster) { + // move the point from its current cluster to its closest + $cluster->detach($point); + $closest->attach($point); + + // flag both clusters as changed + $changed->attach($cluster); + $changed->attach($closest); + } + } + } + + // update changed clusters' centroid + foreach ($changed as $cluster) { + $cluster->setCentroid($this->findCentroid($cluster->getPoints())); + } + + // return true if something changed during this iteration + return count($changed) > 0; + } + + private function getClosestCluster(ClusterCollectionInterface $clusters, PointInterface $point): ClusterInterface + { + $min = INF; + $closest = null; + + foreach ($clusters as $cluster) { + $distance = $this->getDistanceBetween($point, $cluster->getCentroid()); + + if ($distance < $min) { + $min = $distance; + $closest = $cluster; + } + } + + assert($closest !== null); + return $closest; + } + + private function invokeIterationCallbacks(ClusterCollectionInterface $clusters): void + { + foreach ($this->iterationCallbacks as $callback) { + $callback($this, $clusters); + } + } +} diff --git a/src/Cluster.php b/src/Cluster.php new file mode 100644 index 0000000..efa876d --- /dev/null +++ b/src/Cluster.php @@ -0,0 +1,59 @@ +points = $points ?? 
new PointCollection($centroid->getSpace()); + $this->setCentroid($centroid); + } + + public function getSpace(): SpaceInterface + { + return $this->points->getSpace(); + } + + public function belongsTo(SpaceInterface $space): bool + { + return $this->getSpace()->isEqualTo($space); + } + + public function getCentroid(): PointInterface + { + return $this->centroid; + } + + public function setCentroid(PointInterface $point): void + { + if (! $point->belongsTo($this->getSpace())) { + throw new \LogicException("Cannot set centroid: invalid point space"); + } + + $this->centroid = $point; + } + + public function getPoints(): PointCollectionInterface + { + return $this->points; + } + + public function attach(PointInterface $point): void + { + $this->points->attach($point); + } + + public function detach(PointInterface $point): void + { + $this->points->detach($point); + } +} diff --git a/src/ClusterCollection.php b/src/ClusterCollection.php new file mode 100644 index 0000000..07569aa --- /dev/null +++ b/src/ClusterCollection.php @@ -0,0 +1,91 @@ + + */ + protected \SplObjectStorage $clusters; + + /** + * @param array $clusters + */ + public function __construct(SpaceInterface $space, array $clusters = []) + { + $this->setSpace($space); + $this->clusters = new \SplObjectStorage(); + + foreach ($clusters as $cluster) { + $this->attach($cluster); + } + } + + // ------------------------------------------------------------------------ + // ClusterCollectionInterface + + public function contains(ClusterInterface $cluster): bool + { + return $this->clusters->contains($cluster); + } + + public function attach(ClusterInterface $cluster): void + { + if (! 
$this->getSpace()->isEqualTo($cluster->getSpace())) { + throw new \InvalidArgumentException( + "Cannot add cluster to collection: cluster space is not same as collection space" + ); + } + + $this->clusters->attach($cluster); + } + + public function detach(ClusterInterface $cluster): void + { + $this->clusters->detach($cluster); + } + + // ------------------------------------------------------------------------ + // Iterator + + public function current() + { + return $this->clusters->current(); + } + + public function key() + { + return $this->clusters->key(); + } + + public function next(): void + { + $this->clusters->next(); + } + + public function rewind(): void + { + $this->clusters->rewind(); + } + + public function valid(): bool + { + return $this->clusters->valid(); + } + + // ------------------------------------------------------------------------ + // Countable + + public function count(): int + { + return count($this->clusters); + } +} diff --git a/src/Concerns/HasDataTrait.php b/src/Concerns/HasDataTrait.php new file mode 100644 index 0000000..885a975 --- /dev/null +++ b/src/Concerns/HasDataTrait.php @@ -0,0 +1,27 @@ +data; + } + + /** + * @param mixed $data + */ + public function setData($data): void + { + $this->data = $data; + } +} diff --git a/src/Concerns/HasSpaceTrait.php b/src/Concerns/HasSpaceTrait.php new file mode 100644 index 0000000..b466613 --- /dev/null +++ b/src/Concerns/HasSpaceTrait.php @@ -0,0 +1,25 @@ +space = $space; + } + + public function getSpace(): SpaceInterface + { + return $this->space; + } + + public function belongsTo(SpaceInterface $space): bool + { + return $this->getSpace()->isEqualTo($space); + } +} diff --git a/src/Euclidean/Algorithm.php b/src/Euclidean/Algorithm.php new file mode 100644 index 0000000..a9813a7 --- /dev/null +++ b/src/Euclidean/Algorithm.php @@ -0,0 +1,35 @@ +getCoordinates(), $pointB->getCoordinates()); + } + + public function findCentroid(PointCollectionInterface $points): PointInterface + { + if (! 
$points->getSpace() instanceof Space) { + throw new \InvalidArgumentException( + "Point collection should consist of Euclidean points" + ); + } + + return $points->getSpace()->makePoint(Math::centroid( + array_map(fn (PointInterface $point) => $point->getCoordinates(), iterator_to_array($points)) + )); + } +} diff --git a/src/Euclidean/Point.php b/src/Euclidean/Point.php new file mode 100644 index 0000000..31794c1 --- /dev/null +++ b/src/Euclidean/Point.php @@ -0,0 +1,68 @@ + + */ + private array $coordinates; + + /** + * @param array $coordinates + */ + public function __construct(SpaceInterface $space, array $coordinates) + { + if (! $space instanceof Space) { + throw new \LogicException( + "An euclidean point must belong to an euclidean space" + ); + } + + $this->setSpace($space); + $this->coordinates = $this->sanitizeCoordinates($coordinates); + } + + public function getCoordinates(): array + { + return $this->coordinates; + } + + /** + * @param array $coordinates + * @return array + */ + private function sanitizeCoordinates(array $coordinates): array + { + assert($this->space instanceof Space); + if (count($coordinates) != $this->space->getDimensions()) { + throw new \InvalidArgumentException(sprintf( + "Invalid set of coordinates: %d coordinates expected, %d given", + $this->space->getDimensions(), + count($coordinates) + )); + } + + $coordinates = filter_var_array($coordinates, FILTER_VALIDATE_FLOAT); + assert(is_array($coordinates)); + $errors = array_keys($coordinates, false, true); + + if (! 
empty($errors)) { + throw new \InvalidArgumentException(sprintf( + "Invalid set of coordinates: values at offsets [%s] could not be converted to numbers", + implode(',', $errors) + )); + } + + return $coordinates; + } +} diff --git a/src/Euclidean/Space.php b/src/Euclidean/Space.php new file mode 100644 index 0000000..dba53a6 --- /dev/null +++ b/src/Euclidean/Space.php @@ -0,0 +1,44 @@ + + */ + protected int $dimensions; + + public function __construct(int $dimensions) + { + if ($dimensions < 1) { + throw new \InvalidArgumentException( + "Invalid space dimentions: {$dimensions}" + ); + } + + $this->dimensions = $dimensions; + } + + public function getDimensions(): int + { + return $this->dimensions; + } + + public function isEqualTo(SpaceInterface $space): bool + { + return $space instanceof self + && $this->dimensions == $space->dimensions; + } + + /** + * @param array $coordinates + */ + public function makePoint(array $coordinates): PointInterface + { + return new Point($this, $coordinates); + } +} diff --git a/src/Gps/Algorithm.php b/src/Gps/Algorithm.php new file mode 100644 index 0000000..b9eb4e6 --- /dev/null +++ b/src/Gps/Algorithm.php @@ -0,0 +1,38 @@ +getCoordinates(), $pointB->getCoordinates()); + } + + public function findCentroid(PointCollectionInterface $points): PointInterface + { + if (! 
$points->getSpace() instanceof Space) { + throw new \InvalidArgumentException( + "Point collection should consist of GPS coordinates" + ); + } + + /** @var array $pointsArray */ + $pointsArray = iterator_to_array($points); + + return $points->getSpace()->makePoint(Math::gpsCentroid( + array_map(fn (Point $point) => $point->getCoordinates(), $pointsArray) + )); + } +} diff --git a/src/Gps/Point.php b/src/Gps/Point.php new file mode 100644 index 0000000..b66db67 --- /dev/null +++ b/src/Gps/Point.php @@ -0,0 +1,42 @@ +validateCoordinates($lat, $long); + $this->setSpace(Space::singleton()); + $this->lat = $lat; + $this->long = $long; + } + + /** + * @return array{0: float, 1: float} + */ + public function getCoordinates(): array + { + return [$this->lat, $this->long]; + } + + private function validateCoordinates(float $lat, float $long): void + { + if ($lat < -90 || $lat > 90 || $long < -180 || $long > 180) { + throw new \InvalidArgumentException( + "Invalid GPS coordinates" + ); + } + } +} diff --git a/src/Gps/Space.php b/src/Gps/Space.php new file mode 100644 index 0000000..1edc687 --- /dev/null +++ b/src/Gps/Space.php @@ -0,0 +1,29 @@ + + */ +interface ClusterCollectionInterface extends SpaceBoundInterface, \Iterator, \Countable +{ + public function contains(ClusterInterface $cluster): bool; + + public function attach(ClusterInterface $cluster): void; + + public function detach(ClusterInterface $cluster): void; +} diff --git a/src/Interfaces/ClusterInterface.php b/src/Interfaces/ClusterInterface.php new file mode 100644 index 0000000..d51de69 --- /dev/null +++ b/src/Interfaces/ClusterInterface.php @@ -0,0 +1,16 @@ + + */ + public function iterationsCount(): int; + + public function getClusters(): ClusterCollectionInterface; + + public function resume(PointCollectionInterface $newPoints): self; +} diff --git a/src/Interfaces/InitializationSchemeInterface.php b/src/Interfaces/InitializationSchemeInterface.php new file mode 100644 index 0000000..d3cdfec --- /dev/null 
+++ b/src/Interfaces/InitializationSchemeInterface.php @@ -0,0 +1,11 @@ + + */ +interface PointCollectionInterface extends SpaceBoundInterface, \Iterator, \Countable +{ + public function contains(PointInterface $point): bool; + + public function attach(PointInterface $point): void; + + public function detach(PointInterface $point): void; +} diff --git a/src/Interfaces/PointInterface.php b/src/Interfaces/PointInterface.php new file mode 100644 index 0000000..62adc77 --- /dev/null +++ b/src/Interfaces/PointInterface.php @@ -0,0 +1,21 @@ + + */ + public function getCoordinates(): array; + + /** + * @return mixed + */ + public function getData(); + + /** + * @param mixed $data + */ + public function setData($data): void; +} diff --git a/src/Interfaces/SpaceBoundInterface.php b/src/Interfaces/SpaceBoundInterface.php new file mode 100644 index 0000000..779c463 --- /dev/null +++ b/src/Interfaces/SpaceBoundInterface.php @@ -0,0 +1,10 @@ + $coordinates + */ + public function makePoint(array $coordinates): PointInterface; +} diff --git a/src/KMeans/Cluster.php b/src/KMeans/Cluster.php deleted file mode 100644 index e49b06f..0000000 --- a/src/KMeans/Cluster.php +++ /dev/null @@ -1,110 +0,0 @@ -points = new \SplObjectStorage(); - } - - public function toArray(): array - { - $points = []; - foreach ($this->points as $point) { - $points[] = $point->toArray(); - } - - return [ - 'centroid' => parent::toArray(), - 'points' => $points, - ]; - } - - public function attach(Point $point): Point - { - if ($point instanceof self) { - throw new \LogicException("cannot attach a cluster to another"); - } - - $this->points->attach($point); - return $point; - } - - public function detach(Point $point): Point - { - $this->points->detach($point); - return $point; - } - - public function attachAll(\SplObjectStorage $points): void - { - $this->points->addAll($points); - } - - public function detachAll(\SplObjectStorage $points): void - { - $this->points->removeAll($points); - } - - public 
function updateCentroid(): void - { - if (!$count = count($this->points)) { - return; - } - - $centroid = $this->space->newPoint(array_fill(0, $this->dimention, 0)); - - foreach ($this->points as $point) { - for ($n = 0; $n < $this->dimention; $n++) { - $centroid->coordinates[$n] += $point->coordinates[$n]; - } - } - - for ($n = 0; $n < $this->dimention; $n++) { - $this->coordinates[$n] = $centroid->coordinates[$n] / $count; - } - } - - public function getIterator(): \Iterator - { - return $this->points; - } - - public function count(): int - { - return count($this->points); - } -} diff --git a/src/KMeans/Point.php b/src/KMeans/Point.php deleted file mode 100644 index 6f92e7e..0000000 --- a/src/KMeans/Point.php +++ /dev/null @@ -1,115 +0,0 @@ -space = $space; - $this->dimention = $space->getDimention(); - $this->coordinates = $coordinates; - } - - public function toArray(): array - { - return [ - 'coordinates' => $this->coordinates, - 'data' => isset($this->space[$this]) ? $this->space[$this] : null, - ]; - } - - public function getDistanceWith(self $point, bool $precise = true): float - { - if ($point->space !== $this->space) { - throw new \LogicException("can only calculate distances from points in the same space"); - } - - $distance = 0; - for ($n = 0; $n < $this->dimention; $n++) { - $difference = $this->coordinates[$n] - $point->coordinates[$n]; - $distance += $difference * $difference; - } - - return $precise ? 
sqrt($distance) : $distance; - } - - public function getClosest(iterable $points): ?Point - { - $minDistance = PHP_INT_MAX; - $minPoint = null; - foreach ($points as $point) { - $distance = $this->getDistanceWith($point, false); - - if ($distance < $minDistance) { - $minDistance = $distance; - $minPoint = $point; - } - } - - return $minPoint; - } - - public function belongsTo(Space $space): bool - { - return $this->space === $space; - } - - public function getSpace(): Space - { - return $this->space; - } - - public function getCoordinates(): array - { - return $this->coordinates; - } - - public function offsetExists($offset): bool - { - return isset($this->coordinates[$offset]); - } - - public function offsetGet($offset) - { - return $this->coordinates[$offset]; - } - - public function offsetSet($offset, $value): void - { - $this->coordinates[$offset] = $value; - } - - public function offsetUnset($offset): void - { - unset($this->coordinates[$offset]); - } -} diff --git a/src/KMeans/Space.php b/src/KMeans/Space.php deleted file mode 100644 index 103375e..0000000 --- a/src/KMeans/Space.php +++ /dev/null @@ -1,277 +0,0 @@ -dimention = $dimention; - } - - public static function setRng(callable $fn): void - { - static::$rng = $fn; - } - - public function toArray(): array - { - $points = []; - foreach ($this as $point) { - $points[] = $point->toArray(); - } - - return ['points' => $points]; - } - - public function newPoint(array $coordinates): Point - { - if (count($coordinates) != $this->dimention) { - throw new \LogicException("(" . implode(',', $coordinates) . 
") is not a point of this space"); - } - - return new Point($this, $coordinates); - } - - public function addPoint(array $coordinates, $data = null): Point - { - $this->attach($point = $this->newPoint($coordinates), $data); - - return $point; - } - - public function attach($point, $data = null): void - { - if (!$point instanceof Point) { - throw new \InvalidArgumentException("can only attach points to spaces"); - } - - parent::attach($point, $data); - } - - public function getDimention(): int - { - return $this->dimention; - } - - public function getBoundaries(): array - { - if (!count($this)) { - return []; - } - - $min = $this->newPoint(array_fill(0, $this->dimention, null)); - $max = $this->newPoint(array_fill(0, $this->dimention, null)); - - foreach ($this as $point) { - for ($n = 0; $n < $this->dimention; $n++) { - if ($min[$n] === null || $min[$n] > $point[$n]) { - $min[$n] = $point[$n]; - } - - if ($max[$n] === null || $max[$n] < $point[$n]) { - $max[$n] = $point[$n]; - } - } - } - - return [$min, $max]; - } - - public function getRandomPoint(Point $min, Point $max): Point - { - $point = $this->newPoint(array_fill(0, $this->dimention, null)); - $rng = static::$rng; - - for ($n = 0; $n < $this->dimention; $n++) { - $point[$n] = $rng($min[$n], $max[$n]); - } - - return $point; - } - - public function solve(int $nbClusters, callable $iterationCallback = null, $initMethod = Cluster::INIT_RANDOM): array - { - // initialize K clusters - $clusters = $this->initializeClusters($nbClusters, $initMethod); - - // there's only one cluster, clusterization has no meaning - if (count($clusters) == 1) { - return $clusters; - } - - // until convergence is reached - do { - if ($iterationCallback) { - $iterationCallback($this, $clusters); - } - } while ($this->iterate($clusters)); - - // clustering is done. 
- return $clusters; - } - - protected function initializeClusters(int $nbClusters, int $initMethod): array - { - if ($nbClusters <= 0) { - throw new \InvalidArgumentException("invalid clusters number"); - } - - switch ($initMethod) { - case Cluster::INIT_RANDOM: - $clusters = $this->initializeRandomClusters($nbClusters); - - break; - - case Cluster::INIT_KMEANS_PLUS_PLUS: - $clusters = $this->initializeKmeansPlusPlusClusters($nbClusters); - - break; - - default: - return []; - } - - // assign all points to the first cluster - $clusters[0]->attachAll($this); - - return $clusters; - } - - protected function initializeKmeansPlusPlusClusters(int $nbClusters): array - { - $clusters = []; - $clusters[] = new Cluster($this, $this->current()->getCoordinates()); - - for ($i = 1; $i < $nbClusters; ++$i) { - $sum = 0; - $distances = []; - foreach ($this as $point) { - $distance = $point->getDistanceWith($point->getClosest($clusters), false); - $distances[] = $distance; - $sum += $distance; - } - - $probabilities = []; - foreach ($distances as $distance) { - $probabilities[] = $distance / $sum; - } - - $cumulativeProbabilities = array_reduce($probabilities, function ($c, $i) { - $c[] = end($c) + $i; - return $c; - }, []); - - $rng = static::$rng; - $rand = $rng() / mt_getrandmax(); - foreach ($cumulativeProbabilities as $j => $cumulativeProbability) { - if ($rand < $cumulativeProbability) { - foreach ($this as $key => $value) { - if ($j == $key) { - $clusters[] = new Cluster($this, $value->getCoordinates()); - break; - } - } - break; - } - } - } - - return $clusters; - } - - protected function initializeRandomClusters(int $nbClusters): array - { - $clusters = []; - - // get the space boundaries to avoid placing clusters centroid too far from points - list($min, $max) = $this->getBoundaries(); - - // initialize N clusters with a random point within space boundaries - for ($n = 0; $n < $nbClusters; $n++) { - $clusters[] = new Cluster($this, $this->getRandomPoint($min, 
$max)->getCoordinates()); - } - return $clusters; - } - - protected function iterate(array $clusters): bool - { - $continue = false; - - // migration storages - $attach = new \SplObjectStorage(); - $detach = new \SplObjectStorage(); - - // calculate proximity amongst points and clusters - foreach ($clusters as $cluster) { - foreach ($cluster as $point) { - // find the closest cluster - $closest = $point->getClosest($clusters); - - // move the point from its old cluster to its closest - if ($closest !== $cluster) { - if (! isset($attach[$closest])) { - $attach[$closest] = new \SplObjectStorage(); - } - - if (! isset($detach[$cluster])) { - $detach[$cluster] = new \SplObjectStorage(); - } - - $attach[$closest]->attach($point); - $detach[$cluster]->attach($point); - - $continue = true; - } - } - } - - // perform points migrations - foreach ($attach as $cluster) { - $cluster->attachAll($attach[$cluster]); - } - - foreach ($detach as $cluster) { - $cluster->detachAll($detach[$cluster]); - } - - // update all cluster's centroids - foreach ($clusters as $cluster) { - $cluster->updateCentroid(); - } - - return $continue; - } -} diff --git a/src/Math.php b/src/Math.php new file mode 100644 index 0000000..cec14be --- /dev/null +++ b/src/Math.php @@ -0,0 +1,126 @@ + $a + * @param array $b + */ + public static function euclideanDist(array $a, array $b): float + { + assert(count($a) == count($b)); + + for ($dist = 0, $n = 0, $c = count($a); $n < $c; $n++) { + $dist += pow($a[$n] - $b[$n], 2); + } + + return sqrt($dist); + } + + /** + * @param array> $points + * @return array + */ + public static function centroid(array $points): array + { + $centroid = []; + + foreach ($points as $point) { + foreach ($point as $n => $value) { + $centroid[$n] = ($centroid[$n] ?? 
0) + $value; + } + } + + foreach ($centroid as &$value) { + $value /= count($points); + } + + return $centroid; + } + + /** + * The standard Box–Muller transform generates values from the standard normal + * distribution (i.e. standard normal deviates). + * + * @see https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform + * + * @return array{0: float, 1: float} + */ + public static function gaussianNoise(float $mu, float $sigma): array + { + static $twoPi = 2 * M_PI; + + // create two random numbers, make sure u1 is greater than epsilon + do { + $u1 = (float) mt_rand() / (float) mt_getrandmax(); + $u2 = (float) mt_rand() / (float) mt_getrandmax(); + } while ($u1 < PHP_FLOAT_EPSILON); + + // compute z0 and z1 + $mag = $sigma * sqrt(-2.0 * log($u1)); + $z0 = $mag * cos($twoPi * $u2) + $mu; + $z1 = $mag * sin($twoPi * $u2) + $mu; + + return [$z0, $z1]; + } + + public static int $earthRadius = 6371009; // meters + + /** + * Calculates the great-circle distance (in meters) between two points, + * with the Haversine formula. + * + * @see https://stackoverflow.com/a/14751773/17403258 + * + * @param array{0: float, 1: float} $from + * @param array{0: float, 1: float} $to + * @return float + */ + public static function haversine($from, $to): float + { + return 2 * self::$earthRadius * asin(sqrt( + pow(sin(deg2rad($to[0] - $from[0]) / 2), 2) + + cos(deg2rad($from[0])) * cos(deg2rad($to[0])) + * pow(sin(deg2rad($to[1] - $from[1]) / 2), 2) + )); + } + + /** + * Calculates the centroid of GPS coordinates. 
+ * + * @see https://stackoverflow.com/questions/6671183 + * + * @param array $points + * @return array{0: float, 1: float} + */ + public static function gpsCentroid(array $points): array + { + if (count($points) == 1) { + return $points[0]; + } + + $x = $y = $z = 0; + + foreach ($points as $point) { + $lat = deg2rad($point[0]); + $long = deg2rad($point[1]); + + $x += cos($lat) * cos($long); + $y += cos($lat) * sin($long); + $z += sin($lat); + } + + $x /= count($points); + $y /= count($points); + $z /= count($points); + + $hypotenuse = sqrt(pow($x, 2) + pow($y, 2)); + + $long = atan2($y, $x); + $lat = atan2($z, $hypotenuse); + + return [rad2deg($lat), rad2deg($long)]; + } +} diff --git a/src/PointCollection.php b/src/PointCollection.php new file mode 100644 index 0000000..cd46183 --- /dev/null +++ b/src/PointCollection.php @@ -0,0 +1,82 @@ + + */ + protected \SplObjectStorage $points; + + /** + * @param array $points + */ + public function __construct(SpaceInterface $space, array $points = []) + { + $this->setSpace($space); + $this->points = new \SplObjectStorage(); + + foreach ($points as $point) { + $this->attach($point); + } + } + + public function contains(PointInterface $point): bool + { + return $this->points->contains($point); + } + + public function attach(PointInterface $point): void + { + if (! 
$point->belongsTo($this->getSpace())) { + throw new \InvalidArgumentException( + "Cannot add point to collection: point doesn't belong to the same space as collection" + ); + } + + $this->points->attach($point); + } + + public function detach(PointInterface $point): void + { + $this->points->detach($point); + } + + public function current(): PointInterface + { + return $this->points->current(); + } + + public function key(): int + { + return $this->points->key(); + } + + public function next(): void + { + $this->points->next(); + } + + public function rewind(): void + { + $this->points->rewind(); + } + + public function valid(): bool + { + return $this->points->valid(); + } + + public function count(): int + { + return count($this->points); + } +} diff --git a/src/RandomInitialization.php b/src/RandomInitialization.php new file mode 100644 index 0000000..e997894 --- /dev/null +++ b/src/RandomInitialization.php @@ -0,0 +1,42 @@ +getSpace()); + + // initialize N clusters with a random point + for ($n = 0; $n < $nbClusters; $n++) { + // assign all points to the first cluster only + $clusters->attach(new Cluster($this->getRandomPoint($points), $n == 0 ? 
$points : null)); + } + + return $clusters; + } + + protected function getRandomPoint(PointCollectionInterface $points): PointInterface + { + if (count($points) == 0) { + throw new \LogicException("Unable to pick a random point out of an empty point collection"); + } + + $arr = iterator_to_array($points); + return $arr[array_rand($arr)]; + } +} diff --git a/tests/Data/centroids_2d.csv b/tests/Data/centroids_2d.csv new file mode 100644 index 0000000..a2822a4 --- /dev/null +++ b/tests/Data/centroids_2d.csv @@ -0,0 +1,100 @@ +590,-924,544,188,-828,55,967,-370,318.25,-262.75 +772,-744,218,893,920,-505,202,-8,528,-91 +-843,-236,-145,-548,788,-857,917,-136,179.25,-444.25 +-156,-236,925,15,469,999,227,379,366.25,289.25 +-187,500,-766,847,-435,82,930,-781,-114.5,162 +445,969,-880,-772,956,329,-596,-935,-18.75,-102.25 +155,-131,581,228,698,-404,-519,-222,228.75,-132.25 +707,-203,456,-746,-784,162,-826,-756,-111.75,-385.75 +693,-357,-528,357,877,281,-401,890,160.25,292.75 +987,471,575,69,-94,904,449,-868,479.25,144 +-9,-74,124,-889,-675,432,-990,414,-387.5,-29.25 +-555,419,248,130,521,636,394,-455,152,182.5 +214,-491,713,-179,-50,-335,-14,190,215.75,-203.75 +478,-188,-281,-585,-277,679,266,-659,46.5,-188.25 +773,-330,664,-630,618,-813,-135,-581,480,-588.5 +830,164,632,-22,-342,-340,-191,-925,232.25,-280.75 +820,964,833,-71,128,230,242,977,505.75,525 +-453,-20,271,-193,-152,-121,-666,-712,-250,-261.5 +474,982,-282,63,804,415,-567,546,107.25,501.5 +159,325,659,-337,-488,-928,-785,560,-113.75,-95 +-366,304,373,209,618,727,8,731,158.25,492.75 +25,-124,327,704,-450,-997,990,-292,223,-177.25 +949,-269,929,193,-974,739,-207,-660,174.25,0.75 +619,-170,-503,173,930,925,-259,580,196.75,377 +934,-194,975,-198,-732,901,227,114,351,155.75 +-602,717,-936,-80,540,-266,663,-608,-83.75,-59.25 +70,-518,-847,318,908,-618,-435,261,-76,-139.25 +-599,-947,-34,940,458,-78,998,71,205.75,-3.5 +-469,-866,-553,896,975,237,728,-18,170.25,62.25 +511,795,430,-893,-40,649,-147,70,188.5,155.25 
+851,405,483,-555,-303,-771,-171,983,215,15.5 +-324,129,480,68,218,511,-487,-346,-28.25,90.5 +296,551,119,163,-605,-840,-913,-85,-275.75,-52.75 +681,270,554,-861,-995,-662,3,157,60.75,-274 +546,274,-351,937,-414,188,230,364,2.75,440.75 +421,412,441,-915,799,732,-781,-783,220,-138.5 +-794,-999,237,-828,576,745,-855,-431,-209,-378.25 +263,-860,-151,-365,501,112,271,-854,221,-491.75 +855,-614,973,-607,175,404,559,-490,640.5,-326.75 +-271,-500,22,-19,-955,-45,893,522,-77.75,-10.5 +-736,961,107,29,341,521,378,433,22.5,486 +208,603,-19,-332,-83,-832,291,751,99.25,47.5 +-125,-657,-305,-359,184,811,-583,583,-207.25,94.5 +-356,-920,862,313,-187,-963,-203,-260,29,-457.5 +-223,502,626,-763,532,869,434,-789,342.25,-45.25 +663,728,-732,-456,663,-649,683,-715,319.25,-273 +691,337,-817,-148,-460,-511,671,710,21.25,97 +516,655,861,-544,96,832,297,-193,442.5,187.5 +-688,387,-265,-361,868,-39,608,-29,130.75,-10.5 +-757,-489,394,716,-106,-570,-684,280,-288.25,-15.75 +597,-876,-905,-977,960,615,710,-284,340.5,-380.5 +13,-879,863,134,730,-199,-999,-599,151.75,-385.75 +938,-294,999,990,545,220,812,-660,823.5,64 +752,52,-222,-273,514,485,-270,-894,193.5,-157.5 +-340,183,-789,-871,-521,124,762,-899,-222,-365.75 +-999,-786,363,-450,-754,-31,764,-649,-156.5,-479 +-173,557,204,541,766,870,625,499,355.5,616.75 +300,-750,-771,899,446,559,784,626,189.75,333.5 +-913,103,583,459,-73,-237,-152,-241,-138.75,21 +-667,945,514,-341,505,22,-564,-721,-53,-23.75 +-632,-162,-440,-703,-162,-550,815,626,-104.75,-197.25 +-342,948,607,806,394,905,-655,264,1,730.75 +-396,332,-980,628,498,-423,-723,55,-400.25,148 +-195,748,252,952,343,655,949,-702,337.25,413.25 +-954,-466,905,-692,-502,545,333,-439,-54.5,-263 +497,716,695,-319,-884,254,612,-464,230,46.75 +150,35,255,-525,-238,-118,546,-144,178.25,-188 +-989,325,695,94,-906,59,-294,773,-373.5,312.75 +836,-980,-241,-623,-980,770,489,174,26,-164.75 +613,451,808,231,-819,-162,445,455,261.75,243.75 +-668,-397,358,-682,-484,820,57,-883,-184.25,-285.5 
+589,-535,586,890,303,310,-324,572,288.5,309.25 +604,347,368,8,615,-647,-139,-304,362,-149 +-435,690,989,-880,767,669,601,972,480.5,362.75 +-116,902,718,6,-894,541,805,442,128.25,472.75 +644,-946,-375,399,762,-534,-542,108,122.25,-243.25 +-119,-709,106,998,460,44,726,231,293.25,141 +-495,808,875,-893,-52,-186,-542,-803,-53.5,-268.5 +820,913,167,782,757,-844,527,977,567.75,457 +940,142,44,5,308,902,168,799,365,462 +-560,-141,975,-712,539,-738,-97,-70,214.25,-415.25 +16,977,-701,728,-445,854,166,-998,-241,390.25 +475,486,773,-109,309,745,-146,-627,352.75,123.75 +-473,721,-430,-177,753,-425,-692,59,-210.5,44.5 +544,666,-687,-702,146,-656,355,-31,89.5,-180.75 +738,178,-359,501,-466,-884,682,-778,148.75,-245.75 +-316,905,-156,-290,-561,-698,398,-628,-158.75,-177.75 +-311,770,924,637,-442,-680,-586,314,-103.75,260.25 +509,358,238,-909,875,-254,-731,728,222.75,-19.25 +902,-48,-994,-857,103,862,864,-854,218.75,-224.25 +361,820,838,-206,97,129,-48,12,312,188.75 +294,193,752,-78,62,-800,-241,-7,216.75,-173 +90,222,-84,-336,-169,696,-583,-172,-186.5,102.5 +-71,-284,458,-608,-609,-996,-965,-657,-296.75,-636.25 +654,378,-297,-671,-638,-662,464,130,45.75,-206.25 +624,255,633,-239,-884,-966,326,-234,174.75,-296 +364,-699,-326,580,-123,349,-3,522,-22,188 +-689,523,-649,-682,444,214,-730,628,-406,170.75 +-8,-265,-832,-876,251,-885,213,-572,-94,-649.5 +-553,-887,794,101,160,533,-514,-294,-28.25,-136.75 diff --git a/tests/Data/euclidean_distances_2d.csv b/tests/Data/euclidean_distances_2d.csv new file mode 100644 index 0000000..4841de5 --- /dev/null +++ b/tests/Data/euclidean_distances_2d.csv @@ -0,0 +1,100 @@ +-549,-241,311,-311,862.844134244418 +-932,506,526,-635,1851.39001833757 +-231,-565,710,824,1677.73716654308 +-211,-839,659,-311,1017.68560960642 +409,864,-138,-654,1613.54671453912 +-985,202,430,902,1578.67824460844 +501,-967,-858,-180,1570.42987745394 +323,-691,6,219,963.633228982895 +-898,-952,361,426,1866.53823962972 +583,511,-119,-681,1383.35389542951 
+242,-648,-361,-757,612.77238841188 +-127,-226,705,-286,834.160655989001 +-64,-958,554,937,1993.2257774773 +889,950,557,470,583.630019789935 +533,-514,-86,-588,623.407571336762 +516,750,-255,591,787.22423743175 +239,-250,630,186,585.642382346087 +48,-581,-255,578,1197.95241975631 +-619,-699,-284,335,1086.91352001896 +664,-983,18,591,1701.40882800108 +-109,323,-701,408,598.07106601139 +912,-906,-357,62,1596.052943984 +-766,-949,576,561,2020.16434974979 +887,71,-401,-911,1619.65057960043 +-131,694,338,701,469.052235897027 +-490,764,-355,449,342.709789763876 +119,315,-69,-34,396.41518638922 +715,-660,398,557,1257.60804704805 +334,819,211,-757,1580.7925227556 +739,-45,862,820,873.701321963061 +180,444,-718,-492,1297.11217710728 +30,465,268,961,550.145435316881 +-687,439,954,420,1641.10999022003 +-394,266,857,-941,1738.34691589452 +-813,-932,873,-398,1768.54516481768 +466,896,-13,-155,1155.00735928391 +404,421,-86,-303,874.228803002967 +-821,184,315,-960,1612.21338538048 +-482,719,339,512,846.693569126399 +968,807,-879,-522,2275.44501142084 +750,-662,-54,948,1799.58773056498 +-808,24,411,-973,1574.79204976403 +-594,260,838,75,1443.90061984889 +-725,456,389,640,1129.09344166017 +943,925,-899,1000,1843.52624065946 +-806,-894,-337,-599,554.063173293443 +754,990,172,-759,1843.29189224062 +541,383,799,-245,678.931513482767 +-998,24,-155,273,879.005119439017 +7,975,934,-540,1776.10641573077 +323,-401,6,569,1020.48468876314 +86,-456,-257,-674,406.414812722174 +-710,-926,-181,-188,908.011563803017 +-732,-733,570,144,1569.8194163661 +176,554,728,-188,924.807006893871 +48,-436,203,-657,269.937029693964 +-516,182,-692,528,388.190674797837 +-709,453,-155,12,708.093920324133 +-195,145,818,139,1013.01776884712 +629,613,198,187,606.000825081947 +606,-742,-504,464,1639.06558746134 +319,746,701,129,725.681059419357 +618,-724,-357,-403,1026.48234276095 +-273,-934,634,-313,1099.22245246356 +81,584,-757,774,859.269457155321 +-631,489,-732,-601,1094.66935647254 
+236,-704,-171,-501,454.816446492429 +929,-380,468,158,708.494883538336 +-805,819,204,-767,1879.75450524796 +-377,563,601,-91,1176.52029306765 +-86,-355,510,783,1284.62445874271 +555,218,721,125,190.276115158997 +-45,-620,439,745,1448.26827625271 +-280,184,-549,701,582.794989683336 +-106,430,-574,-963,1469.51454569188 +-27,414,102,-850,1270.56562207546 +815,-359,48,339,1037.05978612614 +320,-13,-158,-928,1032.33182649766 +-177,-319,-873,-458,709.744320160436 +154,-179,532,395,687.28451168348 +-114,-632,-765,642,1430.69109174552 +-158,-825,925,934,2065.66454198159 +-426,-223,-960,735,1096.77709677035 +-983,287,-729,323,256.538496136545 +336,627,-296,236,743.172254595124 +806,-956,-657,-715,1482.71710046118 +-232,-980,-115,884,1867.66833244021 +-509,-703,494,317,1430.52752507598 +89,626,95,199,427.042152486145 +800,925,-389,-961,2229.51048438889 +-378,-211,-462,-210,84.0059521700695 +-954,-551,600,646,1961.56187768829 +172,656,690,430,565.154846037792 +915,-507,226,-542,689.888396771536 +338,-625,1,-732,353.578845521052 +111,783,-193,-218,1046.14387155878 +-719,365,522,180,1254.71351311764 +-214,121,-917,-599,1006.28475095273 +-853,478,693,-759,1979.97095938299 +756,-268,149,524,997.854197766387 diff --git a/tests/Data/euclidean_distances_3d.csv b/tests/Data/euclidean_distances_3d.csv new file mode 100644 index 0000000..4e7ed6c --- /dev/null +++ b/tests/Data/euclidean_distances_3d.csv @@ -0,0 +1,100 @@ +647,195,682,47,-986,-745,1947.07216096374 +14,-800,273,-854,343,-56,1472.45169700062 +253,-611,381,-768,-438,-710,1504.21108891006 +129,912,281,-840,-79,-972,1868.43544175334 +-327,-664,-490,-231,-38,63,840.774048124703 +-273,-495,-596,-186,-305,-503,228.731283387297 +-669,514,-926,890,-146,832,2440.62389564636 +846,-173,-394,-669,579,159,1779.47688942565 +-220,850,-183,-84,-555,-862,1566.38501014278 +-895,-529,285,-659,933,-718,1788.61650445253 +-174,-723,-36,-633,230,363,1130.52686832291 +-627,-510,882,168,700,-473,1982.96495178306 
+277,-86,939,-124,-243,95,947.515699078385 +305,128,68,-370,972,213,1090.40634627647 +-106,193,739,-203,-842,-181,1388.17650174609 +669,-192,523,695,220,525,412.82441788247 +283,537,-431,-406,-235,-589,1046.7420885777 +-81,-170,130,423,808,-394,1218.63694347414 +617,962,-594,829,26,-774,976.442522629981 +841,519,426,-208,-506,548,1471.70309505688 +-710,-616,816,475,544,-190,1939.55175233867 +883,443,-160,-57,-54,-756,1218.94421529453 +-809,661,-252,-388,-423,581,1430.44957967766 +254,463,354,238,-339,132,832.312441334383 +-228,-45,-796,941,-552,-486,1311.37713873622 +99,-340,184,-438,888,521,1381.99927641081 +425,-340,-579,139,-555,475,1113.07546913945 +486,96,-164,450,511,380,685.169322138696 +-857,630,418,-821,901,539,298.961535987491 +633,-138,560,168,966,-824,1830.43628679067 +-137,432,-97,440,407,-918,1003.79031674947 +918,313,100,-188,-124,-674,1418.90133554099 +290,-400,112,-221,-439,-338,682.013196353267 +826,812,653,336,-149,-606,1657.92098726085 +9,425,913,-401,-426,-85,1374.15610466934 +883,-771,654,985,263,-633,1654.06438810586 +759,749,390,718,-270,527,1028.98542263727 +-273,-738,599,468,-986,-937,1723.3342682138 +571,-751,186,-292,-466,-85,948.385470154409 +575,655,-817,844,-981,914,2396.91843832868 +468,-40,812,-954,700,371,1662.57781772764 +-133,64,715,-542,518,-376,1250.47111122169 +-743,-243,482,312,-954,-620,1683.13695224126 +-709,-35,132,325,513,-705,1438.7595351552 +205,-912,709,53,-599,-173,948.154523271392 +-249,-203,725,197,-337,-760,1556.30877399056 +712,-920,-874,493,-651,-48,895.878339954706 +798,-419,-7,-488,-414,81,1289.01706738119 +522,600,942,985,257,-410,1469.66730929146 +-270,-75,748,243,-798,594,899.785530001456 +458,-370,-117,-378,682,-532,1406.35166299187 +300,626,-93,-975,858,-28,1297.56464193504 +-465,-703,925,-139,-426,593,541.506232651112 +890,12,-703,-643,877,-489,1773.1638390177 +-867,-226,-887,-195,59,318,1408.84136793324 +-404,-771,166,482,595,573,1678.27321971126 +706,32,-171,162,-663,-231,884.624779214329 
+-674,-844,-319,450,330,148,1691.07687583977 +-972,825,-21,672,212,-634,1858.56772811754 +295,-437,-927,538,-341,454,1405.49848808172 +427,-365,800,-336,-662,415,904.766820788649 +-987,-443,438,-672,699,234,1202.08360774116 +-782,-748,-222,-125,237,394,1334.66475191338 +-932,200,-867,757,232,-195,1818.05637976384 +299,-375,612,-807,-520,-733,1747.36544546354 +347,-798,950,1000,-60,866,988.993933247318 +414,-810,-196,382,-967,544,757.147937988343 +543,884,106,729,795,331,305.191743007572 +-63,427,526,-993,-596,147,1433.55153377896 +-756,171,-788,-752,282,413,1206.12520079799 +-894,302,-122,171,675,334,1217.08257731347 +-316,388,708,320,-153,-519,1484.15160950625 +544,291,-85,-261,-42,-658,1042.70945138135 +-661,-947,-984,747,759,-826,2217.62575742617 +337,106,928,376,117,937,41.5090351610345 +555,865,353,-353,-715,349,1822.32818120118 +-102,-108,-672,-827,172,-224,897.0668871383 +-280,-195,-271,955,-955,511,1647.5281484697 +806,-448,-512,-255,431,772,1883.35286125569 +310,-473,-197,-68,517,878,1509.50621065301 +-890,953,-107,782,-647,-658,2378.90415948184 +597,757,305,188,178,-58,796.423882113037 +133,-196,988,594,884,-708,2062.84681932518 +96,-532,206,512,-59,-78,690.971055833745 +-383,-84,-356,889,97,-511,1294.12905075189 +196,512,-273,397,-464,-399,1004.41674617661 +-198,49,-924,769,-117,861,2036.87751227215 +-683,-752,217,178,94,-723,1529.91404987339 +905,259,-204,-589,796,-394,1598.90743947234 +-901,441,-50,232,-532,-921,1728.88952799188 +723,491,-569,988,-7,-727,585.826766203116 +588,-791,860,-418,-576,-581,1770.52026252173 +-41,-975,145,-839,258,-368,1555.71912632069 +-752,-488,-219,-640,-748,513,784.836288661527 +-837,-453,-271,539,-914,35,1483.08226339607 +-507,781,547,435,-97,502,1288.51581286378 +515,-8,-308,-378,-71,953,1546.46015144264 +-264,-339,287,-2,45,-70,586.130531537131 +169,-813,473,-843,-198,438,1184.73372535773 +259,-567,-443,244,-731,-524,183.526564834631 diff --git a/tests/Data/gps_centroid.csv b/tests/Data/gps_centroid.csv new file mode 
100644 index 0000000..4ff47c3 --- /dev/null +++ b/tests/Data/gps_centroid.csv @@ -0,0 +1,3 @@ +"Paris, Lyon, Marseille",45.9784058082879,4.226770011911983,48.85889,2.32004,45.75781,4.83201,43.29617,5.36995 +"Single point",48.85889,2.32004,48.85889,2.32004 +"5 close points",43.29619000000861,5.369947999981911,43.29617,5.36995,43.29616,5.36987,43.29625,5.36998,43.29621,5.37000,43.29616,5.36994 diff --git a/tests/Data/gps_centroid.py b/tests/Data/gps_centroid.py new file mode 100644 index 0000000..def69d2 --- /dev/null +++ b/tests/Data/gps_centroid.py @@ -0,0 +1,44 @@ +import numpy as np +import numpy.linalg as lin + +E = np.array([[0, 0, 1], + [0, 1, 0], + [-1, 0, 0]]) + +def lat_long2n_E(latitude,longitude): + res = [np.sin(np.deg2rad(latitude)), + np.sin(np.deg2rad(longitude)) * np.cos(np.deg2rad(latitude)), + -np.cos(np.deg2rad(longitude)) * np.cos(np.deg2rad(latitude))] + return np.dot(E.T,np.array(res)) + +def n_E2lat_long(n_E): + n_E = np.dot(E, n_E) + longitude=np.arctan2(n_E[1],-n_E[2]); + equatorial_component = np.sqrt(n_E[1]**2 + n_E[2]**2 ); + latitude=np.arctan2(n_E[0],equatorial_component); + return np.rad2deg(latitude), np.rad2deg(longitude) + +def average(coords): + res = [] + for lat,lon in coords: + res.append(lat_long2n_E(lat,lon)) + res = np.array(res) + m = np.mean(res,axis=0) + m = m / lin.norm(m) + return n_E2lat_long(m) + + +#paris = [48.85889,2.32004] +#lyon = [45.75781,4.83201] +#marseille = [43.29617,5.36995] +# +## 45.9784058082879, 4.226770011911983 +#print (average([paris, lyon, marseille])) + +print(average([ + [43.29617,5.36995], + [43.29616,5.36987], + [43.29625,5.36998], + [43.29621,5.37000], + [43.29616,5.36994] +])) diff --git a/tests/Data/haversine_distances.csv b/tests/Data/haversine_distances.csv new file mode 100644 index 0000000..9f0598e --- /dev/null +++ b/tests/Data/haversine_distances.csv @@ -0,0 +1,5 @@ +"Paris - New York",48.864716,2.349014,40.7128,74.0060,5514741.115351569 +"Paris - 
Neuilly",48.864716,2.349014,48.8848,2.2685,6297.56948974873 +"Paris - Paris",48.864716,2.349014,48.864716,2.349014,0.0 +"North Pole - South Pole",90.0,0.0,-90.0,0.0,20015114.442035925 +"Two very close points",48.85323,2.34903,48.85321,2.34902,2.3411651390339396 diff --git a/tests/Kmeans/ClusterTest.php b/tests/Kmeans/ClusterTest.php deleted file mode 100644 index 963f56b..0000000 --- a/tests/Kmeans/ClusterTest.php +++ /dev/null @@ -1,206 +0,0 @@ -assertInstanceOf( - Cluster::class, - new Cluster(new Space(2), [0,0]) - ); - } - - public function testToArray() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $this->assertEquals( - [ - 'centroid' => $points[0]->toArray(), - 'points' => array_map( - function ($p) { - return $p->toArray(); - }, - $points - ), - ], - $cluster->toArray() - ); - } - - public function testAttach() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $this->assertCount(3, $cluster); - } - - public function testAttachException() - { - $this->expectException(\LogicException::class); - - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - - $cluster->attach($cluster); - } - - public function testDetach() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $cluster->detach($points[0]); - $this->assertCount(2, $cluster); - - $cluster->detach($points[1]); - $this->assertCount(1, $cluster); - - $cluster->detach($points[2]); - $this->assertCount(0, $cluster); - } - - 
public function testAttachAll() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - $storage = new \SplObjectStorage(); - foreach ($points as $point) { - $storage->attach($point); - } - - $cluster->attachAll($storage); - $this->assertCount(3, $cluster); - } - - public function testDetachAll() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $storage = new \SplObjectStorage(); - foreach ($points as $point) { - $storage->attach($point); - } - - $cluster->detachAll($storage); - $this->assertCount(0, $cluster); - } - - public function testUpdateCentroid() - { - $space = new Space(1); - $cluster = new Cluster($space, [0]); - - $cluster->updateCentroid(); - $this->assertEquals([0], $cluster->getCoordinates()); - - $cluster->attach(new Point($space, [5])); - $cluster->attach(new Point($space, [6])); - $cluster->attach(new Point($space, [7])); - - $cluster->updateCentroid(); - - $this->assertEquals([6], $cluster->getCoordinates()); - } - - public function testGetIterator() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $this->assertInstanceOf( - \SplObjectStorage::class, - $cluster->getIterator() - ); - } - - public function testCount() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - $cluster->attach($points[0]); - $this->assertEquals(1, $cluster->count()); - - $cluster->attach($points[1]); - $this->assertEquals(2, $cluster->count()); - - 
$cluster->attach($points[2]); - $this->assertEquals(3, $cluster->count()); - - $cluster->detach($points[2]); - $this->assertEquals(2, $cluster->count()); - - $cluster->detach($points[1]); - $this->assertEquals(1, $cluster->count()); - - $cluster->detach($points[0]); - $this->assertEquals(0, $cluster->count()); - } -} diff --git a/tests/Kmeans/PointTest.php b/tests/Kmeans/PointTest.php deleted file mode 100644 index b06a28b..0000000 --- a/tests/Kmeans/PointTest.php +++ /dev/null @@ -1,157 +0,0 @@ -assertInstanceOf(Point::class, $point); - } - - public function testToArray() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertEquals(['coordinates' => [0,0], 'data' => null], $point->toArray()); - - $space[$point] = "foobar"; - - $this->assertEquals(['coordinates' => [0,0], 'data' => "foobar"], $point->toArray()); - } - - public function testGetDistanceWith() - { - $space = new Space(2); - $point1 = new Point($space, [1,1]); - $point2 = new Point($space, [2,1]); - - $this->assertEquals(1, $point1->getDistanceWith($point2)); - } - - public function testGetDistanceWithException() - { - $this->expectException(\LogicException::class); - - $space = new Space(2); - $point1 = new Point($space, [1,1]); - - $space = new Space(3); - $point2 = new Point($space, [2,1,0]); - - $point1->getDistanceWith($point2); - } - - public function testGetDistanceWithPreciseFalse() - { - $space = new Space(2); - $point1 = new Point($space, [4,3]); - $point2 = new Point($space, [2,1]); - - $this->assertEquals(8, $point1->getDistanceWith($point2, false)); - } - - public function testGetClosest() - { - $space = new Space(2); - $points = [ - new Point($space, [-2,-2]), - new Point($space, [-1,-1]), - new Point($space, [ 0, 0]), - new Point($space, [ 1, 1]), - new Point($space, [ 2, 2]), - ]; - - $this->assertEquals($points[0], (new Point($space, [-2.1, -2.1]))->getClosest($points)); - $this->assertEquals($points[1], (new Point($space, [-1.1, 
-1.1]))->getClosest($points)); - $this->assertEquals($points[2], (new Point($space, [ 0.1, 0.1]))->getClosest($points)); - $this->assertEquals($points[3], (new Point($space, [ 1.1, 1.1]))->getClosest($points)); - $this->assertEquals($points[4], (new Point($space, [ 2.1, 2.1]))->getClosest($points)); - } - - public function testBelongsTo() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertTrue($point->belongsTo($space)); - $this->assertFalse($point->belongsTo(new Space(2))); - } - - public function testGetSpace() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertTrue($point->getSpace() === $space); - } - - public function testGetCoordinates() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertEquals([0,0], $point->getCoordinates()); - } - - public function testOffsetExists() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertTrue($point->offsetExists(0)); - $this->assertTrue($point->offsetExists(1)); - $this->assertFalse($point->offsetExists(2)); - } - - public function testOffsetGet() - { - $space = new Space(2); - $point = new Point($space, [1,2]); - - $this->assertEquals(1, $point->offsetGet(0)); - $this->assertEquals(2, $point->offsetGet(1)); - } - - public function testOffsetGetError() - { - $this->expectError(); - - $space = new Space(1); - $point = new Point($space, [1]); - - $point->offsetGet(1); - } - - public function testOffsetSet() - { - $space = new Space(2); - $point = new Point($space, [1,2]); - - $point->offsetSet(0, 3); - $point->offsetSet(1, 4); - - $this->assertEquals(3, $point->offsetGet(0)); - $this->assertEquals(4, $point->offsetGet(1)); - } - - public function testOffsetUnset() - { - $space = new Space(2); - $point = new Point($space, [1,2]); - - $point->offsetUnset(0); - $point->offsetUnset(1); - - $this->assertFalse($point->offsetExists(0)); - $this->assertFalse($point->offsetExists(1)); - } -} diff --git 
a/tests/Kmeans/SpaceTest.php b/tests/Kmeans/SpaceTest.php deleted file mode 100644 index 63abfdd..0000000 --- a/tests/Kmeans/SpaceTest.php +++ /dev/null @@ -1,222 +0,0 @@ -assertInstanceOf(Space::class, new Space(1)); - $this->assertInstanceOf(Space::class, new Space(2)); - $this->assertInstanceOf(Space::class, new Space(3)); - $this->assertInstanceOf(Space::class, new Space(50)); - } - - public function testConstructException() - { - $this->expectException(\LogicException::class); - - new Space(-1); - } - - public function testToArray() - { - $space = new Space(2); - $points = [ - new Point($space, [-2,-2]), - new Point($space, [-1,-1]), - new Point($space, [ 0, 0]), - new Point($space, [ 1, 1]), - new Point($space, [ 2, 2]), - ]; - - foreach ($points as $point) { - $space->attach($point); - } - - $this->assertEquals( - ['points' => array_map(function ($p) { - return $p->toArray(); - }, $points)], - $space->toArray() - ); - } - - public function testNewPoint() - { - $space = new Space(2); - - $this->assertInstanceOf(Point::class, $space->newPoint([0,0])); - } - - public function testNewPointException() - { - $this->expectException(\LogicException::class); - - $space = new Space(2); - $space->newPoint([1,2,3]); - } - - public function testAddPoint() - { - $space = new Space(2); - - $space->addPoint([0,0]); - $space->addPoint([1,1]); - $space->addPoint([2,2]); - - $this->assertCount(3, $space); - } - - public function testAttach() - { - $space = new Space(2); - - $space->attach(new Point($space, [0,0])); - $space->attach(new Point($space, [1,1])); - $space->attach(new Point($space, [2,2])); - - $this->assertCount(3, $space); - } - - public function testAttachException() - { - $this->expectException(\InvalidArgumentException::class); - - $space = new Space(2); - $space->attach("INVALID"); - } - - public function testGetDimention() - { - $this->assertEquals(1, (new Space(1))->getDimention()); - $this->assertEquals(2, (new Space(2))->getDimention()); - 
$this->assertEquals(3, (new Space(3))->getDimention()); - } - - public function testGetBoundaries() - { - $space = new Space(2); - - $this->assertEmpty($space->getBoundaries()); - - $space->attach($p1 = new Point($space, [ 0, 0])); - $space->attach($p2 = new Point($space, [ 0,10])); - $space->attach($p3 = new Point($space, [10, 0])); - $space->attach($p4 = new Point($space, [10,10])); - - $this->assertEquals([$p1, $p4], $space->getBoundaries()); - } - - public function testGetRandomPoint() - { - $space = new Space(1); - - $min = new Point($space, [0]); - $max = new Point($space, [10]); - - Space::setRng(function ($min, $max) { - return $min; - }); - $this->assertEquals($min, $space->getRandomPoint($min, $max)); - - Space::setRng(function ($min, $max) { - return $max; - }); - $this->assertEquals($max, $space->getRandomPoint($min, $max)); - } - - public function testSolve() - { - Space::setRng(function ($min, $max) { - static $values = [10, 0]; - return array_pop($values) ?? mt_rand($min, $max); - }); - - $space = new Space(1); - - $space->attach($space->newPoint([1])); - $space->attach($space->newPoint([2])); - $space->attach($space->newPoint([3])); - - $space->attach($space->newPoint([7])); - $space->attach($space->newPoint([8])); - $space->attach($space->newPoint([9])); - - $iterations = 0; - $history = []; - $callback = function ($space, $clusters) use (&$iterations, &$history) { - foreach ($clusters as $cluster) { - $history[$iterations][] = $cluster->getCoordinates()[0]; - } - - $iterations++; - }; - - $clusters = $space->solve(2, $callback); - - $this->assertEquals([[0,10],[2,8]], $history); - $this->assertEquals(2, $iterations); - $this->assertcount(2, $clusters); - - $this->assertEquals([2], $clusters[0]->getCoordinates()); - $this->assertEquals([8], $clusters[1]->getCoordinates()); - } - - public function testSolveSingleCluster() - { - $space = new Space(2); - $space->attach($space->newPoint([0,0])); - $space->solve(1); - } - - public function 
testSolveWithInvalidClustersNumber() - { - $this->expectException(\InvalidArgumentException::class); - - $space = new Space(2); - $space->attach($space->newPoint([0,0])); - $space->solve(-1); - } - - public function testSolveWithKmeansPlusPlus() - { - Space::setRng(function () { - return 52590703; - }); - - $space = new Space(1); - - $space->attach($space->newPoint([1])); - $space->attach($space->newPoint([2])); - $space->attach($space->newPoint([3])); - - $space->attach($space->newPoint([7])); - $space->attach($space->newPoint([8])); - $space->attach($space->newPoint([9])); - - $iterations = 0; - $history = []; - $callback = function ($space, $clusters) use (&$iterations, &$history) { - foreach ($clusters as $cluster) { - $history[$iterations][] = $cluster->getCoordinates()[0]; - } - - $iterations++; - }; - - $clusters = $space->solve(2, $callback, Cluster::INIT_KMEANS_PLUS_PLUS); - - $this->assertEquals([[1,3],[1.5,6.75],[2,8]], $history); - $this->assertEquals(3, $iterations); - $this->assertcount(2, $clusters); - - $this->assertEquals([2], $clusters[0]->getCoordinates()); - $this->assertEquals([8], $clusters[1]->getCoordinates()); - } -} diff --git a/tests/Unit/AlgorithmTest.php b/tests/Unit/AlgorithmTest.php new file mode 100644 index 0000000..37f3a9b --- /dev/null +++ b/tests/Unit/AlgorithmTest.php @@ -0,0 +1,174 @@ + + */ + abstract public function fitDataProvider(): array; + + /** + * @param array $center + * @return array + */ + abstract protected function random(array $center, float $radius): array; + + // ------------------------------------------------------------------------ + // tests + + /** + * @dataProvider fitDataProvider + */ + public function testFit( + SpaceInterface $space, + float $radius, + PointCollectionInterface $points, + PointCollectionInterface $initialCentroids, + PointCollectionInterface $expectedCentroids, + ): void { + $algorithm = $this->makeAlgorithm( + $this->mockInitScheme( + $this->makeClusters($points, $initialCentroids) + ) 
+ ); + + $result = iterator_to_array( + $algorithm->fit($points, count($expectedCentroids)) + ); + + foreach ($expectedCentroids as $i => $expectedCentroid) { + $this->assertLessThan( + $radius, + $algorithm->getDistanceBetween( + $expectedCentroid, + $result[$i]->getCentroid() + ) + ); + + if ( + is_array($expectedCentroid->getData()) + && isset($expectedCentroid->getData()['count']) + ) { + $this->assertCount( + $expectedCentroid->getData()['count'], + $result[$i]->getPoints() + ); + } + } + } + + // ------------------------------------------------------------------------ + // helpers + + /** + * @param array> $centers + * @return ScenarioData + */ + protected function makeScenarioData( + SpaceInterface $space, + array $centers, + float $radius, + int $count + ): array { + $points = new PointCollection($space); + for ($i = 0; $i < count($centers); $i++) { + for ($j = 0; $j < $count; $j++) { + $point = $space->makePoint($this->random($centers[$i], $radius)); + $points->attach($point); + } + } + + $initialCentroids = new PointCollection($space); + for ($i = 0; $i < count($centers); $i++) { + $point = $space->makePoint($centers[$i]); + $initialCentroids->attach($point); + } + + $expectedCentroids = new PointCollection($space); + for ($i = 0; $i < count($centers); $i++) { + $point = $space->makePoint($centers[$i]); + $point->setData(['count' => $count]); + $expectedCentroids->attach($point); + } + + return compact( + 'space', + 'radius', + 'points', + 'initialCentroids', + 'expectedCentroids' + ); + } + + protected function makeClusters( + PointCollectionInterface $points, + PointCollectionInterface $centroids + ): ClusterCollectionInterface { + $clusters = new ClusterCollection($points->getSpace()); + + foreach ($centroids as $n => $centroid) { + // attach all points to the first cluster + $clusters->attach(new Cluster($centroid, $n == 0 ? 
$points : null)); + } + + return $clusters; + } + + protected function mockInitScheme( + ClusterCollectionInterface $clusters + ): InitializationSchemeInterface { + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + /** @phpstan-ignore-next-line */ + $initScheme + ->shouldReceive('initializeClusters') + ->with(PointCollectionInterface::class, Mockery::type('integer')) + ->andReturn($clusters); + + return $initScheme; + } +} diff --git a/tests/Unit/ClusterCollectionTest.php b/tests/Unit/ClusterCollectionTest.php new file mode 100644 index 0000000..04bc451 --- /dev/null +++ b/tests/Unit/ClusterCollectionTest.php @@ -0,0 +1,127 @@ +assertTrue( + $collection->contains($cluster) + ); + + $this->assertFalse( + $collection->contains(new Cluster($point)) + ); + } + + public function testAddingAndRemovingClustersFromCollection(): void + { + $space = new Space(4); + $collection = new ClusterCollection($space); + + $clusterA = new Cluster(new Point($space, [1,2,3,4])); + $clusterB = new Cluster(new Point($space, [5,6,7,8])); + $clusterC = new Cluster(new Point($space, [9,0,1,2])); + + $collection->attach($clusterA); + $collection->attach($clusterC); + + $this->assertTrue( + $collection->contains($clusterA) + ); + + $this->assertFalse( + $collection->contains($clusterB) + ); + + $this->assertTrue( + $collection->contains($clusterC) + ); + + $collection->detach($clusterC); + + $this->assertFalse( + $collection->contains($clusterC) + ); + } + + public function testAddingInvalidClusterToCollection(): void + { + $this->expectException(\InvalidArgumentException::class); + + $spaceA = new Space(2); + $spaceB = new Space(3); + + $collection = new ClusterCollection($spaceA); + $cluster = new Cluster(new Point($spaceB, [1, 2, 3])); + + $collection->attach($cluster); + } + + public function testCount(): void + { + $space = new Space(4); + $collection = new ClusterCollection($space); + + $clusterA = new Cluster(new 
Point($space, [1,2,3,4])); + $clusterB = new Cluster(new Point($space, [5,6,7,8])); + $clusterC = new Cluster(new Point($space, [9,0,1,2])); + + $collection->attach($clusterA); + $collection->attach($clusterB); + $collection->attach($clusterC); + + $this->assertEquals(3, count($collection)); + + $collection->detach($clusterA); + $this->assertEquals(2, count($collection)); + + $collection->detach($clusterB); + $this->assertEquals(1, count($collection)); + + $collection->detach($clusterC); + $this->assertEquals(0, count($collection)); + } + + public function testIterator(): void + { + $space = new Space(4); + $collection = new ClusterCollection($space); + + $clusterA = new Cluster(new Point($space, [1,2,3,4])); + $clusterB = new Cluster(new Point($space, [5,6,7,8])); + $clusterC = new Cluster(new Point($space, [9,0,1,2])); + + $collection->attach($clusterA); + $collection->attach($clusterB); + $collection->attach($clusterC); + + $iterations = 0; + foreach ($collection as $i => $cluster) { + $this->assertInstanceof(ClusterInterface::class, $cluster); + $iterations++; + } + + $this->assertEquals(3, $iterations); + } +} diff --git a/tests/Unit/ClusterTest.php b/tests/Unit/ClusterTest.php new file mode 100644 index 0000000..349f6cd --- /dev/null +++ b/tests/Unit/ClusterTest.php @@ -0,0 +1,102 @@ +assertTrue( + $cluster->belongsTo(new Space(2)) + ); + } + + public function testGetCentroid(): void + { + $cluster = self::makeCluster(); + + $this->assertSame( + [3.0,3.0], + $cluster->getCentroid()->getCoordinates() + ); + } + + public function testSetCentroid(): void + { + $cluster = self::makeCluster(); + + $cluster->setCentroid( + $centroid = new Point(new Space(2), [1,1]) + ); + + $this->assertSame( + $centroid, + $cluster->getCentroid() + ); + } + + public function testSetCentroidFailsWithInvalidCentroid(): void + { + $cluster = self::makeCluster(); + + $this->expectException(\LogicException::class); + $this->expectExceptionMessageMatches('/^Cannot set centroid/'); + + 
$cluster->setCentroid( + new Point(new Space(3), [6,6,6]) + ); + } + + public function testGetPoints(): void + { + $cluster = self::makeCluster(); + + $this->assertCount(5, $cluster->getPoints()); + } + + public function testAttach(): Cluster + { + $cluster = self::makeCluster(); + + $cluster->attach( + new Point(new Space(2), [6,6]) + ); + + $this->assertCount(6, $cluster->getPoints()); + + return $cluster; + } + + public function testDetach(): void + { + $cluster = self::makeCluster(); + $points = iterator_to_array($cluster->getPoints()); + $point = $points[array_rand($points)]; + + $cluster->detach($point); + + $this->assertCount(4, $cluster->getPoints()); + } +} diff --git a/tests/Unit/Concerns/HasSpaceTraitTest.php b/tests/Unit/Concerns/HasSpaceTraitTest.php new file mode 100644 index 0000000..89406af --- /dev/null +++ b/tests/Unit/Concerns/HasSpaceTraitTest.php @@ -0,0 +1,45 @@ +space = new Space(2); + + $this->point = new class ($this->space) implements SpaceBoundInterface { + use HasSpaceTrait; + + public function __construct(SpaceInterface $space) + { + $this->setSpace($space); + } + }; + } + + public function testGetSpace(): void + { + $this->assertSame($this->space, $this->point->getSpace()); + } + + public function testBelongsTo(): void + { + $this->assertTrue($this->point->belongsTo($this->space)); + $this->assertTrue($this->point->belongsTo(new Space(2))); + $this->assertFalse($this->point->belongsTo(new Space(3))); + } +} diff --git a/tests/Unit/Euclidean/AlgorithmTest.php b/tests/Unit/Euclidean/AlgorithmTest.php new file mode 100644 index 0000000..023480e --- /dev/null +++ b/tests/Unit/Euclidean/AlgorithmTest.php @@ -0,0 +1,190 @@ +makeAlgorithm( + $this->mockInitScheme( + $this->makeClusters($points, $initialCentroids) + ) + ); + + $called = false; + $algorithm->registerIterationCallback(function () use (&$called) { + $called = true; + }); + + $algorithm->fit($points, count($expectedCentroids)); + + $this->assertTrue($called); + } + + protected 
function makeAlgorithm( + InitializationSchemeInterface $initScheme + ): AlgorithmInterface { + return new Algorithm($initScheme); + } + + /** + * @return array + */ + public function fitDataProvider(): array + { + return [ + '1D' => $this->makeScenarioData( + new Space(1), + [ + [-100], + [0], + [100] + ], + 2, // radius + 10, // points per clusters + ), + '2D' => $this->makeScenarioData( + new Space(2), + [ + [-100, -100], + [0, 0], + [100, 100], + ], + 2, // radius + 10, // points per clusters + ), + '3D' => $this->makeScenarioData( + new Space(3), + [ + [-100, -100, -100], + [0, 0, 0], + [100, 100, 100], + ], + 2, // radius + 10, // points per clusters + ), + ]; + } + + /** + * @param array $center + * @return array + */ + protected function random(array $center, float $radius): array + { + $point = $center; + + foreach ($point as &$c) { + $blur = Math::gaussianNoise($c, $radius); + $c = $blur[array_rand($blur)]; + } + + return $point; + } + + /** + * @uses \Kmeans\Gps\Point + * @uses \Kmeans\Gps\Space + */ + public function testGetDistanceBetweenException(): void + { + $this->expectException(\InvalidArgumentException::class); + + /** @var InitializationSchemeInterface */ + $initScheme = \Mockery::mock(InitializationSchemeInterface::class); + + $algorithm = new Algorithm($initScheme); + $algorithm->getDistanceBetween( + new \Kmeans\Gps\Point(0, 0), + new \Kmeans\Gps\Point(0, 0) + ); + } + + public function testFindCentroidException(): void + { + $this->expectException(\InvalidArgumentException::class); + + /** @var InitializationSchemeInterface */ + $initScheme = \Mockery::mock(InitializationSchemeInterface::class); + + $algorithm = new Algorithm($initScheme); + $algorithm->findCentroid( + new PointCollection(new \Kmeans\Gps\Space(), []) + ); + } + + public function testMaxIterations(): void + { + $algorithm = new class (new RandomInitialization()) extends Algorithm { + protected function iterate(ClusterCollectionInterface $clusters): bool + { + // do nothing 
and iterate indefinitely + return true; + } + }; + + $iterations = 0; + $algorithm->registerIterationCallback(function () use (&$iterations) { + $iterations++; + }); + + $space = new Space(1); + $points = new PointCollection( + $space, + array_map([$space, 'makePoint'], [[1],[2],[3]]) + ); + + $algorithm->fit($points, 3, 300); + + $this->assertEquals( + 300, + $iterations + ); + } + + public function testMaxIterationsException(): void + { + $this->expectException(\UnexpectedValueException::class); + $this->expectExceptionMessageMatches('/^Invalid maximum number of iterations/'); + + $algorithm = new Algorithm(new RandomInitialization()); + $algorithm->fit(new PointCollection(new Space(1), []), 3, 0); + } +} diff --git a/tests/Unit/Euclidean/PointTest.php b/tests/Unit/Euclidean/PointTest.php new file mode 100644 index 0000000..fa0e4ee --- /dev/null +++ b/tests/Unit/Euclidean/PointTest.php @@ -0,0 +1,63 @@ +expectException(\LogicException::class); + $this->expectExceptionMessage("An euclidean point must belong to an euclidean space"); + + $space = new GpsSpace(); + $point = new Point($space, [48.85889, 2.32004]); + } + + public function testCoordinates(): void + { + $space = new Space(2); + $point = new Point($space, [1.2, 3.4]); + + $this->assertSame([1.2, 3.4], $point->getCoordinates()); + } + + public function testInvalidCoordinates(): void + { + $this->expectException(\LogicException::class); + $this->expectExceptionMessage("Invalid set of coordinates: 3 coordinates expected, 2 given"); + + $space = new Space(3); // 3d space + $point = new Point($space, [0.0, 0.0]); // 2d space point + } + + public function testInvalidCoordinatesValues(): void + { + $this->expectException(\LogicException::class); + $this->expectExceptionMessage("values at offsets [0,2] could not be converted to numbers"); + + $space = new Space(3); // 3d space + $point = new Point($space, [NAN, 1.0, "hello!"]); /** @phpstan-ignore-line */ + } + + public function testAssociateData(): void + { + 
$space = new Space(2); + $point = new Point($space, [0.0, 0.0]); + + $data = (object) ['foo' => "bar"]; + $point->setData($data); + + $this->assertSame($data, $point->getData()); + } +} diff --git a/tests/Unit/Euclidean/SpaceTest.php b/tests/Unit/Euclidean/SpaceTest.php new file mode 100644 index 0000000..f76bd2c --- /dev/null +++ b/tests/Unit/Euclidean/SpaceTest.php @@ -0,0 +1,55 @@ +assertEquals(1, $space->getDimensions()); + + $space = new Space(2); + + $this->assertEquals(2, $space->getDimensions()); + + $space = new Space(3); + + $this->assertEquals(3, $space->getDimensions()); + } + + public function testInvalidSpaceDimensions(): void + { + $this->expectException(\InvalidArgumentException::class); + + $space = new Space(0); + } + + public function testIsEqualTo(): void + { + $this->assertTrue( + (new Space(1))->isEqualTo(new Space(1)) + ); + + $this->assertFalse( + (new Space(1))->isEqualTo(new Space(2)) + ); + } + + public function testMakePoint(): void + { + $this->assertInstanceof( + Point::class, + (new Space(1))->makePoint([1]) + ); + } +} diff --git a/tests/Unit/Gps/AlgorithmTest.php b/tests/Unit/Gps/AlgorithmTest.php new file mode 100644 index 0000000..26aef51 --- /dev/null +++ b/tests/Unit/Gps/AlgorithmTest.php @@ -0,0 +1,108 @@ + + */ + public function fitDataProvider(): array + { + return [ + 'French cities' => $this->makeScenarioData( + new Space(), + [ + [48.85889, 2.32004], // Paris + [45.75781, 4.83201], // Lyon + [43.29617, 5.36995], // Marseille + ], + 10e3, // 10km radius + 10, // points per cluster + ), + ]; + } + + /** + * @param array{0: float, 1: float} $center + * @return array{0: float, 1: float} + */ + protected function random(array $center, float $radius): array + { + //about 111300 meters in one degree + $rd = $radius / 111300; + + $u = mt_rand() / mt_getrandmax(); + $v = mt_rand() / mt_getrandmax(); + + $w = $rd * sqrt($u); + $t = 2 * pi() * $v; + $x = $w * cos($t); + $y = $w * sin($t); + + return [$y + $center[0], $x + 
$center[1]]; + } + + public function testGetDistanceBetweenWithInvalidPoints(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^GPS algorithm can only calculate distance from GPS locations/'); + + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + $algorithm = new Algorithm($initScheme); + $algorithm->getDistanceBetween( + new EuclideanPoint(new EuclideanSpace(2), [0, 1]), + new EuclideanPoint(new EuclideanSpace(2), [1, 0]) + ); + } + + public function testGetCentroiWithInvalidPoins(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^Point collection should consist of GPS coordinates/'); + + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + $algorithm = new Algorithm($initScheme); + $algorithm->findCentroid(new PointCollection(new EuclideanSpace(2), [ + new EuclideanPoint(new EuclideanSpace(2), [0, 1]), + new EuclideanPoint(new EuclideanSpace(2), [1, 0]) + ])); + } +} diff --git a/tests/Unit/Gps/PointTest.php b/tests/Unit/Gps/PointTest.php new file mode 100644 index 0000000..6185ba5 --- /dev/null +++ b/tests/Unit/Gps/PointTest.php @@ -0,0 +1,60 @@ +assertTrue( + $point->getSpace()->isEqualTo(new Space()) + ); + + return $point; + } + + /** + * @depends testConstruct + */ + public function testGetCoordinates(Point $point): void + { + $this->assertEquals( + [48.85889, 2.32004], + $point->getCoordinates(), + ); + } + + /** + * @dataProvider invalidGpsCoordinatesDataProvider + */ + public function testConstructWithInvalidCoordinates(float $lat, float $long): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessage("Invalid GPS coordinates"); + + $point = new Point($lat, $long); + } + + /** + * @return array + */ + public function invalidGpsCoordinatesDataProvider(): array + { 
+ return [ + 'invalid lat (-91)' => [-91,0], + 'invalid lat (91)' => [91,0], + 'invalid long (-181)' => [-181,0], + 'invalid long (181)' => [181,0], + ]; + } +} diff --git a/tests/Unit/Gps/SpaceTest.php b/tests/Unit/Gps/SpaceTest.php new file mode 100644 index 0000000..9301596 --- /dev/null +++ b/tests/Unit/Gps/SpaceTest.php @@ -0,0 +1,48 @@ +assertInstanceof( + Space::class, + Space::singleton(), + ); + + $this->assertSame( + Space::singleton(), + Space::singleton(), + ); + } + + public function testIsEqualTo(): void + { + $this->assertTrue( + (new Space())->isEqualTo(new Space()) + ); + + $this->assertFalse( + (new Space())->isEqualTo(new EuclideanSpace(1)) + ); + } + + public function testMakePoint(): void + { + $this->assertInstanceof( + Point::class, + (new Space())->makePoint([48.85889, 2.32004]) + ); + } +} diff --git a/tests/Unit/MathTest.php b/tests/Unit/MathTest.php new file mode 100644 index 0000000..a385d92 --- /dev/null +++ b/tests/Unit/MathTest.php @@ -0,0 +1,180 @@ + $a + * @param array $b + * @param float $dist + */ + public function testEuclideanDist(array $a, array $b, float $dist): void + { + $this->assertEquals(round($dist, 6), round(Math::euclideanDist($a, $b), 6)); + } + + /** + * @return \Generator> + */ + public function euclidianDistanceDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->readCsv('euclidean_distances_2d') as $row) { + list($x1, $y1, $x2, $y2, $dist) = array_map('floatval', $row); + yield [[$x1, $y1], [$x2, $y2], $dist]; + } + + /** @var array $row */ + foreach ($this->readCsv('euclidean_distances_3d') as $row) { + list($x1, $y1, $z1, $x2, $y2, $z2, $dist) = array_map('floatval', $row); + yield [[$x1, $y1, $z1], [$x2, $y2, $z2], $dist]; + } + } + + // ------------------------------------------------------------------------ + // Centroid + + /** + * @dataProvider centroidDataProvider + * @param array $centroid + * @param array ...$points + */ + public function testCentroid(array $centroid, array 
...$points): void + { + $this->assertEquals($centroid, Math::centroid($points)); + } + + /** + * @return \Generator>> + */ + public function centroidDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->readCsv('centroids_2d') as $row) { + list($x1, $y1, $x2, $y2, $x3, $y3, $x4, $y4, $cx, $cy) = array_map('floatval', $row); + yield [[$cx, $cy], [$x1, $y1], [$x2, $y2], [$x3, $y3], [$x4, $y4]]; + } + } + + // ------------------------------------------------------------------------ + // Gaussian Noise + + /** + * @dataProvider gaussianNoiseDataProvider + */ + public function testGaussianNoise(float $mu, float $sigma = 1, float $nb = 1e3): void + { + // let's generate $nb numbers and sum them + for ($sum = 0, $i = 0; $i < $nb; $i++) { + $sum += array_sum(Math::gaussianNoise($mu, $sigma)); + } + + // cumpute the mean (which should be $mu) + $sum /= ($nb * 2); + + // verify the mean is around $mu (plus or minus $sigma) + $this->assertTrue( + $sum >= $mu - $sigma && $sum <= $mu + $sigma + ); + } + + /** + * @return array + */ + public function gaussianNoiseDataProvider(): array + { + return [ + ['mu' => 10], + ['mu' => 100], + ['mu' => 1000], + ['mu' => -10], + ['mu' => -100], + ['mu' => -1000], + ]; + } + + // ------------------------------------------------------------------------ + // Haversine + + /** + * @dataProvider haversineDataProvider + * @param array{0: float, 1: float} $from + * @param array{0: float, 1: float} $to + */ + public function testHaversine(string $label, array $from, array $to, float $expected): void + { + $obtained = Math::haversine($from, $to); + + $this->assertLessThan( + 1, // meter + $obtained - $expected, + "Haversine distance for $label should be around $expected meters", + ); + } + + public function haversineDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->readCsv('haversine_distances') as $row) { + $label = array_shift($row); + $row = array_map('floatval', $row); + yield [$label, [$row[0], 
$row[1]], [$row[2], $row[3]], $row[4]]; + } + } + + // ------------------------------------------------------------------------ + // GPS Centroid + + /** + * @dataProvider gpsCentroidDataProvider + * @param array{0: float, 1: float} $expected + * @param array $points + */ + public function testGpsCentroid(string $label, array $expected, array $points): void + { + $obtained = Math::gpsCentroid($points); + + $this->assertLessThan( + 1, + Math::haversine($expected, $obtained), + "Centroid of $label should be near " . implode(', ', $expected), + ); + } + + public function gpsCentroidDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->readCsv('gps_centroid') as $row) { + $label = array_shift($row); + $points = array_chunk(array_map('floatval', $row), 2); + yield [$label, array_shift($points), $points]; + } + } + + // ------------------------------------------------------------------------ + // Helpers + + private static function readCsv(string $path): \SplFileObject + { + $csv = new \SplFileObject(__DIR__ . 
"/../Data/{$path}.csv"); + + $csv->setFlags( + \SplFileObject::READ_CSV | + \SplFileObject::SKIP_EMPTY | + \SplFileObject::READ_AHEAD + ); + + return $csv; + } +} diff --git a/tests/Unit/PointCollectionTest.php b/tests/Unit/PointCollectionTest.php new file mode 100644 index 0000000..8a90cac --- /dev/null +++ b/tests/Unit/PointCollectionTest.php @@ -0,0 +1,123 @@ +attach( + new Point(new Space(2), [6,6]) + ); + + $this->assertCount(6, $points); + } + + public function testAttachTwiceHasNoEffect(): void + { + $points = self::makePointCollection(); + + $points->attach( + $point = new Point(new Space(2), [6,6]) + ); + + $points->attach($point); + + $this->assertCount(6, $points); + } + + public function testAttachInvalidPointFails(): void + { + $points = self::makePointCollection(); + + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^Cannot add point to collection/'); + + $points->attach( + $point = new Point(new Space(3), [6,6,6]) + ); + + $points->attach($point); + + $this->assertCount(11, $points); + } + + public function testContains(): void + { + $points = self::makePointCollection(); + $arr = iterator_to_array($points); + $point = $arr[array_rand($arr)]; + + $this->assertTrue( + $points->contains($point) + ); + + $this->assertFalse( + $points->contains( + new Point(new Space(2), [6,6]) + ) + ); + } + + public function testDetach(): void + { + $points = self::makePointCollection(); + $arr = iterator_to_array($points); + $point = $arr[array_rand($arr)]; + + $points->detach($point); + + $this->assertCount(4, $points); + } + + public function testDetachTwiceHasNoEffect(): void + { + $points = self::makePointCollection(); + $arr = iterator_to_array($points); + $point = $arr[array_rand($arr)]; + + $points->detach($point); + $points->detach($point); + + $this->assertCount(4, $points); + } + + public function testIteration(): void + { + $points = self::makePointCollection(); + + foreach ($points as $key => $point) { + 
$this->assertInstanceof(PointInterface::class, $point); + } + } +} diff --git a/tests/Unit/RandomInitializationTest.php b/tests/Unit/RandomInitializationTest.php new file mode 100644 index 0000000..fbd4ae4 --- /dev/null +++ b/tests/Unit/RandomInitializationTest.php @@ -0,0 +1,99 @@ +space = new Space(2); + + $this->points = new PointCollection($this->space, array_map( + fn ($coordinates) => new Point($this->space, $coordinates), + [[0,0], [1,1], [2,2], [3,3], [4,4], [5,5], [6,6], [7,7], [8,8], [9,9]], + )); + + $this->scheme = new RandomInitialization(); + } + + public function tearDown(): void + { + unset( + $this->space, + $this->points, + $this->scheme + ); + } + + /** + * @covers ::initializeClusters + * @covers ::getRandomPoint + */ + public function testInitializeClusters(): void + { + $clusters = $this->scheme->initializeClusters($this->points, 3); + + $this->assertCount(3, $clusters); + + $expectedNbPoints = [10, 0, 0]; + + foreach ($clusters as $i => $cluster) { + $this->assertCount( + array_shift($expectedNbPoints), + $cluster->getPoints() + ); + + $coordinates = $cluster->getCentroid()->getCoordinates(); + + $this->assertGreaterThanOrEqual(0, $coordinates[0]); + $this->assertGreaterThanOrEqual(0, $coordinates[1]); + + $this->assertLessThanOrEqual(9, $coordinates[0]); + $this->assertLessThanOrEqual(9, $coordinates[1]); + } + } + + /** + * @covers ::initializeClusters + */ + public function testInitializeClustersWithInvalidClusterCount(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^Invalid cluster count/'); + + $this->scheme->initializeClusters($this->points, 0); + } + + /** + * @covers ::initializeClusters + * @covers ::getRandomPoint + */ + public function testInitializeClustersWithoutPoints(): void + { + $this->expectException(\LogicException::class); + $this->expectExceptionMessageMatches('/^Unable to pick a random point out of an empty point collection/'); + + 
$this->scheme->initializeClusters(new PointCollection($this->space), 3); + } +}