From d6dd74abeed706850b47027f6cfe20f7a43faebb Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Wed, 18 Aug 2021 09:17:28 +0200 Subject: [PATCH 01/20] part one --- .gitignore | 1 + composer.json | 4 +- demo_deterministic.php | 43 ++++++ phpunit.xml.dist | 56 ++++---- phpunit.xml.dist_old | 34 +++++ src/Algorithm.php | 14 ++ src/Cluster.php | 39 ++++++ src/ClusterCollection.php | 73 ++++++++++ src/Concerns/HasSpaceTrait.php | 23 ++++ src/Interfaces/AlgorithmInterface.php | 11 ++ src/Interfaces/ClusterCollectionInterface.php | 17 +++ src/Interfaces/ClusterInterface.php | 19 +++ src/Interfaces/PointCollectionInterface.php | 17 +++ src/Interfaces/PointInterface.php | 18 +++ src/Interfaces/SpaceInterface.php | 8 ++ src/Point.php | 68 ++++++++++ src/PointCollection.php | 73 ++++++++++ src/Space.php | 24 ++++ {src => src_old}/KMeans/Cluster.php | 0 {src => src_old}/KMeans/Point.php | 0 {src => src_old}/KMeans/Space.php | 0 tests/Unit/ClusterCollectionTest.php | 125 ++++++++++++++++++ tests/Unit/ClusterTest.php | 109 +++++++++++++++ tests/Unit/PointCollectionTest.php | 123 +++++++++++++++++ tests/Unit/PointTest.php | 75 +++++++++++ tests/Unit/SpaceTest.php | 41 ++++++ {tests => tests_old}/Kmeans/ClusterTest.php | 0 {tests => tests_old}/Kmeans/PointTest.php | 0 {tests => tests_old}/Kmeans/SpaceTest.php | 0 29 files changed, 981 insertions(+), 34 deletions(-) create mode 100644 demo_deterministic.php create mode 100644 phpunit.xml.dist_old create mode 100644 src/Algorithm.php create mode 100644 src/Cluster.php create mode 100644 src/ClusterCollection.php create mode 100644 src/Concerns/HasSpaceTrait.php create mode 100644 src/Interfaces/AlgorithmInterface.php create mode 100644 src/Interfaces/ClusterCollectionInterface.php create mode 100644 src/Interfaces/ClusterInterface.php create mode 100644 src/Interfaces/PointCollectionInterface.php create mode 100644 src/Interfaces/PointInterface.php create mode 100644 src/Interfaces/SpaceInterface.php create mode 100644 
src/Point.php create mode 100644 src/PointCollection.php create mode 100644 src/Space.php rename {src => src_old}/KMeans/Cluster.php (100%) rename {src => src_old}/KMeans/Point.php (100%) rename {src => src_old}/KMeans/Space.php (100%) create mode 100644 tests/Unit/ClusterCollectionTest.php create mode 100644 tests/Unit/ClusterTest.php create mode 100644 tests/Unit/PointCollectionTest.php create mode 100644 tests/Unit/PointTest.php create mode 100644 tests/Unit/SpaceTest.php rename {tests => tests_old}/Kmeans/ClusterTest.php (100%) rename {tests => tests_old}/Kmeans/PointTest.php (100%) rename {tests => tests_old}/Kmeans/SpaceTest.php (100%) diff --git a/.gitignore b/.gitignore index 896e906..69dd635 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ docs vendor coverage .phpunit.result.cache +.phpunit.cache diff --git a/composer.json b/composer.json index 137d615..c08f6f6 100644 --- a/composer.json +++ b/composer.json @@ -22,8 +22,8 @@ "phpunit/phpunit": "^9.3" }, "autoload": { - "psr-0": { - "KMeans": "src/" + "psr-4": { + "Bdelespierre\\Kmeans\\": "src/" } }, "autoload-dev": { diff --git a/demo_deterministic.php b/demo_deterministic.php new file mode 100644 index 0000000..0247bd6 --- /dev/null +++ b/demo_deterministic.php @@ -0,0 +1,43 @@ + $coordinates) { + $space->addPoint($coordinates); +} + +// cluster these 50 points in 3 clusters +$clusters = $space->solve(3); + +// display the cluster centers and attached points +foreach ($clusters as $num => $cluster) { + $coordinates = $cluster->getCoordinates(); + printf( + "Cluster %s [%d,%d]: %d points\n", + $num, + $coordinates[0], + $coordinates[1], + count($cluster) + ); +} diff --git a/phpunit.xml.dist b/phpunit.xml.dist index b9ba704..0cd59ec 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,34 +1,26 @@ - - - - src/ - - - - - - - - - - tests - - - - - + + + + tests/Unit + + + + + + src + + diff --git a/phpunit.xml.dist_old b/phpunit.xml.dist_old new file mode 100644 index 0000000..b9ba704 --- 
/dev/null +++ b/phpunit.xml.dist_old @@ -0,0 +1,34 @@ + + + + + src/ + + + + + + + + + + tests + + + + + + diff --git a/src/Algorithm.php b/src/Algorithm.php new file mode 100644 index 0000000..ab73116 --- /dev/null +++ b/src/Algorithm.php @@ -0,0 +1,14 @@ +centroid = $centroid; + $this->points = $points ?? new PointCollection($centroid->getSpace()); + } + + public function getCentroid(): PointInterface + { + return $this->centroid; + } + + public function getPoints(): PointCollectionInterface + { + return $this->points; + } + + public function attach(PointInterface $point): void + { + $this->points->add($point); + } + + public function detach(PointInterface $point): void + { + $this->points->remove($point); + } +} diff --git a/src/ClusterCollection.php b/src/ClusterCollection.php new file mode 100644 index 0000000..b061047 --- /dev/null +++ b/src/ClusterCollection.php @@ -0,0 +1,73 @@ +setSpace($space); + + $this->storage = new \SplObjectStorage(); + } + + public function has(ClusterInterface $cluster): bool + { + return $this->storage->contains($cluster); + } + + public function add(ClusterInterface $cluster): void + { + if ($cluster->getCentroid()->getSpace() !== $this->getSpace()) { + throw new \InvalidArgumentException( + "Cannot add cluster to collection: cluster space is not same as collection space" + ); + } + + $this->storage->attach($cluster); + } + + public function remove(ClusterInterface $cluster): void + { + $this->storage->detach($cluster); + } + + public function current() + { + return $this->storage->current(); + } + + public function key() + { + return $this->storage->key(); + } + + public function next(): void + { + $this->storage->next(); + } + + public function rewind(): void + { + $this->storage->rewind(); + } + + public function valid(): bool + { + return $this->storage->valid(); + } + + public function count(): int + { + return count($this->storage); + } +} diff --git a/src/Concerns/HasSpaceTrait.php b/src/Concerns/HasSpaceTrait.php new file 
mode 100644 index 0000000..9ccf2f3 --- /dev/null +++ b/src/Concerns/HasSpaceTrait.php @@ -0,0 +1,23 @@ +space = $space; + } + + public function getSpace(): SpaceInterface + { + return $this->space; + } +} diff --git a/src/Interfaces/AlgorithmInterface.php b/src/Interfaces/AlgorithmInterface.php new file mode 100644 index 0000000..470d72c --- /dev/null +++ b/src/Interfaces/AlgorithmInterface.php @@ -0,0 +1,11 @@ +setSpace($space); + $this->setCoordinates($coordinates); + } + + public function getCoordinates(): array + { + return $this->coordinates; + } + + public function setCoordinates(array $coordinates): void + { + $this->coordinates = $this->sanitizeCoordinates($coordinates); + } + + public function getData() + { + return $this->data; + } + + public function setData($data): void + { + $this->data = $data; + } + + /** + * @codeCoverageIgnore + */ + private function sanitizeCoordinates(array $coordinates): array + { + if (count($coordinates) != $this->space->getDimensions()) { + throw new \LogicException(sprintf( + "Invalid set of coordinates: %d coordinates expected, %d coordinates given", + $this->space->getDimensions(), + count($coordinates) + )); + } + + $coordinates = filter_var_array($coordinates, FILTER_VALIDATE_FLOAT); + + $errors = array_keys($coordinates, false, true); + + if ($errors) { + throw new \LogicException(sprintf( + "Invalid set of coordinates: values at offsets [%s] could not be converted to numbers", + implode(',', $errors) + )); + } + + return $coordinates; + } +} diff --git a/src/PointCollection.php b/src/PointCollection.php new file mode 100644 index 0000000..4c01364 --- /dev/null +++ b/src/PointCollection.php @@ -0,0 +1,73 @@ +setSpace($space); + + $this->storage = new \SplObjectStorage(); + } + + public function has(PointInterface $point): bool + { + return $this->storage->contains($point); + } + + public function add(PointInterface $point): void + { + if ($point->getSpace() !== $this->getSpace()) { + throw new \InvalidArgumentException( 
+ "Cannot add point to collection: point space is not same as collection space" + ); + } + + $this->storage->attach($point); + } + + public function remove(PointInterface $point): void + { + $this->storage->detach($point); + } + + public function current() + { + return $this->storage->current(); + } + + public function key() + { + return $this->storage->key(); + } + + public function next(): void + { + $this->storage->next(); + } + + public function rewind(): void + { + $this->storage->rewind(); + } + + public function valid(): bool + { + return $this->storage->valid(); + } + + public function count(): int + { + return count($this->storage); + } +} diff --git a/src/Space.php b/src/Space.php new file mode 100644 index 0000000..7ea33ca --- /dev/null +++ b/src/Space.php @@ -0,0 +1,24 @@ +dimensions = $dimensions; + } + + public function getDimensions(): int + { + return $this->dimensions; + } +} diff --git a/src/KMeans/Cluster.php b/src_old/KMeans/Cluster.php similarity index 100% rename from src/KMeans/Cluster.php rename to src_old/KMeans/Cluster.php diff --git a/src/KMeans/Point.php b/src_old/KMeans/Point.php similarity index 100% rename from src/KMeans/Point.php rename to src_old/KMeans/Point.php diff --git a/src/KMeans/Space.php b/src_old/KMeans/Space.php similarity index 100% rename from src/KMeans/Space.php rename to src_old/KMeans/Space.php diff --git a/tests/Unit/ClusterCollectionTest.php b/tests/Unit/ClusterCollectionTest.php new file mode 100644 index 0000000..1f88ff6 --- /dev/null +++ b/tests/Unit/ClusterCollectionTest.php @@ -0,0 +1,125 @@ +add($clusterA); + $collection->add($clusterC); + + $this->assertTrue($collection->has($clusterA)); + $this->assertFalse($collection->has($clusterB)); + $this->assertTrue($collection->has($clusterC)); + + $collection->remove($clusterC); + $this->assertFalse($collection->has($clusterC)); + } + + /** + * @covers ::__construct + * @covers ::add + */ + public function testAddingInvalidClusterToCollection() + { + 
$this->expectException(\InvalidArgumentException::class); + + $spaceA = new Space(2); + $spaceB = new Space(3); + + $collection = new ClusterCollection($spaceA); + $cluster = new Cluster(new Point($spaceB, [1, 2, 3])); + + $collection->add($cluster); + } + + /** + * @covers ::__construct + * @covers ::add + * @covers ::remove + * @covers ::count + */ + public function testCount() + { + $space = new Space(4); + $collection = new ClusterCollection($space); + + $clusterA = new Cluster(new Point($space, [1,2,3,4])); + $clusterB = new Cluster(new Point($space, [5,6,7,8])); + $clusterC = new Cluster(new Point($space, [9,0,1,2])); + + $collection->add($clusterA); + $collection->add($clusterB); + $collection->add($clusterC); + + $this->assertEquals(3, count($collection)); + + $collection->remove($clusterA); + $this->assertEquals(2, count($collection)); + + $collection->remove($clusterB); + $this->assertEquals(1, count($collection)); + + $collection->remove($clusterC); + $this->assertEquals(0, count($collection)); + } + + /** + * @covers ::__construct + * @covers ::add + * @covers ::current + * @covers ::key + * @covers ::next + * @covers ::rewind + * @covers ::valid + */ + public function testIterator() + { + $space = new Space(4); + $collection = new ClusterCollection($space); + + $clusterA = new Cluster(new Point($space, [1,2,3,4])); + $clusterB = new Cluster(new Point($space, [5,6,7,8])); + $clusterC = new Cluster(new Point($space, [9,0,1,2])); + + $collection->add($clusterA); + $collection->add($clusterB); + $collection->add($clusterC); + + $iterations = 0; + foreach ($collection as $i => $cluster) { + $this->assertInstanceof(ClusterInterface::class, $cluster); + $iterations++; + } + + $this->assertEquals(3, $iterations); + } +} diff --git a/tests/Unit/ClusterTest.php b/tests/Unit/ClusterTest.php new file mode 100644 index 0000000..473da19 --- /dev/null +++ b/tests/Unit/ClusterTest.php @@ -0,0 +1,109 @@ +assertSame( + $centroid, + $cluster->getCentroid() + ); + } + + 
/** + * @covers ::__construct + * @covers ::getPoints + */ + public function testGetPoints() + { + $space = new Space(2); + $centroid = new Point($space, [0,0]); + $collection = new PointCollection($space); + $cluster = new Cluster($centroid, $collection); + + foreach (range(1, 10) as $i) { + $collection->add( + new Point($space, [0,$i]) + ); + } + + $this->assertCount( + 10, + $cluster->getPoints() + ); + } + + /** + * @covers ::__construct + * @covers ::attach + * @covers ::getPoints + */ + public function testAttach() + { + $space = new Space(2); + $centroid = new Point($space, [0,0]); + $cluster = new Cluster($centroid); + + foreach (range(1, 10) as $i) { + $cluster->attach( + new Point($space, [0,$i]) + ); + } + + $this->assertCount( + 10, + $cluster->getPoints() + ); + } + + /** + * @covers ::__construct + * @covers ::detach + * @covers ::getPoints + */ + public function testDetach() + { + $space = new Space(2); + $centroid = new Point($space, [0,0]); + $collection = new PointCollection($space); + $cluster = new Cluster($centroid, $collection); + + $pointA = new Point($space, [1,1]); + $pointB = new Point($space, [2,2]); + $pointC = new Point($space, [3,3]); + + $collection->add($pointA); + $collection->add($pointB); + $collection->add($pointC); + + $cluster->detach($pointA); + $cluster->detach($pointC); + + $this->assertCount( + 1, + $cluster->getPoints() + ); + } +} diff --git a/tests/Unit/PointCollectionTest.php b/tests/Unit/PointCollectionTest.php new file mode 100644 index 0000000..614d44a --- /dev/null +++ b/tests/Unit/PointCollectionTest.php @@ -0,0 +1,123 @@ +add($pointA); + $collection->add($pointC); + + $this->assertTrue($collection->has($pointA)); + $this->assertFalse($collection->has($pointB)); + $this->assertTrue($collection->has($pointC)); + + $collection->remove($pointC); + $this->assertFalse($collection->has($pointC)); + } + + /** + * @covers ::__construct + * @covers ::add + */ + public function testAddPointFails() + { + 
$this->expectException(\InvalidArgumentException::class); + + $spaceA = new Space(2); + $spaceB = new Space(3); + + $collection = new PointCollection($spaceA); + $point = new Point($spaceB, [1, 2, 3]); + + $collection->add($point); + } + + /** + * @covers ::__construct + * @covers ::add + * @covers ::remove + * @covers ::count + */ + public function testCount() + { + $space = new Space(4); + $collection = new PointCollection($space); + + $pointA = new Point($space, [1,2,3,4]); + $pointB = new Point($space, [5,6,7,8]); + $pointC = new Point($space, [9,0,1,2]); + + $collection->add($pointA); + $collection->add($pointB); + $collection->add($pointC); + + $this->assertEquals(3, count($collection)); + + $collection->remove($pointA); + $this->assertEquals(2, count($collection)); + + $collection->remove($pointB); + $this->assertEquals(1, count($collection)); + + $collection->remove($pointC); + $this->assertEquals(0, count($collection)); + } + + /** + * @covers ::__construct + * @covers ::add + * @covers ::current + * @covers ::key + * @covers ::next + * @covers ::rewind + * @covers ::valid + */ + public function testIterator() + { + $space = new Space(4); + $collection = new PointCollection($space); + + $pointA = new Point($space, [1,2,3,4]); + $pointB = new Point($space, [5,6,7,8]); + $pointC = new Point($space, [9,0,1,2]); + + $collection->add($pointA); + $collection->add($pointB); + $collection->add($pointC); + + $iterations = 0; + foreach ($collection as $i => $point) { + $this->assertInstanceof(PointInterface::class, $point); + $iterations++; + } + + $this->assertEquals(3, $iterations); + } +} diff --git a/tests/Unit/PointTest.php b/tests/Unit/PointTest.php new file mode 100644 index 0000000..19c871d --- /dev/null +++ b/tests/Unit/PointTest.php @@ -0,0 +1,75 @@ +assertSame([0.0, 0.0], $point->getCoordinates()); + + $point->setCoordinates([1.2, 3.4]); + + $this->assertSame([1.2, 3.4], $point->getCoordinates()); + } + + /** + * @covers ::__construct + * @covers 
::setCoordinates + */ + public function testInvalidCoordinates() + { + $this->expectException(\LogicException::class); + $this->expectExceptionMessage("Invalid set of coordinates: 3 coordinates expected, 2 coordinates given"); + + $space = new Space(3); // 3d space + $point = new Point($space, [0.0, 0.0]); // 2d space point + } + + /** + * @covers ::__construct + * @covers ::setCoordinates + */ + public function testInvalidCoordinatesValues() + { + $this->expectException(\LogicException::class); + $this->expectExceptionMessage("values at offsets [0,2] could not be converted to numbers"); + + $space = new Space(3); // 3d space + $point = new Point($space, [NAN, 1.0, "hello!"]); + } + + /** + * @covers ::__construct + * @covers ::setCoordinates + * @covers ::getData + * @covers ::setData + */ + public function testAssociateData() + { + $space = new Space(2); + $point = new Point($space, [0.0, 0.0]); + + $data = (object) ['foo' => "bar"]; + + $point->setData($data); + + $this->assertSame($data, $point->getData()); + } +} diff --git a/tests/Unit/SpaceTest.php b/tests/Unit/SpaceTest.php new file mode 100644 index 0000000..4380f6b --- /dev/null +++ b/tests/Unit/SpaceTest.php @@ -0,0 +1,41 @@ +assertEquals(1, $space->getDimensions()); + + $space = new Space(2); + + $this->assertEquals(2, $space->getDimensions()); + + $space = new Space(3); + + $this->assertEquals(3, $space->getDimensions()); + } + + /** + * @covers ::__construct + */ + public function testInvalidSpaceDimensions() + { + $this->expectException(\LogicException::class); + + $space = new Space(0); + } +} diff --git a/tests/Kmeans/ClusterTest.php b/tests_old/Kmeans/ClusterTest.php similarity index 100% rename from tests/Kmeans/ClusterTest.php rename to tests_old/Kmeans/ClusterTest.php diff --git a/tests/Kmeans/PointTest.php b/tests_old/Kmeans/PointTest.php similarity index 100% rename from tests/Kmeans/PointTest.php rename to tests_old/Kmeans/PointTest.php diff --git a/tests/Kmeans/SpaceTest.php 
b/tests_old/Kmeans/SpaceTest.php similarity index 100% rename from tests/Kmeans/SpaceTest.php rename to tests_old/Kmeans/SpaceTest.php From 43faa7f9519853a3a1bcfadacf351513b55e8f01 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Thu, 2 Sep 2021 13:07:09 +0200 Subject: [PATCH 02/20] adding makefile & qa tools --- composer.json | 4 +++- makefile | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 makefile diff --git a/composer.json b/composer.json index c08f6f6..9899830 100644 --- a/composer.json +++ b/composer.json @@ -19,7 +19,9 @@ "php": "^7.3|^8.0" }, "require-dev": { - "phpunit/phpunit": "^9.3" + "phpunit/phpunit": "^9.3", + "squizlabs/php_codesniffer": "^3.6", + "phpstan/phpstan": "^0.12.97" }, "autoload": { "psr-4": { diff --git a/makefile b/makefile new file mode 100644 index 0000000..e8f0ee2 --- /dev/null +++ b/makefile @@ -0,0 +1,35 @@ + +# ----------------------------------------------------------------------------- +# Code Quality +# ----------------------------------------------------------------------------- + +qa: phplint phpcs phpstan + +QA_PATHS = src/ +QA_STANDARD = psr12 + +phplint: + find $(QA_PATHS) -name "*.php" -print0 | xargs -0 -n1 -P8 php -l > /dev/null + +phpstan: + vendor/bin/phpstan analyse $(QA_PATHS) + +phpcs: + vendor/bin/phpcs --standard=$(QA_STANDARD) $(QA_PATHS) + +phpcbf: + vendor/bin/phpcbf --standard=$(QA_STANDARD) $(QA_PATHS) + +todolist: + git grep -C2 -p -E '[@]todo' + +# ----------------------------------------------------------------------------- +# Tests +# ----------------------------------------------------------------------------- + +test: + vendor/bin/phpunit + +.PHONY: coverage +coverage: + vendor/bin/phpunit --coverage-html coverage From d8b7fbfbe45b606bdb006de7a050c4e70369c717 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Thu, 2 Sep 2021 13:07:28 +0200 Subject: [PATCH 03/20] v3 algorithm implementation (#27) --- .gitignore | 1 - 
.scrutinizer.yml | 1 - .styleci.yml | 2 +- .travis.yml | 1 + README.md | 76 +---- composer.json | 10 +- demo.php | 24 +- demo_deterministic.php | 43 --- makefile | 8 +- phpstan.neon | 5 + phpunit.xml.dist | 5 + src/Algorithm.php | 113 ++++++- src/Cluster.php | 34 ++- src/ClusterCollection.php | 49 ++-- src/Concerns/HasSpaceTrait.php | 14 +- src/Interfaces/AlgorithmInterface.php | 7 +- src/Interfaces/ClusterCollectionInterface.php | 18 +- src/Interfaces/ClusterInterface.php | 11 +- .../ClusterizationResultInterface.php | 17 ++ .../InitializationSchemeInterface.php | 11 + src/Interfaces/PointCollectionInterface.php | 18 +- src/Interfaces/PointInterface.php | 19 +- src/Interfaces/SpaceBoundInterface.php | 10 + src/Interfaces/SpaceInterface.php | 7 +- src/Point.php | 36 ++- src/PointCollection.php | 57 ++-- src/RandomInitialization.php | 49 ++++ src/Space.php | 16 +- src/math.php | 63 ++++ src_old/KMeans/Cluster.php | 110 ------- src_old/KMeans/Point.php | 115 -------- src_old/KMeans/Space.php | 277 ------------------ tests/Data/boundaries_2d.csv | 100 +++++++ tests/Data/centroids_2d.csv | 100 +++++++ tests/Data/euclidean_distances_2d.csv | 100 +++++++ tests/Data/euclidean_distances_3d.csv | 100 +++++++ tests/Unit/AlgorithmTest.php | 256 ++++++++++++++++ tests/Unit/ClusterCollectionTest.php | 109 ++++--- tests/Unit/ClusterTest.php | 179 +++++++---- tests/Unit/Concerns/HasSpaceTraitTest.php | 54 ++++ tests/Unit/MathTest.php | 135 +++++++++ tests/Unit/PointCollectionTest.php | 183 +++++++----- tests/Unit/PointTest.php | 36 +-- tests/Unit/RandomInitializationTest.php | 99 +++++++ tests/Unit/SpaceTest.php | 26 +- tests_old/Kmeans/ClusterTest.php | 206 ------------- tests_old/Kmeans/PointTest.php | 157 ---------- tests_old/Kmeans/SpaceTest.php | 222 -------------- 48 files changed, 1758 insertions(+), 1531 deletions(-) delete mode 100644 demo_deterministic.php create mode 100644 phpstan.neon create mode 100644 src/Interfaces/ClusterizationResultInterface.php create mode 
100644 src/Interfaces/InitializationSchemeInterface.php create mode 100644 src/Interfaces/SpaceBoundInterface.php create mode 100644 src/RandomInitialization.php create mode 100644 src/math.php delete mode 100644 src_old/KMeans/Cluster.php delete mode 100644 src_old/KMeans/Point.php delete mode 100644 src_old/KMeans/Space.php create mode 100644 tests/Data/boundaries_2d.csv create mode 100644 tests/Data/centroids_2d.csv create mode 100644 tests/Data/euclidean_distances_2d.csv create mode 100644 tests/Data/euclidean_distances_3d.csv create mode 100644 tests/Unit/AlgorithmTest.php create mode 100644 tests/Unit/Concerns/HasSpaceTraitTest.php create mode 100644 tests/Unit/MathTest.php create mode 100644 tests/Unit/RandomInitializationTest.php delete mode 100644 tests_old/Kmeans/ClusterTest.php delete mode 100644 tests_old/Kmeans/PointTest.php delete mode 100644 tests_old/Kmeans/SpaceTest.php diff --git a/.gitignore b/.gitignore index 69dd635..652b2af 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,5 @@ build composer.lock docs vendor -coverage .phpunit.result.cache .phpunit.cache diff --git a/.scrutinizer.yml b/.scrutinizer.yml index df16b68..1165e72 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -16,4 +16,3 @@ checks: fix_line_ending: true fix_identation_4spaces: true fix_doc_comments: true - diff --git a/.styleci.yml b/.styleci.yml index 247a09c..ac8d606 100644 --- a/.styleci.yml +++ b/.styleci.yml @@ -1 +1 @@ -preset: psr2 +preset: psr12 diff --git a/.travis.yml b/.travis.yml index 8370411..2780ef2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: php php: - 7.3 - 7.4 + - 8.0 env: matrix: diff --git a/README.md b/README.md index a42620b..a1de4ce 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,6 @@ [K-mean](http://en.wikipedia.org/wiki/K-means_clustering) clustering algorithm implementation in PHP. 
-Please also see the [FAQ](#faq) - ## Installation You can install the package via composer: @@ -22,8 +20,7 @@ composer require bdelespierre/php-kmeans ```PHP require "vendor/autoload.php"; -// prepare 50 points of 2D space to be clustered -$points = [ +$data = [ [80,55],[86,59],[19,85],[41,47],[57,58], [76,22],[94,60],[13,93],[90,48],[52,54], [62,46],[88,44],[85,24],[63,14],[51,40], @@ -37,30 +34,35 @@ $points = [ ]; // create a 2-dimentions space -$space = new KMeans\Space(2); +$space = new Kmeans\Space(2); + +// prepare the points +$points = new Kmeans\PointCollection($space); -// add points to space -foreach ($points as $i => $coordinates) { - $space->addPoint($coordinates); +foreach ($data as $coordinates) { + $points->attach(new Kmeans\Point($space, $coordinates)); } +// prepare the algorithm +$algorithm = new Kmeans\Algorithm(new Kmeans\RandomInitialization()); + // cluster these 50 points in 3 clusters -$clusters = $space->solve(3); +$clusters = $algorithm->clusterize($points, 3); // display the cluster centers and attached points foreach ($clusters as $num => $cluster) { - $coordinates = $cluster->getCoordinates(); + $coordinates = $cluster->getCentroid()->getCoordinates(); printf( - "Cluster %s [%d,%d]: %d points\n", + "Cluster #%s [%d,%d] has %d points\n", $num, $coordinates[0], $coordinates[1], - count($cluster) + count($cluster->getPoints()) ); } ``` -**Note:** the example is given with points of a 2D space but it will work with any dimention >1. +**Note:** the example is given with points of a 2D space but it will work with any dimention greater than or equal to 1. ### Testing @@ -89,51 +91,3 @@ If you discover any security related issues, please email benjamin.delespierre@g ## License Lesser General Public License (LGPL). Please see [License File](LICENSE.md) for more information. 
- -## FAQ - -### How to get coordinates of a point/cluster: -```PHP -$x = $point[0]; -$y = $point[1]; - -// or - -list($x,$y) = $point->getCoordinates(); -``` - -### List all points of a space/cluster: - -```PHP -foreach ($cluster as $point) { - printf('[%d,%d]', $point[0], $point[1]); -} -``` - -### Attach data to a point: - -```PHP -$point = $space->addPoint([$x, $y, $z], "user #123"); -``` - -### Retrieve point data: - -```PHP -$data = $space[$point]; // e.g. "user #123" -``` - -### Watch the algorithm run - -Each iteration step can be monitored using a callback function passed to `Kmeans\Space::solve`: - -```PHP -$clusters = $space->solve(3, function($space, $clusters) { - static $iterations = 0; - - printf("Iteration: %d\n", ++$iterations); - - foreach ($clusters as $i => $cluster) { - printf("Cluster %d [%d,%d]: %d points\n", $i, $cluster[0], $cluster[1], count($cluster)); - } -}); -``` diff --git a/composer.json b/composer.json index 9899830..fca5c51 100644 --- a/composer.json +++ b/composer.json @@ -21,12 +21,16 @@ "require-dev": { "phpunit/phpunit": "^9.3", "squizlabs/php_codesniffer": "^3.6", - "phpstan/phpstan": "^0.12.97" + "phpstan/phpstan": "^0.12.97", + "mockery/mockery": "^1.4" }, "autoload": { "psr-4": { - "Bdelespierre\\Kmeans\\": "src/" - } + "Kmeans\\": "src/" + }, + "files": [ + "src/math.php" + ] }, "autoload-dev": { "psr-4": { diff --git a/demo.php b/demo.php index 408ad99..7458f59 100644 --- a/demo.php +++ b/demo.php @@ -2,8 +2,7 @@ require "vendor/autoload.php"; -// prepare 50 points of 2D space to be clustered -$points = [ +$data = [ [80,55],[86,59],[19,85],[41,47],[57,58], [76,22],[94,60],[13,93],[90,48],[52,54], [62,46],[88,44],[85,24],[63,14],[51,40], @@ -17,24 +16,29 @@ ]; // create a 2-dimentions space -$space = new KMeans\Space(2); +$space = new Kmeans\Space(2); -// add points to space -foreach ($points as $i => $coordinates) { - $space->addPoint($coordinates); +// prepare the points +$points = new Kmeans\PointCollection($space); + 
+foreach ($data as $coordinates) { + $points->attach(new Kmeans\Point($space, $coordinates)); } +// prepare the algorithm +$algorithm = new Kmeans\Algorithm(new Kmeans\RandomInitialization()); + // cluster these 50 points in 3 clusters -$clusters = $space->solve(3); +$clusters = $algorithm->clusterize($points, 3); // display the cluster centers and attached points foreach ($clusters as $num => $cluster) { - $coordinates = $cluster->getCoordinates(); + $coordinates = $cluster->getCentroid()->getCoordinates(); printf( - "Cluster %s [%d,%d]: %d points\n", + "Cluster #%s [%d,%d] has %d points\n", $num, $coordinates[0], $coordinates[1], - count($cluster) + count($cluster->getPoints()) ); } diff --git a/demo_deterministic.php b/demo_deterministic.php deleted file mode 100644 index 0247bd6..0000000 --- a/demo_deterministic.php +++ /dev/null @@ -1,43 +0,0 @@ - $coordinates) { - $space->addPoint($coordinates); -} - -// cluster these 50 points in 3 clusters -$clusters = $space->solve(3); - -// display the cluster centers and attached points -foreach ($clusters as $num => $cluster) { - $coordinates = $cluster->getCoordinates(); - printf( - "Cluster %s [%d,%d]: %d points\n", - $num, - $coordinates[0], - $coordinates[1], - count($cluster) - ); -} diff --git a/makefile b/makefile index e8f0ee2..fe0064b 100644 --- a/makefile +++ b/makefile @@ -5,7 +5,7 @@ qa: phplint phpcs phpstan -QA_PATHS = src/ +QA_PATHS = src/ tests/ QA_STANDARD = psr12 phplint: @@ -28,8 +28,4 @@ todolist: # ----------------------------------------------------------------------------- test: - vendor/bin/phpunit - -.PHONY: coverage -coverage: - vendor/bin/phpunit --coverage-html coverage + vendor/bin/phpunit --colors diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..6c31ff1 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,5 @@ +parameters: + paths: + - src + # The level 8 is the highest level + level: 8 diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 0cd59ec..c264e4b 100644 --- 
a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -22,5 +22,10 @@ src + + + + + diff --git a/src/Algorithm.php b/src/Algorithm.php index ab73116..ccec8f7 100644 --- a/src/Algorithm.php +++ b/src/Algorithm.php @@ -1,14 +1,117 @@ */ + private array $iterationCallbacks = []; + + + public function __construct(InitializationSchemeInterface $initScheme) + { + $this->initScheme = $initScheme; + } + + public function registerIterationCallback(callable $callback): void + { + $this->iterationCallbacks[] = $callback; + } + + public function clusterize(PointCollectionInterface $points, int $nbClusters): ClusterCollectionInterface + { + try { + // initialize clusters + $clusters = $this->initScheme->initializeClusters($points, $nbClusters); + } catch (\Exception $e) { + throw new \RuntimeException("Cannot initialize clusters", 0, $e); + } + + // iterate until convergence is reached + do { + $this->invokeIterationCallbacks($clusters); + } while ($this->iterate($clusters)); + + // clustering is done. + return $clusters; + } + + protected function iterate(ClusterCollectionInterface $clusters): bool + { + /** @var \SplObjectStorage */ + $changed = new \SplObjectStorage(); + + // calculate proximity amongst points and clusters + foreach ($clusters as $cluster) { + foreach ($cluster->getPoints() as $point) { + // find the closest cluster + $closest = $this->getClosestCluster($clusters, $point); + + if ($closest !== $cluster) { + // move the point from its current cluster to its closest + $cluster->detach($point); + $closest->attach($point); + + // flag both clusters as changed + $changed->attach($cluster); + $changed->attach($closest); + } + } + } + + // update changed clusters' centroid + foreach ($changed as $cluster) { + $cluster->setCentroid($this->findCentroid($cluster->getPoints())); + } + + // return true if something changed during this iteration + return count($changed) > 0; + } + + protected function getClosestCluster(ClusterCollectionInterface $clusters, PointInterface $point): 
ClusterInterface + { + $min = null; + $closest = null; + + foreach ($clusters as $cluster) { + $distance = $this->getDistanceBetween($point, $cluster->getCentroid()); + + if (is_null($min) || $distance < $min) { + $min = $distance; + $closest = $cluster; + } + } + + assert($closest !== null); + return $closest; + } + + protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float + { + return euclidean_dist($pointA->getCoordinates(), $pointB->getCoordinates()); + } + + protected function findCentroid(PointCollectionInterface $points): PointInterface + { + return new Point($points->getSpace(), find_centroid( + array_map(fn ($point) => $point->getCoordinates(), iterator_to_array($points)) + )); + } + + protected function invokeIterationCallbacks(ClusterCollectionInterface $clusters): void { - // + foreach ($this->iterationCallbacks as $callback) { + $callback($this, $clusters); + } } } diff --git a/src/Cluster.php b/src/Cluster.php index 683bc88..efa876d 100644 --- a/src/Cluster.php +++ b/src/Cluster.php @@ -1,10 +1,11 @@ centroid = $centroid; $this->points = $points ?? new PointCollection($centroid->getSpace()); + $this->setCentroid($centroid); + } + + public function getSpace(): SpaceInterface + { + return $this->points->getSpace(); + } + + public function belongsTo(SpaceInterface $space): bool + { + return $this->getSpace()->isEqualTo($space); } public function getCentroid(): PointInterface @@ -22,6 +33,15 @@ public function getCentroid(): PointInterface return $this->centroid; } + public function setCentroid(PointInterface $point): void + { + if (! 
$point->belongsTo($this->getSpace())) { + throw new \LogicException("Cannot set centroid: invalid point space"); + } + + $this->centroid = $point; + } + public function getPoints(): PointCollectionInterface { return $this->points; @@ -29,11 +49,11 @@ public function getPoints(): PointCollectionInterface public function attach(PointInterface $point): void { - $this->points->add($point); + $this->points->attach($point); } public function detach(PointInterface $point): void { - $this->points->remove($point); + $this->points->detach($point); } } diff --git a/src/ClusterCollection.php b/src/ClusterCollection.php index b061047..18b7218 100644 --- a/src/ClusterCollection.php +++ b/src/ClusterCollection.php @@ -1,31 +1,40 @@ + */ + protected \SplObjectStorage $clusters; - public function __construct(SpaceInterface $space) + /** + * @param array $clusters + */ + public function __construct(SpaceInterface $space, array $clusters = []) { $this->setSpace($space); + $this->clusters = new \SplObjectStorage(); - $this->storage = new \SplObjectStorage(); + foreach ($clusters as $cluster) { + $this->attach($cluster); + } } - public function has(ClusterInterface $cluster): bool + public function contains(ClusterInterface $cluster): bool { - return $this->storage->contains($cluster); + return $this->clusters->contains($cluster); } - public function add(ClusterInterface $cluster): void + public function attach(ClusterInterface $cluster): void { if ($cluster->getCentroid()->getSpace() !== $this->getSpace()) { throw new \InvalidArgumentException( @@ -33,41 +42,41 @@ public function add(ClusterInterface $cluster): void ); } - $this->storage->attach($cluster); + $this->clusters->attach($cluster); } - public function remove(ClusterInterface $cluster): void + public function detach(ClusterInterface $cluster): void { - $this->storage->detach($cluster); + $this->clusters->detach($cluster); } public function current() { - return $this->storage->current(); + return $this->clusters->current(); } 
public function key() { - return $this->storage->key(); + return $this->clusters->key(); } public function next(): void { - $this->storage->next(); + $this->clusters->next(); } public function rewind(): void { - $this->storage->rewind(); + $this->clusters->rewind(); } public function valid(): bool { - return $this->storage->valid(); + return $this->clusters->valid(); } public function count(): int { - return count($this->storage); + return count($this->clusters); } } diff --git a/src/Concerns/HasSpaceTrait.php b/src/Concerns/HasSpaceTrait.php index 9ccf2f3..b466613 100644 --- a/src/Concerns/HasSpaceTrait.php +++ b/src/Concerns/HasSpaceTrait.php @@ -1,17 +1,14 @@ space = $space; } @@ -20,4 +17,9 @@ public function getSpace(): SpaceInterface { return $this->space; } + + public function belongsTo(SpaceInterface $space): bool + { + return $this->getSpace()->isEqualTo($space); + } } diff --git a/src/Interfaces/AlgorithmInterface.php b/src/Interfaces/AlgorithmInterface.php index 470d72c..79210d9 100644 --- a/src/Interfaces/AlgorithmInterface.php +++ b/src/Interfaces/AlgorithmInterface.php @@ -1,11 +1,8 @@ + */ +interface ClusterCollectionInterface extends SpaceBoundInterface, \Iterator, \Countable { - public function getSpace(): SpaceInterface; - - public function has(ClusterInterface $cluster): bool; + public function contains(ClusterInterface $cluster): bool; - public function add(ClusterInterface $cluster): void; + public function attach(ClusterInterface $cluster): void; - public function remove(ClusterInterface $cluster): void; + public function detach(ClusterInterface $cluster): void; } diff --git a/src/Interfaces/ClusterInterface.php b/src/Interfaces/ClusterInterface.php index 7c4db8d..d51de69 100644 --- a/src/Interfaces/ClusterInterface.php +++ b/src/Interfaces/ClusterInterface.php @@ -1,16 +1,13 @@ + */ + public function iterationsCount(): int; + + public function getClusters(): ClusterCollectionInterface; + + public function resume(PointCollectionInterface 
$newPoints): self; +} diff --git a/src/Interfaces/InitializationSchemeInterface.php b/src/Interfaces/InitializationSchemeInterface.php new file mode 100644 index 0000000..d3cdfec --- /dev/null +++ b/src/Interfaces/InitializationSchemeInterface.php @@ -0,0 +1,11 @@ + + */ +interface PointCollectionInterface extends SpaceBoundInterface, \Iterator, \Countable { - public function getSpace(): SpaceInterface; - - public function has(PointInterface $point): bool; + public function contains(PointInterface $point): bool; - public function add(PointInterface $point): void; + public function attach(PointInterface $point): void; - public function remove(PointInterface $point): void; + public function detach(PointInterface $point): void; } diff --git a/src/Interfaces/PointInterface.php b/src/Interfaces/PointInterface.php index e3f209b..62adc77 100644 --- a/src/Interfaces/PointInterface.php +++ b/src/Interfaces/PointInterface.php @@ -1,18 +1,21 @@ + */ public function getCoordinates(): array; - public function setCoordinates(array $coordinates): void; - + /** + * @return mixed + */ public function getData(); + /** + * @param mixed $data + */ public function setData($data): void; } diff --git a/src/Interfaces/SpaceBoundInterface.php b/src/Interfaces/SpaceBoundInterface.php new file mode 100644 index 0000000..779c463 --- /dev/null +++ b/src/Interfaces/SpaceBoundInterface.php @@ -0,0 +1,10 @@ + + */ public function getDimensions(): int; + + public function isEqualTo(self $space): bool; } diff --git a/src/Point.php b/src/Point.php index 16aaed0..140f652 100644 --- a/src/Point.php +++ b/src/Point.php @@ -1,22 +1,32 @@ + */ private array $coordinates; + + /** + * @var mixed + */ private $data; + /** + * @param array $coordinates + */ public function __construct(SpaceInterface $space, array $coordinates) { $this->setSpace($space); - $this->setCoordinates($coordinates); + $this->coordinates = $this->sanitizeCoordinates($coordinates); } public function getCoordinates(): array @@ -24,11 
+34,6 @@ public function getCoordinates(): array return $this->coordinates; } - public function setCoordinates(array $coordinates): void - { - $this->coordinates = $this->sanitizeCoordinates($coordinates); - } - public function getData() { return $this->data; @@ -40,24 +45,25 @@ public function setData($data): void } /** - * @codeCoverageIgnore + * @param array $coordinates + * @return array */ private function sanitizeCoordinates(array $coordinates): array { if (count($coordinates) != $this->space->getDimensions()) { - throw new \LogicException(sprintf( - "Invalid set of coordinates: %d coordinates expected, %d coordinates given", + throw new \InvalidArgumentException(sprintf( + "Invalid set of coordinates: %d coordinates expected, %d given", $this->space->getDimensions(), count($coordinates) )); } $coordinates = filter_var_array($coordinates, FILTER_VALIDATE_FLOAT); - + assert(is_array($coordinates)); $errors = array_keys($coordinates, false, true); if ($errors) { - throw new \LogicException(sprintf( + throw new \InvalidArgumentException(sprintf( "Invalid set of coordinates: values at offsets [%s] could not be converted to numbers", implode(',', $errors) )); diff --git a/src/PointCollection.php b/src/PointCollection.php index 4c01364..cd46183 100644 --- a/src/PointCollection.php +++ b/src/PointCollection.php @@ -1,73 +1,82 @@ + */ + protected \SplObjectStorage $points; - public function __construct(SpaceInterface $space) + /** + * @param array $points + */ + public function __construct(SpaceInterface $space, array $points = []) { $this->setSpace($space); + $this->points = new \SplObjectStorage(); - $this->storage = new \SplObjectStorage(); + foreach ($points as $point) { + $this->attach($point); + } } - public function has(PointInterface $point): bool + public function contains(PointInterface $point): bool { - return $this->storage->contains($point); + return $this->points->contains($point); } - public function add(PointInterface $point): void + public function 
attach(PointInterface $point): void { - if ($point->getSpace() !== $this->getSpace()) { + if (! $point->belongsTo($this->getSpace())) { throw new \InvalidArgumentException( - "Cannot add point to collection: point space is not same as collection space" + "Cannot add point to collection: point doesn't belong to the same space as collection" ); } - $this->storage->attach($point); + $this->points->attach($point); } - public function remove(PointInterface $point): void + public function detach(PointInterface $point): void { - $this->storage->detach($point); + $this->points->detach($point); } - public function current() + public function current(): PointInterface { - return $this->storage->current(); + return $this->points->current(); } - public function key() + public function key(): int { - return $this->storage->key(); + return $this->points->key(); } public function next(): void { - $this->storage->next(); + $this->points->next(); } public function rewind(): void { - $this->storage->rewind(); + $this->points->rewind(); } public function valid(): bool { - return $this->storage->valid(); + return $this->points->valid(); } public function count(): int { - return count($this->storage); + return count($this->points); } } diff --git a/src/RandomInitialization.php b/src/RandomInitialization.php new file mode 100644 index 0000000..da841f7 --- /dev/null +++ b/src/RandomInitialization.php @@ -0,0 +1,49 @@ +getSpace()); + + // initialize N clusters with a random point within space boundaries + for ($n = 0; $n < $nbClusters; $n++) { + // assign all points to the first cluster only + $clusters->attach(new Cluster($this->getRandomPoint($points), $n == 0 ? 
$points : null)); + } + + return $clusters; + } + + protected function getRandomPoint(PointCollectionInterface $points): PointInterface + { + if (count($points) == 0) { + throw new \LogicException("Unable to pick a random point out of an empty point collection"); + } + + $num = mt_rand(0, count($points) - 1); + foreach ($points as $i => $point) { + if ($i > $num) { + break; + } + } + + assert(isset($point)); + return $point; + } +} diff --git a/src/Space.php b/src/Space.php index 7ea33ca..e114821 100644 --- a/src/Space.php +++ b/src/Space.php @@ -1,17 +1,20 @@ + */ + protected int $dimensions; public function __construct(int $dimensions) { if ($dimensions < 1) { - throw new \LogicException("Dimensions cannot be null or negative"); + throw new \InvalidArgumentException("Invalid space dimentions: {$dimensions}"); } $this->dimensions = $dimensions; @@ -21,4 +24,9 @@ public function getDimensions(): int { return $this->dimensions; } + + public function isEqualTo(SpaceInterface $space): bool + { + return $this->getDimensions() == $space->getDimensions(); + } } diff --git a/src/math.php b/src/math.php new file mode 100644 index 0000000..2138794 --- /dev/null +++ b/src/math.php @@ -0,0 +1,63 @@ + $a + * @param array $b + */ +function euclidean_dist(array $a, array $b): float +{ + assert(count($a) == count($b)); + + for ($dist = 0, $n = 0; $n < count($a); $n++) { + $dist += pow($a[$n] - $b[$n], 2); + } + + return sqrt($dist); +} + +/** + * @param array> $points + * @return array + */ +function find_centroid(array $points): array +{ + $centroid = []; + + foreach ($points as $point) { + foreach ($point as $n => $value) { + $centroid[$n] = ($centroid[$n] ?? 0) + $value; + } + } + + foreach ($centroid as &$value) { + $value /= count($points); + } + + return $centroid; +} + +/** + * The standard Box–Muller transform generates values from the standard normal + * distribution (i.e. standard normal deviates). 
+ * + * @see https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform + * + * @return array{float, float} + */ +function generate_gaussian_noise(float $mu, float $sigma): array +{ + static $twoPi = 2 * M_PI; + + // create two random numbers, make sure u1 is greater than epsilon + do { + $u1 = (float) mt_rand() / (float) mt_getrandmax(); + $u2 = (float) mt_rand() / (float) mt_getrandmax(); + } while ($u1 < PHP_FLOAT_EPSILON); + + // compute z0 and z1 + $mag = $sigma * sqrt(-2.0 * log($u1)); + $z0 = $mag * cos($twoPi * $u2) + $mu; + $z1 = $mag * sin($twoPi * $u2) + $mu; + + return [$z0, $z1]; +} diff --git a/src_old/KMeans/Cluster.php b/src_old/KMeans/Cluster.php deleted file mode 100644 index e49b06f..0000000 --- a/src_old/KMeans/Cluster.php +++ /dev/null @@ -1,110 +0,0 @@ -points = new \SplObjectStorage(); - } - - public function toArray(): array - { - $points = []; - foreach ($this->points as $point) { - $points[] = $point->toArray(); - } - - return [ - 'centroid' => parent::toArray(), - 'points' => $points, - ]; - } - - public function attach(Point $point): Point - { - if ($point instanceof self) { - throw new \LogicException("cannot attach a cluster to another"); - } - - $this->points->attach($point); - return $point; - } - - public function detach(Point $point): Point - { - $this->points->detach($point); - return $point; - } - - public function attachAll(\SplObjectStorage $points): void - { - $this->points->addAll($points); - } - - public function detachAll(\SplObjectStorage $points): void - { - $this->points->removeAll($points); - } - - public function updateCentroid(): void - { - if (!$count = count($this->points)) { - return; - } - - $centroid = $this->space->newPoint(array_fill(0, $this->dimention, 0)); - - foreach ($this->points as $point) { - for ($n = 0; $n < $this->dimention; $n++) { - $centroid->coordinates[$n] += $point->coordinates[$n]; - } - } - - for ($n = 0; $n < $this->dimention; $n++) { - $this->coordinates[$n] = $centroid->coordinates[$n] / 
$count; - } - } - - public function getIterator(): \Iterator - { - return $this->points; - } - - public function count(): int - { - return count($this->points); - } -} diff --git a/src_old/KMeans/Point.php b/src_old/KMeans/Point.php deleted file mode 100644 index 6f92e7e..0000000 --- a/src_old/KMeans/Point.php +++ /dev/null @@ -1,115 +0,0 @@ -space = $space; - $this->dimention = $space->getDimention(); - $this->coordinates = $coordinates; - } - - public function toArray(): array - { - return [ - 'coordinates' => $this->coordinates, - 'data' => isset($this->space[$this]) ? $this->space[$this] : null, - ]; - } - - public function getDistanceWith(self $point, bool $precise = true): float - { - if ($point->space !== $this->space) { - throw new \LogicException("can only calculate distances from points in the same space"); - } - - $distance = 0; - for ($n = 0; $n < $this->dimention; $n++) { - $difference = $this->coordinates[$n] - $point->coordinates[$n]; - $distance += $difference * $difference; - } - - return $precise ? 
sqrt($distance) : $distance; - } - - public function getClosest(iterable $points): ?Point - { - $minDistance = PHP_INT_MAX; - $minPoint = null; - foreach ($points as $point) { - $distance = $this->getDistanceWith($point, false); - - if ($distance < $minDistance) { - $minDistance = $distance; - $minPoint = $point; - } - } - - return $minPoint; - } - - public function belongsTo(Space $space): bool - { - return $this->space === $space; - } - - public function getSpace(): Space - { - return $this->space; - } - - public function getCoordinates(): array - { - return $this->coordinates; - } - - public function offsetExists($offset): bool - { - return isset($this->coordinates[$offset]); - } - - public function offsetGet($offset) - { - return $this->coordinates[$offset]; - } - - public function offsetSet($offset, $value): void - { - $this->coordinates[$offset] = $value; - } - - public function offsetUnset($offset): void - { - unset($this->coordinates[$offset]); - } -} diff --git a/src_old/KMeans/Space.php b/src_old/KMeans/Space.php deleted file mode 100644 index 103375e..0000000 --- a/src_old/KMeans/Space.php +++ /dev/null @@ -1,277 +0,0 @@ -dimention = $dimention; - } - - public static function setRng(callable $fn): void - { - static::$rng = $fn; - } - - public function toArray(): array - { - $points = []; - foreach ($this as $point) { - $points[] = $point->toArray(); - } - - return ['points' => $points]; - } - - public function newPoint(array $coordinates): Point - { - if (count($coordinates) != $this->dimention) { - throw new \LogicException("(" . implode(',', $coordinates) . 
") is not a point of this space"); - } - - return new Point($this, $coordinates); - } - - public function addPoint(array $coordinates, $data = null): Point - { - $this->attach($point = $this->newPoint($coordinates), $data); - - return $point; - } - - public function attach($point, $data = null): void - { - if (!$point instanceof Point) { - throw new \InvalidArgumentException("can only attach points to spaces"); - } - - parent::attach($point, $data); - } - - public function getDimention(): int - { - return $this->dimention; - } - - public function getBoundaries(): array - { - if (!count($this)) { - return []; - } - - $min = $this->newPoint(array_fill(0, $this->dimention, null)); - $max = $this->newPoint(array_fill(0, $this->dimention, null)); - - foreach ($this as $point) { - for ($n = 0; $n < $this->dimention; $n++) { - if ($min[$n] === null || $min[$n] > $point[$n]) { - $min[$n] = $point[$n]; - } - - if ($max[$n] === null || $max[$n] < $point[$n]) { - $max[$n] = $point[$n]; - } - } - } - - return [$min, $max]; - } - - public function getRandomPoint(Point $min, Point $max): Point - { - $point = $this->newPoint(array_fill(0, $this->dimention, null)); - $rng = static::$rng; - - for ($n = 0; $n < $this->dimention; $n++) { - $point[$n] = $rng($min[$n], $max[$n]); - } - - return $point; - } - - public function solve(int $nbClusters, callable $iterationCallback = null, $initMethod = Cluster::INIT_RANDOM): array - { - // initialize K clusters - $clusters = $this->initializeClusters($nbClusters, $initMethod); - - // there's only one cluster, clusterization has no meaning - if (count($clusters) == 1) { - return $clusters; - } - - // until convergence is reached - do { - if ($iterationCallback) { - $iterationCallback($this, $clusters); - } - } while ($this->iterate($clusters)); - - // clustering is done. 
- return $clusters; - } - - protected function initializeClusters(int $nbClusters, int $initMethod): array - { - if ($nbClusters <= 0) { - throw new \InvalidArgumentException("invalid clusters number"); - } - - switch ($initMethod) { - case Cluster::INIT_RANDOM: - $clusters = $this->initializeRandomClusters($nbClusters); - - break; - - case Cluster::INIT_KMEANS_PLUS_PLUS: - $clusters = $this->initializeKmeansPlusPlusClusters($nbClusters); - - break; - - default: - return []; - } - - // assign all points to the first cluster - $clusters[0]->attachAll($this); - - return $clusters; - } - - protected function initializeKmeansPlusPlusClusters(int $nbClusters): array - { - $clusters = []; - $clusters[] = new Cluster($this, $this->current()->getCoordinates()); - - for ($i = 1; $i < $nbClusters; ++$i) { - $sum = 0; - $distances = []; - foreach ($this as $point) { - $distance = $point->getDistanceWith($point->getClosest($clusters), false); - $distances[] = $distance; - $sum += $distance; - } - - $probabilities = []; - foreach ($distances as $distance) { - $probabilities[] = $distance / $sum; - } - - $cumulativeProbabilities = array_reduce($probabilities, function ($c, $i) { - $c[] = end($c) + $i; - return $c; - }, []); - - $rng = static::$rng; - $rand = $rng() / mt_getrandmax(); - foreach ($cumulativeProbabilities as $j => $cumulativeProbability) { - if ($rand < $cumulativeProbability) { - foreach ($this as $key => $value) { - if ($j == $key) { - $clusters[] = new Cluster($this, $value->getCoordinates()); - break; - } - } - break; - } - } - } - - return $clusters; - } - - protected function initializeRandomClusters(int $nbClusters): array - { - $clusters = []; - - // get the space boundaries to avoid placing clusters centroid too far from points - list($min, $max) = $this->getBoundaries(); - - // initialize N clusters with a random point within space boundaries - for ($n = 0; $n < $nbClusters; $n++) { - $clusters[] = new Cluster($this, $this->getRandomPoint($min, 
$max)->getCoordinates()); - } - return $clusters; - } - - protected function iterate(array $clusters): bool - { - $continue = false; - - // migration storages - $attach = new \SplObjectStorage(); - $detach = new \SplObjectStorage(); - - // calculate proximity amongst points and clusters - foreach ($clusters as $cluster) { - foreach ($cluster as $point) { - // find the closest cluster - $closest = $point->getClosest($clusters); - - // move the point from its old cluster to its closest - if ($closest !== $cluster) { - if (! isset($attach[$closest])) { - $attach[$closest] = new \SplObjectStorage(); - } - - if (! isset($detach[$cluster])) { - $detach[$cluster] = new \SplObjectStorage(); - } - - $attach[$closest]->attach($point); - $detach[$cluster]->attach($point); - - $continue = true; - } - } - } - - // perform points migrations - foreach ($attach as $cluster) { - $cluster->attachAll($attach[$cluster]); - } - - foreach ($detach as $cluster) { - $cluster->detachAll($detach[$cluster]); - } - - // update all cluster's centroids - foreach ($clusters as $cluster) { - $cluster->updateCentroid(); - } - - return $continue; - } -} diff --git a/tests/Data/boundaries_2d.csv b/tests/Data/boundaries_2d.csv new file mode 100644 index 0000000..1b2a334 --- /dev/null +++ b/tests/Data/boundaries_2d.csv @@ -0,0 +1,100 @@ +-13,-337,-762,291,-470,-31,-262,505,-613,399,-762,-337,-13,505 +-951,-313,-537,-977,780,620,-182,244,142,554,-951,-977,780,620 +767,723,973,-648,142,-229,-156,682,-726,616,-726,-648,973,723 +-793,-469,-254,759,283,118,31,197,-594,784,-793,-469,283,784 +359,562,-72,-517,392,-710,556,-636,-204,-414,-204,-710,556,562 +-929,221,-411,584,737,-872,380,-101,315,46,-929,-872,737,584 +-461,220,308,-656,-5,79,-781,363,648,-158,-781,-656,648,363 +428,673,359,770,985,560,-49,-237,-491,-416,-491,-416,985,770 +541,593,193,-636,133,571,661,213,617,843,133,-636,661,843 +-690,660,657,-587,-939,-625,-532,-860,301,-315,-939,-860,657,660 
+910,854,724,-954,-818,871,-146,972,-263,-47,-818,-954,910,972 +-831,435,336,-982,884,-460,537,-559,609,-872,-831,-982,884,435 +-987,253,482,-656,973,-839,-242,-920,-60,-180,-987,-920,973,253 +-517,-862,571,417,419,-271,725,735,449,395,-517,-862,725,735 +942,296,-256,-745,587,-54,-546,941,-258,827,-546,-745,942,941 +-310,-569,-290,595,649,437,-410,-78,-73,168,-410,-569,649,595 +916,864,-210,660,-866,-314,877,-44,-692,796,-866,-314,916,864 +-181,352,-448,731,339,-850,248,210,55,-542,-448,-850,339,731 +-994,523,-644,145,924,944,-246,604,989,-388,-994,-388,989,944 +-754,-386,152,920,510,578,-766,800,735,748,-766,-386,735,920 +-869,684,277,333,-869,-437,-364,55,-359,312,-869,-437,277,684 +-460,-509,392,818,-369,-133,-410,758,-249,-989,-460,-989,392,818 +15,-419,856,-280,-142,311,555,975,502,-254,-142,-419,856,975 +378,-417,-356,-532,-783,438,-90,908,245,-36,-783,-532,378,908 +461,258,-139,-100,427,-717,44,-556,103,-146,-139,-717,461,258 +591,-928,58,964,-483,-252,-665,-479,-935,502,-935,-928,591,964 +396,-590,-359,-198,365,30,686,-841,-724,461,-724,-841,686,461 +945,-797,-933,-295,-377,748,682,461,537,318,-933,-797,945,748 +-536,858,-889,-564,-871,-835,17,-395,-28,300,-889,-835,17,858 +131,365,873,859,493,-860,362,-358,230,988,131,-860,873,988 +928,375,249,874,297,-682,769,-495,755,-816,249,-816,928,874 +35,711,-382,-621,-971,837,-108,663,459,924,-971,-621,459,924 +-809,862,805,504,481,886,-136,700,-213,253,-809,253,805,886 +901,245,434,-839,774,-290,-613,-834,-847,-218,-847,-839,901,245 +-254,739,892,-319,-55,854,117,717,295,796,-254,-319,892,854 +-211,583,418,130,-414,845,116,704,-561,-609,-561,-609,418,845 +777,-133,-896,-592,-830,712,-276,297,119,697,-896,-592,777,712 +91,710,32,691,-714,-446,58,85,-487,512,-714,-446,91,710 +-406,231,705,-543,-210,-661,205,-621,925,-483,-406,-661,925,231 +943,339,-237,-997,993,726,533,-845,-903,740,-903,-997,993,740 +539,137,233,-681,-301,-607,-27,-352,534,-818,-301,-818,539,137 
+648,-174,-163,293,441,-117,-516,901,-600,-884,-600,-884,648,901 +685,929,605,626,-649,190,-87,829,-120,355,-649,190,685,929 +541,181,368,-652,-624,-437,-951,-757,-316,-36,-951,-757,541,181 +-257,869,617,46,624,279,-99,-826,-980,-23,-980,-826,624,869 +-509,946,-413,-678,-140,320,711,812,-712,430,-712,-678,711,946 +-823,-565,935,13,540,-226,214,-973,-171,540,-823,-973,935,540 +-815,418,28,-14,-584,-956,-24,-856,281,423,-815,-956,281,423 +361,692,537,180,607,247,-419,185,269,552,-419,180,607,692 +698,889,666,322,26,879,913,-639,391,248,26,-639,913,889 +614,101,201,-280,-111,-7,-133,-281,-639,-733,-639,-733,614,101 +-518,949,880,-675,-265,-329,254,-777,805,51,-518,-777,880,949 +-181,-184,771,-231,205,-387,-231,628,-133,-344,-231,-387,771,628 +-324,-295,189,-415,-218,-304,-865,-925,-852,-503,-865,-925,189,-295 +337,-255,-235,-159,-62,-788,436,-87,709,-231,-235,-788,709,-87 +854,-143,143,-473,-308,-662,-71,574,52,-362,-308,-662,854,574 +-561,-667,-589,-233,318,-157,-63,3,-975,269,-975,-667,318,269 +-659,855,478,649,173,-672,-199,-584,15,-251,-659,-672,478,855 +-529,-592,789,-287,569,239,-422,695,977,534,-529,-592,977,695 +-979,-322,-505,-657,733,-648,-603,-324,485,70,-979,-657,733,70 +-477,-256,-256,-391,-477,843,-873,622,635,-108,-873,-391,635,843 +-941,-534,597,-462,-696,458,-332,103,-534,-683,-941,-683,597,458 +783,735,-160,277,455,-851,788,-960,-821,907,-821,-960,788,907 +-162,-584,-883,597,64,-997,799,-496,448,-357,-883,-997,799,597 +257,-282,127,-847,328,-418,513,-900,727,-381,127,-900,727,-282 +225,220,443,277,129,-75,-315,-192,-888,48,-888,-192,443,277 +-477,253,-494,931,-205,127,-31,849,409,-639,-494,-639,409,931 +-339,-739,461,905,-420,297,376,-227,660,-117,-420,-739,660,905 +310,-886,-238,-928,89,-217,76,-59,-620,-223,-620,-928,310,-59 +740,-629,-301,836,207,844,-501,-32,82,903,-501,-629,740,903 +-507,-602,-984,979,140,-725,344,-971,204,494,-984,-971,344,979 +895,205,-162,221,530,350,-313,692,-669,-360,-669,-360,895,692 
+168,-879,839,137,-408,314,974,-913,-38,565,-408,-913,974,565 +696,-110,-93,-934,317,-496,-94,-483,-365,419,-365,-934,696,419 +710,-241,-787,150,-535,277,-630,-544,-549,-980,-787,-980,710,277 +513,455,550,565,-65,-77,288,-72,671,-388,-65,-388,671,565 +709,-604,-655,-420,787,905,282,-949,-676,-603,-676,-949,787,905 +93,-464,305,54,-107,534,-565,-441,736,-142,-565,-464,736,534 +-597,-242,665,-222,549,100,919,800,-51,-217,-597,-242,919,800 +-218,-956,63,-359,-500,-802,-769,162,-922,-573,-922,-956,63,162 +524,325,909,142,183,-172,18,12,-751,168,-751,-172,909,325 +951,927,424,-476,-457,-776,-769,883,554,-277,-769,-776,951,927 +-750,-675,-928,-927,-750,831,-52,-606,-903,731,-928,-927,-52,831 +-285,982,817,280,47,-139,-615,-40,990,-823,-615,-823,990,982 +-952,-990,-874,-93,178,-8,-140,-295,325,612,-952,-990,325,612 +-386,446,599,230,173,448,-264,-905,-351,-574,-386,-905,599,448 +-916,-153,-781,642,986,-449,283,468,-295,555,-916,-449,986,642 +-15,-692,-679,123,514,825,189,450,912,376,-679,-692,912,825 +287,-270,-440,-372,193,696,-333,618,-36,-593,-440,-593,287,696 +253,433,424,-183,-679,-268,220,-663,869,-284,-679,-663,869,433 +-797,-970,843,-348,642,-230,429,-938,376,857,-797,-970,843,857 +-917,649,-648,-264,-494,-595,-52,-976,976,-606,-917,-976,976,649 +249,-737,745,689,-327,367,-18,439,-969,-304,-969,-737,745,689 +-587,327,63,-981,7,-379,-312,906,844,954,-587,-981,844,954 +-423,793,471,318,840,-981,697,670,39,-867,-423,-981,840,793 +-418,202,145,-953,24,-447,-6,-250,453,-907,-418,-953,453,202 +863,401,-621,-319,-236,-632,694,-39,-181,638,-621,-632,863,638 +187,-745,-182,967,718,709,44,-128,574,650,-182,-745,718,967 +495,336,929,994,-792,165,-966,-55,-407,-518,-966,-518,929,994 +391,672,729,-450,-209,-370,624,785,-473,-52,-473,-450,729,785 diff --git a/tests/Data/centroids_2d.csv b/tests/Data/centroids_2d.csv new file mode 100644 index 0000000..a2822a4 --- /dev/null +++ b/tests/Data/centroids_2d.csv @@ -0,0 +1,100 @@ +590,-924,544,188,-828,55,967,-370,318.25,-262.75 
+772,-744,218,893,920,-505,202,-8,528,-91 +-843,-236,-145,-548,788,-857,917,-136,179.25,-444.25 +-156,-236,925,15,469,999,227,379,366.25,289.25 +-187,500,-766,847,-435,82,930,-781,-114.5,162 +445,969,-880,-772,956,329,-596,-935,-18.75,-102.25 +155,-131,581,228,698,-404,-519,-222,228.75,-132.25 +707,-203,456,-746,-784,162,-826,-756,-111.75,-385.75 +693,-357,-528,357,877,281,-401,890,160.25,292.75 +987,471,575,69,-94,904,449,-868,479.25,144 +-9,-74,124,-889,-675,432,-990,414,-387.5,-29.25 +-555,419,248,130,521,636,394,-455,152,182.5 +214,-491,713,-179,-50,-335,-14,190,215.75,-203.75 +478,-188,-281,-585,-277,679,266,-659,46.5,-188.25 +773,-330,664,-630,618,-813,-135,-581,480,-588.5 +830,164,632,-22,-342,-340,-191,-925,232.25,-280.75 +820,964,833,-71,128,230,242,977,505.75,525 +-453,-20,271,-193,-152,-121,-666,-712,-250,-261.5 +474,982,-282,63,804,415,-567,546,107.25,501.5 +159,325,659,-337,-488,-928,-785,560,-113.75,-95 +-366,304,373,209,618,727,8,731,158.25,492.75 +25,-124,327,704,-450,-997,990,-292,223,-177.25 +949,-269,929,193,-974,739,-207,-660,174.25,0.75 +619,-170,-503,173,930,925,-259,580,196.75,377 +934,-194,975,-198,-732,901,227,114,351,155.75 +-602,717,-936,-80,540,-266,663,-608,-83.75,-59.25 +70,-518,-847,318,908,-618,-435,261,-76,-139.25 +-599,-947,-34,940,458,-78,998,71,205.75,-3.5 +-469,-866,-553,896,975,237,728,-18,170.25,62.25 +511,795,430,-893,-40,649,-147,70,188.5,155.25 +851,405,483,-555,-303,-771,-171,983,215,15.5 +-324,129,480,68,218,511,-487,-346,-28.25,90.5 +296,551,119,163,-605,-840,-913,-85,-275.75,-52.75 +681,270,554,-861,-995,-662,3,157,60.75,-274 +546,274,-351,937,-414,188,230,364,2.75,440.75 +421,412,441,-915,799,732,-781,-783,220,-138.5 +-794,-999,237,-828,576,745,-855,-431,-209,-378.25 +263,-860,-151,-365,501,112,271,-854,221,-491.75 +855,-614,973,-607,175,404,559,-490,640.5,-326.75 +-271,-500,22,-19,-955,-45,893,522,-77.75,-10.5 +-736,961,107,29,341,521,378,433,22.5,486 +208,603,-19,-332,-83,-832,291,751,99.25,47.5 
+-125,-657,-305,-359,184,811,-583,583,-207.25,94.5 +-356,-920,862,313,-187,-963,-203,-260,29,-457.5 +-223,502,626,-763,532,869,434,-789,342.25,-45.25 +663,728,-732,-456,663,-649,683,-715,319.25,-273 +691,337,-817,-148,-460,-511,671,710,21.25,97 +516,655,861,-544,96,832,297,-193,442.5,187.5 +-688,387,-265,-361,868,-39,608,-29,130.75,-10.5 +-757,-489,394,716,-106,-570,-684,280,-288.25,-15.75 +597,-876,-905,-977,960,615,710,-284,340.5,-380.5 +13,-879,863,134,730,-199,-999,-599,151.75,-385.75 +938,-294,999,990,545,220,812,-660,823.5,64 +752,52,-222,-273,514,485,-270,-894,193.5,-157.5 +-340,183,-789,-871,-521,124,762,-899,-222,-365.75 +-999,-786,363,-450,-754,-31,764,-649,-156.5,-479 +-173,557,204,541,766,870,625,499,355.5,616.75 +300,-750,-771,899,446,559,784,626,189.75,333.5 +-913,103,583,459,-73,-237,-152,-241,-138.75,21 +-667,945,514,-341,505,22,-564,-721,-53,-23.75 +-632,-162,-440,-703,-162,-550,815,626,-104.75,-197.25 +-342,948,607,806,394,905,-655,264,1,730.75 +-396,332,-980,628,498,-423,-723,55,-400.25,148 +-195,748,252,952,343,655,949,-702,337.25,413.25 +-954,-466,905,-692,-502,545,333,-439,-54.5,-263 +497,716,695,-319,-884,254,612,-464,230,46.75 +150,35,255,-525,-238,-118,546,-144,178.25,-188 +-989,325,695,94,-906,59,-294,773,-373.5,312.75 +836,-980,-241,-623,-980,770,489,174,26,-164.75 +613,451,808,231,-819,-162,445,455,261.75,243.75 +-668,-397,358,-682,-484,820,57,-883,-184.25,-285.5 +589,-535,586,890,303,310,-324,572,288.5,309.25 +604,347,368,8,615,-647,-139,-304,362,-149 +-435,690,989,-880,767,669,601,972,480.5,362.75 +-116,902,718,6,-894,541,805,442,128.25,472.75 +644,-946,-375,399,762,-534,-542,108,122.25,-243.25 +-119,-709,106,998,460,44,726,231,293.25,141 +-495,808,875,-893,-52,-186,-542,-803,-53.5,-268.5 +820,913,167,782,757,-844,527,977,567.75,457 +940,142,44,5,308,902,168,799,365,462 +-560,-141,975,-712,539,-738,-97,-70,214.25,-415.25 +16,977,-701,728,-445,854,166,-998,-241,390.25 +475,486,773,-109,309,745,-146,-627,352.75,123.75 
+-473,721,-430,-177,753,-425,-692,59,-210.5,44.5 +544,666,-687,-702,146,-656,355,-31,89.5,-180.75 +738,178,-359,501,-466,-884,682,-778,148.75,-245.75 +-316,905,-156,-290,-561,-698,398,-628,-158.75,-177.75 +-311,770,924,637,-442,-680,-586,314,-103.75,260.25 +509,358,238,-909,875,-254,-731,728,222.75,-19.25 +902,-48,-994,-857,103,862,864,-854,218.75,-224.25 +361,820,838,-206,97,129,-48,12,312,188.75 +294,193,752,-78,62,-800,-241,-7,216.75,-173 +90,222,-84,-336,-169,696,-583,-172,-186.5,102.5 +-71,-284,458,-608,-609,-996,-965,-657,-296.75,-636.25 +654,378,-297,-671,-638,-662,464,130,45.75,-206.25 +624,255,633,-239,-884,-966,326,-234,174.75,-296 +364,-699,-326,580,-123,349,-3,522,-22,188 +-689,523,-649,-682,444,214,-730,628,-406,170.75 +-8,-265,-832,-876,251,-885,213,-572,-94,-649.5 +-553,-887,794,101,160,533,-514,-294,-28.25,-136.75 diff --git a/tests/Data/euclidean_distances_2d.csv b/tests/Data/euclidean_distances_2d.csv new file mode 100644 index 0000000..4841de5 --- /dev/null +++ b/tests/Data/euclidean_distances_2d.csv @@ -0,0 +1,100 @@ +-549,-241,311,-311,862.844134244418 +-932,506,526,-635,1851.39001833757 +-231,-565,710,824,1677.73716654308 +-211,-839,659,-311,1017.68560960642 +409,864,-138,-654,1613.54671453912 +-985,202,430,902,1578.67824460844 +501,-967,-858,-180,1570.42987745394 +323,-691,6,219,963.633228982895 +-898,-952,361,426,1866.53823962972 +583,511,-119,-681,1383.35389542951 +242,-648,-361,-757,612.77238841188 +-127,-226,705,-286,834.160655989001 +-64,-958,554,937,1993.2257774773 +889,950,557,470,583.630019789935 +533,-514,-86,-588,623.407571336762 +516,750,-255,591,787.22423743175 +239,-250,630,186,585.642382346087 +48,-581,-255,578,1197.95241975631 +-619,-699,-284,335,1086.91352001896 +664,-983,18,591,1701.40882800108 +-109,323,-701,408,598.07106601139 +912,-906,-357,62,1596.052943984 +-766,-949,576,561,2020.16434974979 +887,71,-401,-911,1619.65057960043 +-131,694,338,701,469.052235897027 +-490,764,-355,449,342.709789763876 
+119,315,-69,-34,396.41518638922 +715,-660,398,557,1257.60804704805 +334,819,211,-757,1580.7925227556 +739,-45,862,820,873.701321963061 +180,444,-718,-492,1297.11217710728 +30,465,268,961,550.145435316881 +-687,439,954,420,1641.10999022003 +-394,266,857,-941,1738.34691589452 +-813,-932,873,-398,1768.54516481768 +466,896,-13,-155,1155.00735928391 +404,421,-86,-303,874.228803002967 +-821,184,315,-960,1612.21338538048 +-482,719,339,512,846.693569126399 +968,807,-879,-522,2275.44501142084 +750,-662,-54,948,1799.58773056498 +-808,24,411,-973,1574.79204976403 +-594,260,838,75,1443.90061984889 +-725,456,389,640,1129.09344166017 +943,925,-899,1000,1843.52624065946 +-806,-894,-337,-599,554.063173293443 +754,990,172,-759,1843.29189224062 +541,383,799,-245,678.931513482767 +-998,24,-155,273,879.005119439017 +7,975,934,-540,1776.10641573077 +323,-401,6,569,1020.48468876314 +86,-456,-257,-674,406.414812722174 +-710,-926,-181,-188,908.011563803017 +-732,-733,570,144,1569.8194163661 +176,554,728,-188,924.807006893871 +48,-436,203,-657,269.937029693964 +-516,182,-692,528,388.190674797837 +-709,453,-155,12,708.093920324133 +-195,145,818,139,1013.01776884712 +629,613,198,187,606.000825081947 +606,-742,-504,464,1639.06558746134 +319,746,701,129,725.681059419357 +618,-724,-357,-403,1026.48234276095 +-273,-934,634,-313,1099.22245246356 +81,584,-757,774,859.269457155321 +-631,489,-732,-601,1094.66935647254 +236,-704,-171,-501,454.816446492429 +929,-380,468,158,708.494883538336 +-805,819,204,-767,1879.75450524796 +-377,563,601,-91,1176.52029306765 +-86,-355,510,783,1284.62445874271 +555,218,721,125,190.276115158997 +-45,-620,439,745,1448.26827625271 +-280,184,-549,701,582.794989683336 +-106,430,-574,-963,1469.51454569188 +-27,414,102,-850,1270.56562207546 +815,-359,48,339,1037.05978612614 +320,-13,-158,-928,1032.33182649766 +-177,-319,-873,-458,709.744320160436 +154,-179,532,395,687.28451168348 +-114,-632,-765,642,1430.69109174552 +-158,-825,925,934,2065.66454198159 
+-426,-223,-960,735,1096.77709677035 +-983,287,-729,323,256.538496136545 +336,627,-296,236,743.172254595124 +806,-956,-657,-715,1482.71710046118 +-232,-980,-115,884,1867.66833244021 +-509,-703,494,317,1430.52752507598 +89,626,95,199,427.042152486145 +800,925,-389,-961,2229.51048438889 +-378,-211,-462,-210,84.0059521700695 +-954,-551,600,646,1961.56187768829 +172,656,690,430,565.154846037792 +915,-507,226,-542,689.888396771536 +338,-625,1,-732,353.578845521052 +111,783,-193,-218,1046.14387155878 +-719,365,522,180,1254.71351311764 +-214,121,-917,-599,1006.28475095273 +-853,478,693,-759,1979.97095938299 +756,-268,149,524,997.854197766387 diff --git a/tests/Data/euclidean_distances_3d.csv b/tests/Data/euclidean_distances_3d.csv new file mode 100644 index 0000000..4e7ed6c --- /dev/null +++ b/tests/Data/euclidean_distances_3d.csv @@ -0,0 +1,100 @@ +647,195,682,47,-986,-745,1947.07216096374 +14,-800,273,-854,343,-56,1472.45169700062 +253,-611,381,-768,-438,-710,1504.21108891006 +129,912,281,-840,-79,-972,1868.43544175334 +-327,-664,-490,-231,-38,63,840.774048124703 +-273,-495,-596,-186,-305,-503,228.731283387297 +-669,514,-926,890,-146,832,2440.62389564636 +846,-173,-394,-669,579,159,1779.47688942565 +-220,850,-183,-84,-555,-862,1566.38501014278 +-895,-529,285,-659,933,-718,1788.61650445253 +-174,-723,-36,-633,230,363,1130.52686832291 +-627,-510,882,168,700,-473,1982.96495178306 +277,-86,939,-124,-243,95,947.515699078385 +305,128,68,-370,972,213,1090.40634627647 +-106,193,739,-203,-842,-181,1388.17650174609 +669,-192,523,695,220,525,412.82441788247 +283,537,-431,-406,-235,-589,1046.7420885777 +-81,-170,130,423,808,-394,1218.63694347414 +617,962,-594,829,26,-774,976.442522629981 +841,519,426,-208,-506,548,1471.70309505688 +-710,-616,816,475,544,-190,1939.55175233867 +883,443,-160,-57,-54,-756,1218.94421529453 +-809,661,-252,-388,-423,581,1430.44957967766 +254,463,354,238,-339,132,832.312441334383 +-228,-45,-796,941,-552,-486,1311.37713873622 
+99,-340,184,-438,888,521,1381.99927641081 +425,-340,-579,139,-555,475,1113.07546913945 +486,96,-164,450,511,380,685.169322138696 +-857,630,418,-821,901,539,298.961535987491 +633,-138,560,168,966,-824,1830.43628679067 +-137,432,-97,440,407,-918,1003.79031674947 +918,313,100,-188,-124,-674,1418.90133554099 +290,-400,112,-221,-439,-338,682.013196353267 +826,812,653,336,-149,-606,1657.92098726085 +9,425,913,-401,-426,-85,1374.15610466934 +883,-771,654,985,263,-633,1654.06438810586 +759,749,390,718,-270,527,1028.98542263727 +-273,-738,599,468,-986,-937,1723.3342682138 +571,-751,186,-292,-466,-85,948.385470154409 +575,655,-817,844,-981,914,2396.91843832868 +468,-40,812,-954,700,371,1662.57781772764 +-133,64,715,-542,518,-376,1250.47111122169 +-743,-243,482,312,-954,-620,1683.13695224126 +-709,-35,132,325,513,-705,1438.7595351552 +205,-912,709,53,-599,-173,948.154523271392 +-249,-203,725,197,-337,-760,1556.30877399056 +712,-920,-874,493,-651,-48,895.878339954706 +798,-419,-7,-488,-414,81,1289.01706738119 +522,600,942,985,257,-410,1469.66730929146 +-270,-75,748,243,-798,594,899.785530001456 +458,-370,-117,-378,682,-532,1406.35166299187 +300,626,-93,-975,858,-28,1297.56464193504 +-465,-703,925,-139,-426,593,541.506232651112 +890,12,-703,-643,877,-489,1773.1638390177 +-867,-226,-887,-195,59,318,1408.84136793324 +-404,-771,166,482,595,573,1678.27321971126 +706,32,-171,162,-663,-231,884.624779214329 +-674,-844,-319,450,330,148,1691.07687583977 +-972,825,-21,672,212,-634,1858.56772811754 +295,-437,-927,538,-341,454,1405.49848808172 +427,-365,800,-336,-662,415,904.766820788649 +-987,-443,438,-672,699,234,1202.08360774116 +-782,-748,-222,-125,237,394,1334.66475191338 +-932,200,-867,757,232,-195,1818.05637976384 +299,-375,612,-807,-520,-733,1747.36544546354 +347,-798,950,1000,-60,866,988.993933247318 +414,-810,-196,382,-967,544,757.147937988343 +543,884,106,729,795,331,305.191743007572 +-63,427,526,-993,-596,147,1433.55153377896 +-756,171,-788,-752,282,413,1206.12520079799 
+-894,302,-122,171,675,334,1217.08257731347 +-316,388,708,320,-153,-519,1484.15160950625 +544,291,-85,-261,-42,-658,1042.70945138135 +-661,-947,-984,747,759,-826,2217.62575742617 +337,106,928,376,117,937,41.5090351610345 +555,865,353,-353,-715,349,1822.32818120118 +-102,-108,-672,-827,172,-224,897.0668871383 +-280,-195,-271,955,-955,511,1647.5281484697 +806,-448,-512,-255,431,772,1883.35286125569 +310,-473,-197,-68,517,878,1509.50621065301 +-890,953,-107,782,-647,-658,2378.90415948184 +597,757,305,188,178,-58,796.423882113037 +133,-196,988,594,884,-708,2062.84681932518 +96,-532,206,512,-59,-78,690.971055833745 +-383,-84,-356,889,97,-511,1294.12905075189 +196,512,-273,397,-464,-399,1004.41674617661 +-198,49,-924,769,-117,861,2036.87751227215 +-683,-752,217,178,94,-723,1529.91404987339 +905,259,-204,-589,796,-394,1598.90743947234 +-901,441,-50,232,-532,-921,1728.88952799188 +723,491,-569,988,-7,-727,585.826766203116 +588,-791,860,-418,-576,-581,1770.52026252173 +-41,-975,145,-839,258,-368,1555.71912632069 +-752,-488,-219,-640,-748,513,784.836288661527 +-837,-453,-271,539,-914,35,1483.08226339607 +-507,781,547,435,-97,502,1288.51581286378 +515,-8,-308,-378,-71,953,1546.46015144264 +-264,-339,287,-2,45,-70,586.130531537131 +169,-813,473,-843,-198,438,1184.73372535773 +259,-567,-443,244,-731,-524,183.526564834631 diff --git a/tests/Unit/AlgorithmTest.php b/tests/Unit/AlgorithmTest.php new file mode 100644 index 0000000..4c1f9cd --- /dev/null +++ b/tests/Unit/AlgorithmTest.php @@ -0,0 +1,256 @@ + $dimensions + * @param array> $expected + * @param array> $initialClusterCentroids + * @param int<0, max> $nbPointsPerCentroid + */ + public function testClusterize( + int $dimensions, + array $expected, + array $initialClusterCentroids, + int $nbPointsPerCentroid + ): void { + $space = new Space($dimensions); + $radius = 1; + + $points = $this->makePointsAround( + $space, + $expected, + $radius, + $nbPointsPerCentroid, + ); + + $clusters = $this->makeClusters( + $points, + 
$initialClusterCentroids + ); + + $algo = new Algorithm( + $this->mockInitScheme($clusters) + ); + + $resultClusters = iterator_to_array( + $algo->clusterize($points, count($expected)) + ); + + foreach ($expected as $n => $expectedCentroid) { + // assert found cluster centroids are in the vicinity + // of expected centroids + $this->assertLessThan(1, euclidean_dist( + $expectedCentroid, + $resultClusters[$n]->getCentroid()->getCoordinates() + )); + + // assert found cluster has $nbPoints points attached + $this->assertCount( + $nbPointsPerCentroid, + $resultClusters[$n]->getPoints() + ); + } + } + + /** + * @return array + */ + public function clusterizeDataProvider(): array + { + return [ + 'one dimension, 3 clusters, 5 points per cluster' => [ + 'dimension' => 1, + 'expected' => [ + [-50], + [0], + [50], + ], + 'initialClusterCentroids' => [ + [-10], + [0], + [10] + ], + 'nbPointsPerCentroid' => 5, + ], + + 'two dimensions, 3 clusters, 50 points per cluster' => [ + 'dimension' => 2, + 'expected' => [ + [20, 10], + [40, 20], + [60, 15], + ], + 'initialClusterCentroids' => [ + [12, 10], + [33, 20], + [60, 10], + ], + 'nbPointsPerCentroid' => 50, + ], + ]; + } + + /** + * @covers ::__construct + * @covers ::clusterize + */ + public function testClusterizeFailsWhenClusterInitializationFails(): void + { + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + /** @phpstan-ignore-next-line */ + $initScheme + ->shouldReceive('initializeClusters') + ->with(PointCollectionInterface::class, Mockery::type('integer')) + ->andThrow(new \Exception('n/a')); + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage("Cannot initialize clusters"); + + (new Algorithm($initScheme))->clusterize(new PointCollection(new Space(1)), 1); + } + + /** + * @covers ::__construct + * @covers ::registerIterationCallback + * @covers ::clusterize + * @covers ::iterate + * @covers ::invokeIterationCallbacks + * 
@covers ::getClosestCluster + * @covers ::getDistanceBetween + * @covers euclidean_dist + */ + public function testIterationCallback(): void + { + $space = new Space(1); + + $points = new PointCollection($space, array_map( + fn ($coordinates) => new Point($space, $coordinates), + [[1],[2],[3],[4],[5]] + )); + + $clusters = new ClusterCollection($space, [ + new Cluster(new Point($space, [6]), $points) + ]); + + $callbackCalled = false; + + $algo = new Algorithm($this->mockInitScheme($clusters)); + $algo->registerIterationCallback( + function (AlgorithmInterface $algo, ClusterCollectionInterface $cluster) use (&$callbackCalled) { + $callbackCalled = true; + } + ); + + $algo->clusterize($points, 1); + + $this->assertTrue($callbackCalled); + } + + /** + * @param array> $centroids + * @param int<0, max> $nbPointsPerCentroid + */ + private function makePointsAround( + SpaceInterface $space, + array $centroids, + float $radius, + int $nbPointsPerCentroid + ): PointCollectionInterface { + $points = new PointCollection($space); + + foreach ($centroids as $centroid) { + for ($i = 0; $i < $nbPointsPerCentroid; $i++) { + $coordinates = $centroid; + + foreach ($coordinates as &$n) { + list($n) = generate_gaussian_noise($n, $radius); + } + + $points->attach(new Point($space, $coordinates)); + } + } + + return $points; + } + + /** + * @param array> $centroids + */ + private function makeClusters(PointCollectionInterface $points, array $centroids): ClusterCollectionInterface + { + $clusters = new ClusterCollection($points->getSpace()); + + foreach ($centroids as $n => $centroid) { + $clusters->attach(new Cluster( + new Point($points->getSpace(), $centroid), + $n == 0 ? 
$points : null + )); + } + + return $clusters; + } + + private function mockInitScheme(ClusterCollectionInterface $clusters): InitializationSchemeInterface + { + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + /** @phpstan-ignore-next-line */ + $initScheme + ->shouldReceive('initializeClusters') + ->with(PointCollectionInterface::class, Mockery::type('integer')) + ->andReturn($clusters); + + return $initScheme; + } +} diff --git a/tests/Unit/ClusterCollectionTest.php b/tests/Unit/ClusterCollectionTest.php index 1f88ff6..08440c9 100644 --- a/tests/Unit/ClusterCollectionTest.php +++ b/tests/Unit/ClusterCollectionTest.php @@ -2,29 +2,51 @@ namespace Tests\Unit; -use Bdelespierre\Kmeans\Cluster; -use Bdelespierre\Kmeans\ClusterCollection; -use Bdelespierre\Kmeans\Interfaces\ClusterInterface; -use Bdelespierre\Kmeans\Point; -use Bdelespierre\Kmeans\Space; +use Kmeans\Cluster; +use Kmeans\ClusterCollection; +use Kmeans\Interfaces\ClusterInterface; +use Kmeans\Point; +use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Bdelespierre\Kmeans\ClusterCollection - * @uses Bdelespierre\Kmeans\Space - * @uses Bdelespierre\Kmeans\Cluster - * @uses Bdelespierre\Kmeans\Point - * @uses Bdelespierre\Kmeans\PointCollection + * @coversDefaultClass \Kmeans\ClusterCollection + * @uses \Kmeans\Space + * @uses \Kmeans\Cluster + * @uses \Kmeans\Point + * @uses \Kmeans\PointCollection */ class ClusterCollectionTest extends TestCase { /** * @covers ::__construct - * @covers ::add - * @covers ::has - * @covers ::remove + * @covers ::getSpace + * @covers ::attach + * @covers ::contains */ - public function testAddingAndRemovingClustersFromCollection() + public function testConstructingClusterWithPoints(): void + { + $space = new Space(1); + $point = new Point($space, [1]); + $cluster = new Cluster($point); + $collection = new ClusterCollection($space, [$cluster]); + + $this->assertTrue( + 
$collection->contains($cluster) + ); + + $this->assertFalse( + $collection->contains(new Cluster($point)) + ); + } + + /** + * @covers ::__construct + * @covers ::attach + * @covers ::contains + * @covers ::detach + */ + public function testAddingAndRemovingClustersFromCollection(): void { $space = new Space(4); $collection = new ClusterCollection($space); @@ -33,22 +55,33 @@ public function testAddingAndRemovingClustersFromCollection() $clusterB = new Cluster(new Point($space, [5,6,7,8])); $clusterC = new Cluster(new Point($space, [9,0,1,2])); - $collection->add($clusterA); - $collection->add($clusterC); + $collection->attach($clusterA); + $collection->attach($clusterC); + + $this->assertTrue( + $collection->contains($clusterA) + ); + + $this->assertFalse( + $collection->contains($clusterB) + ); + + $this->assertTrue( + $collection->contains($clusterC) + ); - $this->assertTrue($collection->has($clusterA)); - $this->assertFalse($collection->has($clusterB)); - $this->assertTrue($collection->has($clusterC)); + $collection->detach($clusterC); - $collection->remove($clusterC); - $this->assertFalse($collection->has($clusterC)); + $this->assertFalse( + $collection->contains($clusterC) + ); } /** * @covers ::__construct - * @covers ::add + * @covers ::attach */ - public function testAddingInvalidClusterToCollection() + public function testAddingInvalidClusterToCollection(): void { $this->expectException(\InvalidArgumentException::class); @@ -58,16 +91,16 @@ public function testAddingInvalidClusterToCollection() $collection = new ClusterCollection($spaceA); $cluster = new Cluster(new Point($spaceB, [1, 2, 3])); - $collection->add($cluster); + $collection->attach($cluster); } /** * @covers ::__construct - * @covers ::add - * @covers ::remove + * @covers ::attach + * @covers ::detach * @covers ::count */ - public function testCount() + public function testCount(): void { $space = new Space(4); $collection = new ClusterCollection($space); @@ -76,32 +109,32 @@ public function 
testCount() $clusterB = new Cluster(new Point($space, [5,6,7,8])); $clusterC = new Cluster(new Point($space, [9,0,1,2])); - $collection->add($clusterA); - $collection->add($clusterB); - $collection->add($clusterC); + $collection->attach($clusterA); + $collection->attach($clusterB); + $collection->attach($clusterC); $this->assertEquals(3, count($collection)); - $collection->remove($clusterA); + $collection->detach($clusterA); $this->assertEquals(2, count($collection)); - $collection->remove($clusterB); + $collection->detach($clusterB); $this->assertEquals(1, count($collection)); - $collection->remove($clusterC); + $collection->detach($clusterC); $this->assertEquals(0, count($collection)); } /** * @covers ::__construct - * @covers ::add + * @covers ::attach * @covers ::current * @covers ::key * @covers ::next * @covers ::rewind * @covers ::valid */ - public function testIterator() + public function testIterator(): void { $space = new Space(4); $collection = new ClusterCollection($space); @@ -110,9 +143,9 @@ public function testIterator() $clusterB = new Cluster(new Point($space, [5,6,7,8])); $clusterC = new Cluster(new Point($space, [9,0,1,2])); - $collection->add($clusterA); - $collection->add($clusterB); - $collection->add($clusterC); + $collection->attach($clusterA); + $collection->attach($clusterB); + $collection->attach($clusterC); $iterations = 0; foreach ($collection as $i => $cluster) { diff --git a/tests/Unit/ClusterTest.php b/tests/Unit/ClusterTest.php index 473da19..6b1a4fc 100644 --- a/tests/Unit/ClusterTest.php +++ b/tests/Unit/ClusterTest.php @@ -2,108 +2,161 @@ namespace Tests\Unit; -use Bdelespierre\Kmeans\Cluster; -use Bdelespierre\Kmeans\Point; -use Bdelespierre\Kmeans\PointCollection; -use Bdelespierre\Kmeans\Space; +use Kmeans\Cluster; +use Kmeans\Point; +use Kmeans\PointCollection; +use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Bdelespierre\Kmeans\Cluster - * @uses Bdelespierre\Kmeans\Space - * @uses 
Bdelespierre\Kmeans\Point - * @uses Bdelespierre\Kmeans\PointCollection + * @coversDefaultClass \Kmeans\Cluster + * @uses \Kmeans\Space + * @uses \Kmeans\Point + * @uses \Kmeans\PointCollection */ class ClusterTest extends TestCase { + private Space $space; + /** @var array */ + private array $pointsArray; + private Point $centroid; + private PointCollection $points; + private Cluster $cluster; + + public function setUp(): void + { + $this->space = new Space(2); + + $this->pointsArray = array_map( + fn ($i) => new Point($this->space, [$i, $i]), + range(1, 10) + ); + + $this->points = new PointCollection( + $this->space, + $this->pointsArray + ); + + $this->centroid = new Point($this->space, [0, 0]); + + $this->cluster = new Cluster( + $this->centroid, + $this->points + ); + } + + public function tearDown(): void + { + unset( + $this->space, + $this->pointsArray, + $this->points, + $this->centroid, + $this->cluster, + ); + } + + /** + * @covers ::__construct + * @covers ::getSpace + * @covers ::setCentroid + * @covers ::belongsTo + */ + public function testBelongsTo(): void + { + $this->assertTrue( + $this->cluster->belongsTo($this->space) + ); + } + + /** + * @covers ::__construct + * @covers ::getSpace + * @covers ::setCentroid + * @covers ::getCentroid + */ + public function testGetCentroid(): void + { + $this->assertSame( + $this->centroid, + $this->cluster->getCentroid() + ); + } + /** * @covers ::__construct + * @covers ::getSpace + * @covers ::setCentroid * @covers ::getCentroid */ - public function testGetCentroid() + public function testSetCentroid(): void { - $space = new Space(2); - $centroid = new Point($space, [0,0]); - $cluster = new Cluster($centroid, new PointCollection($space)); + $this->cluster->setCentroid( + $centroid = new Point($this->space, [1, 1]) + ); $this->assertSame( $centroid, - $cluster->getCentroid() + $this->cluster->getCentroid() ); } /** * @covers ::__construct - * @covers ::getPoints + * @covers ::getSpace + * @covers ::setCentroid 
+ * @covers ::getCentroid */ - public function testGetPoints() + public function testSetCentroidFailsWithInvalidCentroid(): void { - $space = new Space(2); - $centroid = new Point($space, [0,0]); - $collection = new PointCollection($space); - $cluster = new Cluster($centroid, $collection); - - foreach (range(1, 10) as $i) { - $collection->add( - new Point($space, [0,$i]) - ); - } - - $this->assertCount( - 10, - $cluster->getPoints() + $this->expectException(\LogicException::class); + $this->expectExceptionMessageMatches('/^Cannot set centroid/'); + + $this->cluster->setCentroid( + new Point(new Space(3), [2, 2, 2]) ); } /** * @covers ::__construct + * @covers ::getSpace + * @covers ::setCentroid + * @covers ::getPoints + */ + public function testGetPoints(): void + { + $this->assertCount(10, $this->cluster->getPoints()); + } + + /** + * @covers ::__construct + * @covers ::getSpace + * @covers ::setCentroid * @covers ::attach * @covers ::getPoints */ - public function testAttach() + public function testAttach(): void { - $space = new Space(2); - $centroid = new Point($space, [0,0]); - $cluster = new Cluster($centroid); - - foreach (range(1, 10) as $i) { - $cluster->attach( - new Point($space, [0,$i]) - ); - } - - $this->assertCount( - 10, - $cluster->getPoints() + $this->cluster->attach( + new Point($this->space, [11, 11]) ); + + $this->assertCount(11, $this->cluster->getPoints()); } /** * @covers ::__construct + * @covers ::getSpace + * @covers ::setCentroid * @covers ::detach * @covers ::getPoints */ - public function testDetach() + public function testDetach(): void { - $space = new Space(2); - $centroid = new Point($space, [0,0]); - $collection = new PointCollection($space); - $cluster = new Cluster($centroid, $collection); - - $pointA = new Point($space, [1,1]); - $pointB = new Point($space, [2,2]); - $pointC = new Point($space, [3,3]); - - $collection->add($pointA); - $collection->add($pointB); - $collection->add($pointC); - - $cluster->detach($pointA); - 
$cluster->detach($pointC); - - $this->assertCount( - 1, - $cluster->getPoints() + $this->cluster->detach( + $this->pointsArray[array_rand($this->pointsArray)] ); + + $this->assertCount(9, $this->cluster->getPoints()); } } diff --git a/tests/Unit/Concerns/HasSpaceTraitTest.php b/tests/Unit/Concerns/HasSpaceTraitTest.php new file mode 100644 index 0000000..2897a08 --- /dev/null +++ b/tests/Unit/Concerns/HasSpaceTraitTest.php @@ -0,0 +1,54 @@ +space = new Space(2); + + $this->point = new class ($this->space) implements SpaceBoundInterface { + use HasSpaceTrait; + + public function __construct(SpaceInterface $space) + { + $this->setSpace($space); + } + }; + } + + /** + * @covers ::setSpace + * @covers ::getSpace + */ + public function testGetSpace(): void + { + $this->assertSame($this->space, $this->point->getSpace()); + } + + /** + * @covers ::setSpace + * @covers ::getSpace + * @covers ::belongsTo + */ + public function testBelongsTo(): void + { + $this->assertTrue($this->point->belongsTo($this->space)); + $this->assertTrue($this->point->belongsTo(new Space(2))); + $this->assertFalse($this->point->belongsTo(new Space(3))); + } +} diff --git a/tests/Unit/MathTest.php b/tests/Unit/MathTest.php new file mode 100644 index 0000000..1e316f1 --- /dev/null +++ b/tests/Unit/MathTest.php @@ -0,0 +1,135 @@ + $a + * @param array $b + * @param float $dist + */ + public function testEuclideanDist(array $a, array $b, float $dist): void + { + $this->assertEquals(round($dist, 6), round(euclidean_dist($a, $b), 6)); + } + + /** + * @return \Generator> + */ + public function euclidianDistanceDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->openCsv('euclidean_distances_2d.csv') as $row) { + list($x1, $y1, $x2, $y2, $dist) = array_map('floatval', $row); + yield [[$x1, $y1], [$x2, $y2], $dist]; + } + + /** @var array $row */ + foreach ($this->openCsv('euclidean_distances_3d.csv') as $row) { + list($x1, $y1, $z1, $x2, $y2, $z2, $dist) = array_map('floatval', $row); 
+ yield [[$x1, $y1, $z1], [$x2, $y2, $z2], $dist]; + } + } + + /** + * @covers find_centroid + * @dataProvider centroidDataProvider + * @param array $centroid + * @param array ...$points + */ + public function testFindCentroid(array $centroid, array ...$points): void + { + $this->assertEquals($centroid, find_centroid($points)); + } + + /** + * @return \Generator>> + */ + public function centroidDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->openCsv('centroids_2d.csv') as $row) { + list($x1, $y1, $x2, $y2, $x3, $y3, $x4, $y4, $cx, $cy) = array_map('floatval', $row); + yield [[$cx, $cy], [$x1, $y1], [$x2, $y2], [$x3, $y3], [$x4, $y4]]; + } + } + + /** + * @return \Generator>> + */ + public function boundariesDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->openCsv('boundaries_2d.csv') as $row) { + list($x1, $y1, $x2, $y2, $x3, $y3, $x4, $y4, $x5, $y5, $ax, $ay, $bx, $by) = array_map('floatval', $row); + yield [[$ax, $ay], [$bx, $by], [$x1, $y1], [$x2, $y2], [$x3, $y3], [$x4, $y4], [$x5, $y5]]; + } + } + + /** + * @return array + */ + public function frandDataProvider(): array + { + return [ + ['min' => 0, 'max' => 1], + ['min' => 10, 'max' => 20], + ['min' => 0, 'max' => 100], + ['min' => -100, 'max' => 100], + ['min' => -1e6, 'max' => 1e6], + ]; + } + + /** + * @covers generate_gaussian_noise + * @dataProvider gaussianNoiseDataProvider + */ + public function testGenerateGaussianNoise(float $mu, float $sigma = 1, float $nb = 1e3): void + { + // let's generate $nb numbers and sum them + for ($sum = 0, $i = 0; $i < $nb; $i++) { + $sum += array_sum(generate_gaussian_noise($mu, $sigma)); + } + + // compute the mean (which should be $mu) + $sum /= ($nb * 2); + + // verify the mean is around $mu (plus or minus $sigma) + $this->assertTrue( + $sum >= $mu - $sigma && $sum <= $mu + $sigma + ); + } + + /** + * @return array + */ + public function gaussianNoiseDataProvider(): array + { + return [ + ['mu' => 10], + ['mu' =>
100], + ['mu' => 1000], + ['mu' => -10], + ['mu' => -100], + ['mu' => -1000], + ]; + } + + private static function openCsv(string $path): \SplFileObject + { + $csv = new \SplFileObject(__DIR__ . '/../Data/' . $path); + $csv->setFlags(\SplFileObject::READ_CSV | \SplFileObject::SKIP_EMPTY | \SplFileObject::READ_AHEAD); + + return $csv; + } +} diff --git a/tests/Unit/PointCollectionTest.php b/tests/Unit/PointCollectionTest.php index 614d44a..42f0873 100644 --- a/tests/Unit/PointCollectionTest.php +++ b/tests/Unit/PointCollectionTest.php @@ -2,122 +2,165 @@ namespace Tests\Unit; -use Bdelespierre\Kmeans\Interfaces\PointCollectionInterface; -use Bdelespierre\Kmeans\Interfaces\PointInterface; -use Bdelespierre\Kmeans\Point; -use Bdelespierre\Kmeans\PointCollection; -use Bdelespierre\Kmeans\Space; +use Kmeans\Interfaces\PointCollectionInterface; +use Kmeans\Interfaces\PointInterface; +use Kmeans\Point; +use Kmeans\PointCollection; +use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Bdelespierre\Kmeans\PointCollection - * @uses Bdelespierre\Kmeans\Space - * @uses Bdelespierre\Kmeans\Point + * @coversDefaultClass \Kmeans\PointCollection + * @uses \Kmeans\Space + * @uses \Kmeans\Point */ class PointCollectionTest extends TestCase { + private Space $space; + /** @var array */ + private array $pointsArray; + private PointCollection $points; + + public function setUp(): void + { + $this->space = new Space(2); + + $this->pointsArray = array_map( + fn ($i) => new Point($this->space, [$i, $i]), + range(1, 10) + ); + + $this->points = new PointCollection( + $this->space, + $this->pointsArray + ); + } + + public function tearDown(): void + { + unset( + $this->space, + $this->pointsArray, + $this->points, + ); + } + /** * @covers ::__construct - * @covers ::add - * @covers ::has - * @covers ::remove + * @covers ::attach + * @covers ::count */ - public function testAddingPointsToCollection() + public function testAttach(): void { - $space = new Space(4); - 
$collection = new PointCollection($space); + $this->points->attach( + new Point($this->space, [11, 11]) + ); - $pointA = new Point($space, [1,2,3,4]); - $pointB = new Point($space, [5,6,7,8]); - $pointC = new Point($space, [9,0,1,2]); + $this->assertCount(11, $this->points); + } - $collection->add($pointA); - $collection->add($pointC); + /** + * @covers ::__construct + * @covers ::attach + * @covers ::count + */ + public function testAttachTwiceHasNoEffect(): void + { + $this->points->attach( + $point = new Point($this->space, [11, 11]) + ); - $this->assertTrue($collection->has($pointA)); - $this->assertFalse($collection->has($pointB)); - $this->assertTrue($collection->has($pointC)); + $this->points->attach($point); - $collection->remove($pointC); - $this->assertFalse($collection->has($pointC)); + $this->assertCount(11, $this->points); } /** * @covers ::__construct - * @covers ::add + * @covers ::attach + * @covers ::count */ - public function testAddPointFails() + public function testAttachInvalidPointFails(): void { $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^Cannot add point to collection/'); - $spaceA = new Space(2); - $spaceB = new Space(3); + $this->points->attach( + $point = new Point(new Space(3), [11, 11, 11]) + ); - $collection = new PointCollection($spaceA); - $point = new Point($spaceB, [1, 2, 3]); + $this->points->attach($point); - $collection->add($point); + $this->assertCount(11, $this->points); } /** * @covers ::__construct - * @covers ::add - * @covers ::remove - * @covers ::count + * @covers ::contains + * @covers ::attach */ - public function testCount() + public function testContains(): void { - $space = new Space(4); - $collection = new PointCollection($space); - - $pointA = new Point($space, [1,2,3,4]); - $pointB = new Point($space, [5,6,7,8]); - $pointC = new Point($space, [9,0,1,2]); + $this->assertTrue( + $this->points->contains( + $this->pointsArray[array_rand($this->pointsArray)] + ) 
+ ); + + $this->assertFalse( + $this->points->contains( + new Point($this->space, [11, 11]) + ) + ); + } - $collection->add($pointA); - $collection->add($pointB); - $collection->add($pointC); + /** + * @covers ::__construct + * @covers ::attach + * @covers ::detach + * @covers ::count + */ + public function testDetach(): void + { + $this->points->detach( + $this->pointsArray[array_rand($this->pointsArray)] + ); - $this->assertEquals(3, count($collection)); + $this->assertCount(9, $this->points); + } - $collection->remove($pointA); - $this->assertEquals(2, count($collection)); + /** + * @covers ::__construct + * @covers ::attach + * @covers ::detach + * @covers ::count + */ + public function testDetachTwiceHasNoEffect(): void + { + $this->points->detach( + $point = $this->pointsArray[array_rand($this->pointsArray)] + ); - $collection->remove($pointB); - $this->assertEquals(1, count($collection)); + $this->points->detach($point); - $collection->remove($pointC); - $this->assertEquals(0, count($collection)); + $this->assertCount(9, $this->points); } /** * @covers ::__construct - * @covers ::add + * @covers ::attach * @covers ::current * @covers ::key * @covers ::next * @covers ::rewind * @covers ::valid */ - public function testIterator() + public function testIteration(): void { - $space = new Space(4); - $collection = new PointCollection($space); - - $pointA = new Point($space, [1,2,3,4]); - $pointB = new Point($space, [5,6,7,8]); - $pointC = new Point($space, [9,0,1,2]); - - $collection->add($pointA); - $collection->add($pointB); - $collection->add($pointC); - - $iterations = 0; - foreach ($collection as $i => $point) { - $this->assertInstanceof(PointInterface::class, $point); - $iterations++; + foreach ($this->points as $key => $point) { + $this->assertTrue( + array_search($point, $this->pointsArray, true) !== false + ); } - - $this->assertEquals(3, $iterations); } } diff --git a/tests/Unit/PointTest.php b/tests/Unit/PointTest.php index 19c871d..29c074b 100644 --- 
a/tests/Unit/PointTest.php +++ b/tests/Unit/PointTest.php @@ -2,41 +2,38 @@ namespace Tests\Unit; -use Bdelespierre\Kmeans\Point; -use Bdelespierre\Kmeans\Space; +use Kmeans\Point; +use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Bdelespierre\Kmeans\Point - * @uses Bdelespierre\Kmeans\Space + * @coversDefaultClass \Kmeans\Point + * @uses \Kmeans\Space + * @uses \Kmeans\Concerns\HasSpaceTrait */ class PointTest extends TestCase { /** * @covers ::__construct - * @covers ::setCoordinates + * @covers ::sanitizeCoordinates * @covers ::getCoordinates */ - public function testCoordinates() + public function testCoordinates(): void { $space = new Space(2); - $point = new Point($space, [0.0, 0.0]); - - $this->assertSame([0.0, 0.0], $point->getCoordinates()); - - $point->setCoordinates([1.2, 3.4]); + $point = new Point($space, [1.2, 3.4]); $this->assertSame([1.2, 3.4], $point->getCoordinates()); } /** * @covers ::__construct - * @covers ::setCoordinates + * @covers ::sanitizeCoordinates */ - public function testInvalidCoordinates() + public function testInvalidCoordinates(): void { $this->expectException(\LogicException::class); - $this->expectExceptionMessage("Invalid set of coordinates: 3 coordinates expected, 2 coordinates given"); + $this->expectExceptionMessage("Invalid set of coordinates: 3 coordinates expected, 2 given"); $space = new Space(3); // 3d space $point = new Point($space, [0.0, 0.0]); // 2d space point @@ -44,30 +41,29 @@ public function testInvalidCoordinates() /** * @covers ::__construct - * @covers ::setCoordinates + * @covers ::sanitizeCoordinates */ - public function testInvalidCoordinatesValues() + public function testInvalidCoordinatesValues(): void { $this->expectException(\LogicException::class); $this->expectExceptionMessage("values at offsets [0,2] could not be converted to numbers"); $space = new Space(3); // 3d space - $point = new Point($space, [NAN, 1.0, "hello!"]); + $point = new Point($space, [NAN, 1.0, 
"hello!"]); /** @phpstan-ignore-line */ } /** * @covers ::__construct - * @covers ::setCoordinates + * @covers ::sanitizeCoordinates * @covers ::getData * @covers ::setData */ - public function testAssociateData() + public function testAssociateData(): void { $space = new Space(2); $point = new Point($space, [0.0, 0.0]); $data = (object) ['foo' => "bar"]; - $point->setData($data); $this->assertSame($data, $point->getData()); diff --git a/tests/Unit/RandomInitializationTest.php b/tests/Unit/RandomInitializationTest.php new file mode 100644 index 0000000..8aff71b --- /dev/null +++ b/tests/Unit/RandomInitializationTest.php @@ -0,0 +1,99 @@ +space = new Space(2); + + $this->points = new PointCollection($this->space, array_map( + fn ($coordinates) => new Point($this->space, $coordinates), + [[0,0], [1,1], [2,2], [3,3], [4,4], [5,5], [6,6], [7,7], [8,8], [9,9]], + )); + + $this->scheme = new RandomInitialization(); + } + + public function tearDown(): void + { + unset( + $this->space, + $this->points, + $this->scheme + ); + } + + /** + * @covers ::initializeClusters + * @covers ::getRandomPoint + */ + public function testInitializeClusters(): void + { + $clusters = $this->scheme->initializeClusters($this->points, 3); + + $this->assertCount(3, $clusters); + + $expectedNbPoints = [10, 0, 0]; + + foreach ($clusters as $i => $cluster) { + $this->assertCount( + array_shift($expectedNbPoints), + $cluster->getPoints() + ); + + $coordinates = $cluster->getCentroid()->getCoordinates(); + + $this->assertGreaterThanOrEqual(0, $coordinates[0]); + $this->assertGreaterThanOrEqual(0, $coordinates[1]); + + $this->assertLessThanOrEqual(9, $coordinates[0]); + $this->assertLessThanOrEqual(9, $coordinates[1]); + } + } + + /** + * @covers ::initializeClusters + */ + public function testInitializeClustersWithInvalidClusterCount(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^Invalid cluster count/'); + + 
$this->scheme->initializeClusters($this->points, 0); + } + + /** + * @covers ::initializeClusters + * @covers ::getRandomPoint + */ + public function testInitializeClustersWithoutPoints(): void + { + $this->expectException(\LogicException::class); + $this->expectExceptionMessageMatches('/^Unable to pick a random point out of an empty point collection/'); + + $this->scheme->initializeClusters(new PointCollection($this->space), 3); + } +} diff --git a/tests/Unit/SpaceTest.php b/tests/Unit/SpaceTest.php index 4380f6b..1efbf0e 100644 --- a/tests/Unit/SpaceTest.php +++ b/tests/Unit/SpaceTest.php @@ -2,11 +2,11 @@ namespace Tests\Unit; -use Bdelespierre\Kmeans\Space; +use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Bdelespierre\Kmeans\Space + * @coversDefaultClass \Kmeans\Space */ class SpaceTest extends TestCase { @@ -14,7 +14,7 @@ class SpaceTest extends TestCase * @covers ::__construct * @covers ::getDimensions */ - public function testGetDimensions() + public function testGetDimensions(): void { $space = new Space(1); @@ -32,10 +32,26 @@ public function testGetDimensions() /** * @covers ::__construct */ - public function testInvalidSpaceDimensions() + public function testInvalidSpaceDimensions(): void { - $this->expectException(\LogicException::class); + $this->expectException(\InvalidArgumentException::class); $space = new Space(0); } + + /** + * @covers ::__construct + * @covers ::isEqualTo + * @covers ::getDimensions + */ + public function testIsEqualTo(): void + { + $this->assertTrue( + (new Space(1))->isEqualTo(new Space(1)) + ); + + $this->assertFalse( + (new Space(1))->isEqualTo(new Space(2)) + ); + } } diff --git a/tests_old/Kmeans/ClusterTest.php b/tests_old/Kmeans/ClusterTest.php deleted file mode 100644 index 963f56b..0000000 --- a/tests_old/Kmeans/ClusterTest.php +++ /dev/null @@ -1,206 +0,0 @@ -assertInstanceOf( - Cluster::class, - new Cluster(new Space(2), [0,0]) - ); - } - - public function testToArray() - { - $space = 
new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $this->assertEquals( - [ - 'centroid' => $points[0]->toArray(), - 'points' => array_map( - function ($p) { - return $p->toArray(); - }, - $points - ), - ], - $cluster->toArray() - ); - } - - public function testAttach() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $this->assertCount(3, $cluster); - } - - public function testAttachException() - { - $this->expectException(\LogicException::class); - - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - - $cluster->attach($cluster); - } - - public function testDetach() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $cluster->detach($points[0]); - $this->assertCount(2, $cluster); - - $cluster->detach($points[1]); - $this->assertCount(1, $cluster); - - $cluster->detach($points[2]); - $this->assertCount(0, $cluster); - } - - public function testAttachAll() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - $storage = new \SplObjectStorage(); - foreach ($points as $point) { - $storage->attach($point); - } - - $cluster->attachAll($storage); - $this->assertCount(3, $cluster); - } - - public function testDetachAll() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, 
[2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $storage = new \SplObjectStorage(); - foreach ($points as $point) { - $storage->attach($point); - } - - $cluster->detachAll($storage); - $this->assertCount(0, $cluster); - } - - public function testUpdateCentroid() - { - $space = new Space(1); - $cluster = new Cluster($space, [0]); - - $cluster->updateCentroid(); - $this->assertEquals([0], $cluster->getCoordinates()); - - $cluster->attach(new Point($space, [5])); - $cluster->attach(new Point($space, [6])); - $cluster->attach(new Point($space, [7])); - - $cluster->updateCentroid(); - - $this->assertEquals([6], $cluster->getCoordinates()); - } - - public function testGetIterator() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - foreach ($points as $point) { - $cluster->attach($point); - } - - $this->assertInstanceOf( - \SplObjectStorage::class, - $cluster->getIterator() - ); - } - - public function testCount() - { - $space = new Space(2); - $cluster = new Cluster($space, [0,0]); - $points = [ - new Point($space, [0,0]), - new Point($space, [1,1]), - new Point($space, [2,2]), - ]; - - $cluster->attach($points[0]); - $this->assertEquals(1, $cluster->count()); - - $cluster->attach($points[1]); - $this->assertEquals(2, $cluster->count()); - - $cluster->attach($points[2]); - $this->assertEquals(3, $cluster->count()); - - $cluster->detach($points[2]); - $this->assertEquals(2, $cluster->count()); - - $cluster->detach($points[1]); - $this->assertEquals(1, $cluster->count()); - - $cluster->detach($points[0]); - $this->assertEquals(0, $cluster->count()); - } -} diff --git a/tests_old/Kmeans/PointTest.php b/tests_old/Kmeans/PointTest.php deleted file mode 100644 index b06a28b..0000000 --- a/tests_old/Kmeans/PointTest.php +++ /dev/null @@ -1,157 +0,0 @@ -assertInstanceOf(Point::class, $point); - } - - public function 
testToArray() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertEquals(['coordinates' => [0,0], 'data' => null], $point->toArray()); - - $space[$point] = "foobar"; - - $this->assertEquals(['coordinates' => [0,0], 'data' => "foobar"], $point->toArray()); - } - - public function testGetDistanceWith() - { - $space = new Space(2); - $point1 = new Point($space, [1,1]); - $point2 = new Point($space, [2,1]); - - $this->assertEquals(1, $point1->getDistanceWith($point2)); - } - - public function testGetDistanceWithException() - { - $this->expectException(\LogicException::class); - - $space = new Space(2); - $point1 = new Point($space, [1,1]); - - $space = new Space(3); - $point2 = new Point($space, [2,1,0]); - - $point1->getDistanceWith($point2); - } - - public function testGetDistanceWithPreciseFalse() - { - $space = new Space(2); - $point1 = new Point($space, [4,3]); - $point2 = new Point($space, [2,1]); - - $this->assertEquals(8, $point1->getDistanceWith($point2, false)); - } - - public function testGetClosest() - { - $space = new Space(2); - $points = [ - new Point($space, [-2,-2]), - new Point($space, [-1,-1]), - new Point($space, [ 0, 0]), - new Point($space, [ 1, 1]), - new Point($space, [ 2, 2]), - ]; - - $this->assertEquals($points[0], (new Point($space, [-2.1, -2.1]))->getClosest($points)); - $this->assertEquals($points[1], (new Point($space, [-1.1, -1.1]))->getClosest($points)); - $this->assertEquals($points[2], (new Point($space, [ 0.1, 0.1]))->getClosest($points)); - $this->assertEquals($points[3], (new Point($space, [ 1.1, 1.1]))->getClosest($points)); - $this->assertEquals($points[4], (new Point($space, [ 2.1, 2.1]))->getClosest($points)); - } - - public function testBelongsTo() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertTrue($point->belongsTo($space)); - $this->assertFalse($point->belongsTo(new Space(2))); - } - - public function testGetSpace() - { - $space = new Space(2); - $point = 
new Point($space, [0,0]); - - $this->assertTrue($point->getSpace() === $space); - } - - public function testGetCoordinates() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertEquals([0,0], $point->getCoordinates()); - } - - public function testOffsetExists() - { - $space = new Space(2); - $point = new Point($space, [0,0]); - - $this->assertTrue($point->offsetExists(0)); - $this->assertTrue($point->offsetExists(1)); - $this->assertFalse($point->offsetExists(2)); - } - - public function testOffsetGet() - { - $space = new Space(2); - $point = new Point($space, [1,2]); - - $this->assertEquals(1, $point->offsetGet(0)); - $this->assertEquals(2, $point->offsetGet(1)); - } - - public function testOffsetGetError() - { - $this->expectError(); - - $space = new Space(1); - $point = new Point($space, [1]); - - $point->offsetGet(1); - } - - public function testOffsetSet() - { - $space = new Space(2); - $point = new Point($space, [1,2]); - - $point->offsetSet(0, 3); - $point->offsetSet(1, 4); - - $this->assertEquals(3, $point->offsetGet(0)); - $this->assertEquals(4, $point->offsetGet(1)); - } - - public function testOffsetUnset() - { - $space = new Space(2); - $point = new Point($space, [1,2]); - - $point->offsetUnset(0); - $point->offsetUnset(1); - - $this->assertFalse($point->offsetExists(0)); - $this->assertFalse($point->offsetExists(1)); - } -} diff --git a/tests_old/Kmeans/SpaceTest.php b/tests_old/Kmeans/SpaceTest.php deleted file mode 100644 index 63abfdd..0000000 --- a/tests_old/Kmeans/SpaceTest.php +++ /dev/null @@ -1,222 +0,0 @@ -assertInstanceOf(Space::class, new Space(1)); - $this->assertInstanceOf(Space::class, new Space(2)); - $this->assertInstanceOf(Space::class, new Space(3)); - $this->assertInstanceOf(Space::class, new Space(50)); - } - - public function testConstructException() - { - $this->expectException(\LogicException::class); - - new Space(-1); - } - - public function testToArray() - { - $space = new Space(2); - $points = [ 
- new Point($space, [-2,-2]), - new Point($space, [-1,-1]), - new Point($space, [ 0, 0]), - new Point($space, [ 1, 1]), - new Point($space, [ 2, 2]), - ]; - - foreach ($points as $point) { - $space->attach($point); - } - - $this->assertEquals( - ['points' => array_map(function ($p) { - return $p->toArray(); - }, $points)], - $space->toArray() - ); - } - - public function testNewPoint() - { - $space = new Space(2); - - $this->assertInstanceOf(Point::class, $space->newPoint([0,0])); - } - - public function testNewPointException() - { - $this->expectException(\LogicException::class); - - $space = new Space(2); - $space->newPoint([1,2,3]); - } - - public function testAddPoint() - { - $space = new Space(2); - - $space->addPoint([0,0]); - $space->addPoint([1,1]); - $space->addPoint([2,2]); - - $this->assertCount(3, $space); - } - - public function testAttach() - { - $space = new Space(2); - - $space->attach(new Point($space, [0,0])); - $space->attach(new Point($space, [1,1])); - $space->attach(new Point($space, [2,2])); - - $this->assertCount(3, $space); - } - - public function testAttachException() - { - $this->expectException(\InvalidArgumentException::class); - - $space = new Space(2); - $space->attach("INVALID"); - } - - public function testGetDimention() - { - $this->assertEquals(1, (new Space(1))->getDimention()); - $this->assertEquals(2, (new Space(2))->getDimention()); - $this->assertEquals(3, (new Space(3))->getDimention()); - } - - public function testGetBoundaries() - { - $space = new Space(2); - - $this->assertEmpty($space->getBoundaries()); - - $space->attach($p1 = new Point($space, [ 0, 0])); - $space->attach($p2 = new Point($space, [ 0,10])); - $space->attach($p3 = new Point($space, [10, 0])); - $space->attach($p4 = new Point($space, [10,10])); - - $this->assertEquals([$p1, $p4], $space->getBoundaries()); - } - - public function testGetRandomPoint() - { - $space = new Space(1); - - $min = new Point($space, [0]); - $max = new Point($space, [10]); - - 
Space::setRng(function ($min, $max) { - return $min; - }); - $this->assertEquals($min, $space->getRandomPoint($min, $max)); - - Space::setRng(function ($min, $max) { - return $max; - }); - $this->assertEquals($max, $space->getRandomPoint($min, $max)); - } - - public function testSolve() - { - Space::setRng(function ($min, $max) { - static $values = [10, 0]; - return array_pop($values) ?? mt_rand($min, $max); - }); - - $space = new Space(1); - - $space->attach($space->newPoint([1])); - $space->attach($space->newPoint([2])); - $space->attach($space->newPoint([3])); - - $space->attach($space->newPoint([7])); - $space->attach($space->newPoint([8])); - $space->attach($space->newPoint([9])); - - $iterations = 0; - $history = []; - $callback = function ($space, $clusters) use (&$iterations, &$history) { - foreach ($clusters as $cluster) { - $history[$iterations][] = $cluster->getCoordinates()[0]; - } - - $iterations++; - }; - - $clusters = $space->solve(2, $callback); - - $this->assertEquals([[0,10],[2,8]], $history); - $this->assertEquals(2, $iterations); - $this->assertcount(2, $clusters); - - $this->assertEquals([2], $clusters[0]->getCoordinates()); - $this->assertEquals([8], $clusters[1]->getCoordinates()); - } - - public function testSolveSingleCluster() - { - $space = new Space(2); - $space->attach($space->newPoint([0,0])); - $space->solve(1); - } - - public function testSolveWithInvalidClustersNumber() - { - $this->expectException(\InvalidArgumentException::class); - - $space = new Space(2); - $space->attach($space->newPoint([0,0])); - $space->solve(-1); - } - - public function testSolveWithKmeansPlusPlus() - { - Space::setRng(function () { - return 52590703; - }); - - $space = new Space(1); - - $space->attach($space->newPoint([1])); - $space->attach($space->newPoint([2])); - $space->attach($space->newPoint([3])); - - $space->attach($space->newPoint([7])); - $space->attach($space->newPoint([8])); - $space->attach($space->newPoint([9])); - - $iterations = 0; - 
$history = []; - $callback = function ($space, $clusters) use (&$iterations, &$history) { - foreach ($clusters as $cluster) { - $history[$iterations][] = $cluster->getCoordinates()[0]; - } - - $iterations++; - }; - - $clusters = $space->solve(2, $callback, Cluster::INIT_KMEANS_PLUS_PLUS); - - $this->assertEquals([[1,3],[1.5,6.75],[2,8]], $history); - $this->assertEquals(3, $iterations); - $this->assertcount(2, $clusters); - - $this->assertEquals([2], $clusters[0]->getCoordinates()); - $this->assertEquals([8], $clusters[1]->getCoordinates()); - } -} From df638c1a739b877e7bb8dcfb12f84637eef0aa40 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Tue, 21 Sep 2021 03:40:55 +0200 Subject: [PATCH 04/20] dropping support for PHP 7.3 --- .travis.yml | 1 - composer.json | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2780ef2..356a0a8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: php php: - - 7.3 - 7.4 - 8.0 diff --git a/composer.json b/composer.json index fca5c51..a57fab5 100644 --- a/composer.json +++ b/composer.json @@ -16,7 +16,7 @@ } ], "require": { - "php": "^7.3|^8.0" + "php": "^7.4|^8.0" }, "require-dev": { "phpunit/phpunit": "^9.3", From 1dad69f7b26fbf0fee78f6daa8bccdecd1fea805 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Tue, 21 Sep 2021 03:56:52 +0200 Subject: [PATCH 05/20] update scrutinizer.yml --- .scrutinizer.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.scrutinizer.yml b/.scrutinizer.yml index 1165e72..27497c2 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -1,3 +1,11 @@ +build: + nodes: + analysis: + project_setup: + override: true + tests: + override: [php-scrutinizer-run] + filter: excluded_paths: [tests/*] @@ -16,3 +24,8 @@ checks: fix_line_ending: true fix_identation_4spaces: true fix_doc_comments: true + +tools: + external_code_coverage: + timeout: 600 + runs: 3 From 3f486efd50371cbf62bc8c003a16177ee2d44a3d Mon Sep 17 00:00:00 
2001 From: Benjamin Delespierre Date: Tue, 21 Sep 2021 04:07:30 +0200 Subject: [PATCH 06/20] updating travis.yml --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 356a0a8..a3898db 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,4 +17,5 @@ script: - vendor/bin/phpunit --coverage-text --coverage-clover=coverage.clover after_script: - - php vendor/bin/ocular code-coverage:upload --format=php-clover coverage.clover + - wget https://scrutinizer-ci.com/ocular.phar + - php ocular.phar code-coverage:upload --format=php-clover coverage.clover From 3f58ed3311ec7c433464a820e822f5573e7d859e Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Tue, 21 Sep 2021 04:29:18 +0200 Subject: [PATCH 07/20] updating travis.yml (again) to attempt to get some coverage --- .scrutinizer.yml | 2 +- .travis.yml | 19 ++++++++++++------- composer.json | 2 +- phpunit.xml.dist_old | 34 ---------------------------------- 4 files changed, 14 insertions(+), 43 deletions(-) delete mode 100644 phpunit.xml.dist_old diff --git a/.scrutinizer.yml b/.scrutinizer.yml index 27497c2..4e48850 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -28,4 +28,4 @@ checks: tools: external_code_coverage: timeout: 600 - runs: 3 + runs: 1 diff --git a/.travis.yml b/.travis.yml index a3898db..b2604c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,18 +4,23 @@ php: - 7.4 - 8.0 +cache: + directories: + - $HOME/.composer/cache + env: - matrix: - - COMPOSER_FLAGS="--prefer-lowest" - - COMPOSER_FLAGS="" + - XDEBUG_MODE=coverage before_script: - - travis_retry composer self-update - - travis_retry composer update ${COMPOSER_FLAGS} --no-interaction --prefer-source + - travis_retry composer update --no-interaction --prefer-dist script: + - vendor/bin/phpcs --standard=psr2 src/ - vendor/bin/phpunit --coverage-text --coverage-clover=coverage.clover after_script: - - wget https://scrutinizer-ci.com/ocular.phar - - php ocular.phar code-coverage:upload 
--format=php-clover coverage.clover + - | + if [[ "$TRAVIS_PHP_VERSION" != '8.0' ]]; then + wget https://scrutinizer-ci.com/ocular.phar + php ocular.phar code-coverage:upload --format=php-clover coverage.clover + fi diff --git a/composer.json b/composer.json index a57fab5..b962314 100644 --- a/composer.json +++ b/composer.json @@ -19,7 +19,7 @@ "php": "^7.4|^8.0" }, "require-dev": { - "phpunit/phpunit": "^9.3", + "phpunit/phpunit": "^9.5", "squizlabs/php_codesniffer": "^3.6", "phpstan/phpstan": "^0.12.97", "mockery/mockery": "^1.4" diff --git a/phpunit.xml.dist_old b/phpunit.xml.dist_old deleted file mode 100644 index b9ba704..0000000 --- a/phpunit.xml.dist_old +++ /dev/null @@ -1,34 +0,0 @@ - - - - - src/ - - - - - - - - - - tests - - - - - - From e248e7c4282fd8847df83270c9175a0475416814 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Sat, 26 Mar 2022 22:33:55 +0100 Subject: [PATCH 08/20] bump phpstan/phpstan to ^1.5 --- composer.json | 2 +- phpstan.neon | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/composer.json b/composer.json index b962314..6b32b91 100644 --- a/composer.json +++ b/composer.json @@ -21,7 +21,7 @@ "require-dev": { "phpunit/phpunit": "^9.5", "squizlabs/php_codesniffer": "^3.6", - "phpstan/phpstan": "^0.12.97", + "phpstan/phpstan": "^1.5", "mockery/mockery": "^1.4" }, "autoload": { diff --git a/phpstan.neon b/phpstan.neon index 6c31ff1..191c126 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -1,5 +1,5 @@ parameters: paths: - src - # The level 8 is the highest level - level: 8 + - tests + level: 9 From b942f37abaf5734716fb8d131601096538a779f5 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Sat, 26 Mar 2022 23:09:44 +0100 Subject: [PATCH 09/20] moving math function into Math static helper --- composer.json | 5 +-- src/Algorithm.php | 4 +-- src/Math.php | 68 ++++++++++++++++++++++++++++++++++++ src/math.php | 63 --------------------------------- tests/Unit/AlgorithmTest.php | 14 ++++---- 
tests/Unit/MathTest.php | 22 ++++++------ 6 files changed, 90 insertions(+), 86 deletions(-) create mode 100644 src/Math.php delete mode 100644 src/math.php diff --git a/composer.json b/composer.json index 6b32b91..3a83bad 100644 --- a/composer.json +++ b/composer.json @@ -27,10 +27,7 @@ "autoload": { "psr-4": { "Kmeans\\": "src/" - }, - "files": [ - "src/math.php" - ] + } }, "autoload-dev": { "psr-4": { diff --git a/src/Algorithm.php b/src/Algorithm.php index ccec8f7..f28be0a 100644 --- a/src/Algorithm.php +++ b/src/Algorithm.php @@ -98,12 +98,12 @@ protected function getClosestCluster(ClusterCollectionInterface $clusters, Point protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float { - return euclidean_dist($pointA->getCoordinates(), $pointB->getCoordinates()); + return Math::euclideanDist($pointA->getCoordinates(), $pointB->getCoordinates()); } protected function findCentroid(PointCollectionInterface $points): PointInterface { - return new Point($points->getSpace(), find_centroid( + return new Point($points->getSpace(), Math::centroid( array_map(fn ($point) => $point->getCoordinates(), iterator_to_array($points)) )); } diff --git a/src/Math.php b/src/Math.php new file mode 100644 index 0000000..f6e6c26 --- /dev/null +++ b/src/Math.php @@ -0,0 +1,68 @@ + $a + * @param array $b + */ + public static function euclideanDist(array $a, array $b): float + { + assert(count($a) == count($b)); + + for ($dist = 0, $n = 0; $n < count($a); $n++) { + $dist += pow($a[$n] - $b[$n], 2); + } + + return sqrt($dist); + } + + /** + * @param array> $points + * @return array + */ + public static function centroid(array $points): array + { + $centroid = []; + + foreach ($points as $point) { + foreach ($point as $n => $value) { + $centroid[$n] = ($centroid[$n] ?? 
0) + $value; + } + } + + foreach ($centroid as &$value) { + $value /= count($points); + } + + return $centroid; + } + + /** + * The standard Box–Muller transform generates values from the standard normal + * distribution (i.e. standard normal deviates). + * + * @see https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform + * + * @return array{float, float} + */ + public static function gaussianNoise(float $mu, float $sigma): array + { + static $twoPi = 2 * M_PI; + + // create two random numbers, make sure u1 is greater than epsilon + do { + $u1 = (float) mt_rand() / (float) mt_getrandmax(); + $u2 = (float) mt_rand() / (float) mt_getrandmax(); + } while ($u1 < PHP_FLOAT_EPSILON); + + // compute z0 and z1 + $mag = $sigma * sqrt(-2.0 * log($u1)); + $z0 = $mag * cos($twoPi * $u2) + $mu; + $z1 = $mag * sin($twoPi * $u2) + $mu; + + return [$z0, $z1]; + } +} diff --git a/src/math.php b/src/math.php deleted file mode 100644 index 2138794..0000000 --- a/src/math.php +++ /dev/null @@ -1,63 +0,0 @@ - $a - * @param array $b - */ -function euclidean_dist(array $a, array $b): float -{ - assert(count($a) == count($b)); - - for ($dist = 0, $n = 0; $n < count($a); $n++) { - $dist += pow($a[$n] - $b[$n], 2); - } - - return sqrt($dist); -} - -/** - * @param array> $points - * @return array - */ -function find_centroid(array $points): array -{ - $centroid = []; - - foreach ($points as $point) { - foreach ($point as $n => $value) { - $centroid[$n] = ($centroid[$n] ?? 0) + $value; - } - } - - foreach ($centroid as &$value) { - $value /= count($points); - } - - return $centroid; -} - -/** - * The standard Box–Muller transform generates values from the standard normal - * distribution (i.e. standard normal deviates). 
- * - * @see https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform - * - * @return array{float, float} - */ -function generate_gaussian_noise(float $mu, float $sigma): array -{ - static $twoPi = 2 * M_PI; - - // create two random numbers, make sure u1 is greater than epsilon - do { - $u1 = (float) mt_rand() / (float) mt_getrandmax(); - $u2 = (float) mt_rand() / (float) mt_getrandmax(); - } while ($u1 < PHP_FLOAT_EPSILON); - - // compute z0 and z1 - $mag = $sigma * sqrt(-2.0 * log($u1)); - $z0 = $mag * cos($twoPi * $u2) + $mu; - $z1 = $mag * sin($twoPi * $u2) + $mu; - - return [$z0, $z1]; -} diff --git a/tests/Unit/AlgorithmTest.php b/tests/Unit/AlgorithmTest.php index 4c1f9cd..aea2549 100644 --- a/tests/Unit/AlgorithmTest.php +++ b/tests/Unit/AlgorithmTest.php @@ -10,6 +10,7 @@ use Kmeans\Interfaces\InitializationSchemeInterface; use Kmeans\Interfaces\PointCollectionInterface; use Kmeans\Interfaces\SpaceInterface; +use Kmeans\Math; use Kmeans\Point; use Kmeans\PointCollection; use Kmeans\Space; @@ -23,6 +24,7 @@ * @uses \Kmeans\Point * @uses \Kmeans\PointCollection * @uses \Kmeans\Space + * @uses \Kmeans\Math */ class AlgorithmTest extends TestCase { @@ -47,9 +49,9 @@ public function tearDown(): void * @covers ::getDistanceBetween * @covers ::invokeIterationCallbacks * @covers ::iterate - * @covers euclidean_dist - * @covers find_centroid - * @covers generate_gaussian_noise + * @covers \Kmeans\Math::euclideanDist + * @covers \Kmeans\Math::centroid + * @covers \Kmeans\Math::gaussianNoise * @param int<0, max> $dimensions * @param array> $expected * @param array> $initialClusterCentroids @@ -87,7 +89,7 @@ public function testClusterize( foreach ($expected as $n => $expectedCentroid) { // assert found cluster centroids are in the vicinity // of expected centroids - $this->assertLessThan(1, euclidean_dist( + $this->assertLessThan(1, Math::euclideanDist( $expectedCentroid, $resultClusters[$n]->getCentroid()->getCoordinates() )); @@ -167,7 +169,7 @@ public function 
testClusterizeFailsWhenClusterInitializationFails(): void * @covers ::invokeIterationCallbacks * @covers ::getClosestCluster * @covers ::getDistanceBetween - * @covers euclidean_dist + * @covers \Kmeans\Math::euclideanDist */ public function testIterationCallback(): void { @@ -213,7 +215,7 @@ private function makePointsAround( $coordinates = $centroid; foreach ($coordinates as &$n) { - list($n) = generate_gaussian_noise($n, $radius); + list($n) = Math::gaussianNoise($n, $radius); } $points->attach(new Point($space, $coordinates)); diff --git a/tests/Unit/MathTest.php b/tests/Unit/MathTest.php index 1e316f1..f3ea16a 100644 --- a/tests/Unit/MathTest.php +++ b/tests/Unit/MathTest.php @@ -2,17 +2,17 @@ namespace Tests\Unit; +use Kmeans\Math; +use Kmeans\findCentroid; use PHPUnit\Framework\TestCase; +/** + * @coversDefaultClass \Kmeans\Math + */ class MathTest extends TestCase { - public static function setUpBeforeClass(): void - { - require_once __DIR__ . '/../../src/math.php'; - } - /** - * @covers euclidean_dist + * @covers ::euclideanDist * @dataProvider euclidianDistanceDataProvider * @param array $a * @param array $b @@ -20,7 +20,7 @@ public static function setUpBeforeClass(): void */ public function testEuclideanDist(array $a, array $b, float $dist): void { - $this->assertEquals(round($dist, 6), round(euclidean_dist($a, $b), 6)); + $this->assertEquals(round($dist, 6), round(Math::euclideanDist($a, $b), 6)); } /** @@ -42,14 +42,14 @@ public function euclidianDistanceDataProvider(): \Generator } /** - * @covers find_centroid + * @covers ::centroid * @dataProvider centroidDataProvider * @param array $centroid * @param array ...$points */ public function testFindCentroid(array $centroid, array ...$points): void { - $this->assertEquals($centroid, find_centroid($points)); + $this->assertEquals($centroid, Math::centroid($points)); } /** @@ -91,14 +91,14 @@ public function frandDataProvider(): array } /** - * @covers generate_gaussian_noise + * @covers ::gaussianNoise * 
@dataProvider gaussianNoiseDataProvider */ public function testGenerateGaussianNoise(float $mu, float $sigma = 1, float $nb = 1e3): void { // let's generate $nb numbers and sum them for ($sum = 0, $i = 0; $i < $nb; $i++) { - $sum += array_sum(generate_gaussian_noise($mu, $sigma)); + $sum += array_sum(Math::gaussianNoise($mu, $sigma)); } // cumpute the mean (which should be $mu) From cde9da7675ee694ef163e8ad1b7ae14ec5846428 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Wed, 4 May 2022 00:04:02 +0200 Subject: [PATCH 10/20] moving coverage into build/coverage by default --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 3a83bad..9353d8a 100644 --- a/composer.json +++ b/composer.json @@ -36,6 +36,6 @@ }, "scripts": { "test": "vendor/bin/phpunit", - "test-coverage": "vendor/bin/phpunit --coverage-html coverage" + "test-coverage": "vendor/bin/phpunit --coverage-html build/coverage" } } From 1bc5870def1b8cedb95f6c22570483bdc117d090 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Wed, 4 May 2022 00:04:49 +0200 Subject: [PATCH 11/20] adding haversine and GPS centroid formulas to Math helper --- src/Math.php | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/Math.php b/src/Math.php index f6e6c26..eae57c9 100644 --- a/src/Math.php +++ b/src/Math.php @@ -65,4 +65,62 @@ public static function gaussianNoise(float $mu, float $sigma): array return [$z0, $z1]; } + + public static int $earthRadius = 6371000; + + /** + * Calculates the great-circle distance between two points, with + * the Haversine formula. 
+ * + * @see https://stackoverflow.com/a/14751773/17403258 + * + * @param array{0: float, 1: float} $from + * @param array{0: float, 1: float} $to + * @return float + */ + public static function haversine($from, $to): float + { + return 2 * self::$earthRadius * asin(sqrt( + pow(sin(deg2rad($to[0] - $from[0]) / 2), 2) + + cos(deg2rad($from[0])) * cos(deg2rad($to[0])) + * pow(sin(deg2rad($to[1] - $from[1]) / 2), 2) + )); + } + + /** + * Calculates the centroid of GPS coordinates + * + * @see https://stackoverflow.com/questions/6671183 + * + * @param array $points + * @return array{0: float, 1: float} + */ + public static function gpsCentroid(array $points): array + { + if (count($points) == 1) { + return $points[0]; + } + + $x = $y = $z = 0; + + foreach ($points as $point) { + $lat = deg2rad($point[0]); + $long = deg2rad($point[0]); + + $x += cos($lat) * cos($long); + $y += cos($lat) * sin($long); + $z += sin($lat); + } + + $x /= count($points); + $y /= count($points); + $z /= count($points); + + $hypotenuse = sqrt(pow($x, 2) + pow($y, 2)); + + $long = atan2($y, $x); + $lat = atan2($z, $hypotenuse); + + return [rad2deg($lat), rad2deg($long)]; + } } From d69087bfa76ab416667a363a0ee52d39b7b99e65 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Wed, 4 May 2022 00:06:07 +0200 Subject: [PATCH 12/20] minor refactoring of Algorithm class --- src/Algorithm.php | 34 ++++++++------- tests/Unit/AlgorithmTest.php | 85 ++++++++++++++++++++---------------- 2 files changed, 65 insertions(+), 54 deletions(-) diff --git a/src/Algorithm.php b/src/Algorithm.php index f28be0a..7b38d6f 100644 --- a/src/Algorithm.php +++ b/src/Algorithm.php @@ -14,9 +14,11 @@ class Algorithm implements AlgorithmInterface { private InitializationSchemeInterface $initScheme; - /** @var array */ - private array $iterationCallbacks = []; + /** + * @var array + */ + private array $iterationCallbacks = []; public function __construct(InitializationSchemeInterface $initScheme) { @@ -46,7 +48,19 @@ 
public function clusterize(PointCollectionInterface $points, int $nbClusters): C return $clusters; } - protected function iterate(ClusterCollectionInterface $clusters): bool + protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float + { + return Math::euclideanDist($pointA->getCoordinates(), $pointB->getCoordinates()); + } + + protected function findCentroid(PointCollectionInterface $points): PointInterface + { + return new Point($points->getSpace(), Math::centroid( + array_map(fn (PointInterface $point) => $point->getCoordinates(), iterator_to_array($points)) + )); + } + + private function iterate(ClusterCollectionInterface $clusters): bool { /** @var \SplObjectStorage */ $changed = new \SplObjectStorage(); @@ -78,7 +92,7 @@ protected function iterate(ClusterCollectionInterface $clusters): bool return count($changed) > 0; } - protected function getClosestCluster(ClusterCollectionInterface $clusters, PointInterface $point): ClusterInterface + private function getClosestCluster(ClusterCollectionInterface $clusters, PointInterface $point): ClusterInterface { $min = null; $closest = null; @@ -96,18 +110,6 @@ protected function getClosestCluster(ClusterCollectionInterface $clusters, Point return $closest; } - protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float - { - return Math::euclideanDist($pointA->getCoordinates(), $pointB->getCoordinates()); - } - - protected function findCentroid(PointCollectionInterface $points): PointInterface - { - return new Point($points->getSpace(), Math::centroid( - array_map(fn ($point) => $point->getCoordinates(), iterator_to_array($points)) - )); - } - protected function invokeIterationCallbacks(ClusterCollectionInterface $clusters): void { foreach ($this->iterationCallbacks as $callback) { diff --git a/tests/Unit/AlgorithmTest.php b/tests/Unit/AlgorithmTest.php index aea2549..95e4510 100644 --- a/tests/Unit/AlgorithmTest.php +++ b/tests/Unit/AlgorithmTest.php 
@@ -40,6 +40,9 @@ public function tearDown(): void Mockery::close(); } + // ------------------------------------------------------------------------ + // tests + /** * @dataProvider clusterizeDataProvider * @covers ::__construct @@ -102,44 +105,6 @@ public function testClusterize( } } - /** - * @return array - */ - public function clusterizeDataProvider(): array - { - return [ - 'one dimension, 3 clusters, 5 points per cluster' => [ - 'dimension' => 1, - 'expected' => [ - [-50], - [0], - [50], - ], - 'initialClusterCentroids' => [ - [-10], - [0], - [10] - ], - 'nbPointsPerCentroid' => 5, - ], - - 'two dimensions, 3 clusters, 50 points per cluster' => [ - 'dimension' => 2, - 'expected' => [ - [20, 10], - [40, 20], - [60, 15], - ], - 'initialClusterCentroids' => [ - [12, 10], - [33, 20], - [60, 10], - ], - 'nbPointsPerCentroid' => 50, - ], - ]; - } - /** * @covers ::__construct * @covers ::clusterize @@ -198,6 +163,50 @@ function (AlgorithmInterface $algo, ClusterCollectionInterface $cluster) use (&$ $this->assertTrue($callbackCalled); } + // ------------------------------------------------------------------------ + // data-providers + + /** + * @return array + */ + public function clusterizeDataProvider(): array + { + return [ + 'one dimension, 3 clusters, 5 points per cluster' => [ + 'dimension' => 1, + 'expected' => [ + [-50], + [0], + [50], + ], + 'initialClusterCentroids' => [ + [-10], + [0], + [10] + ], + 'nbPointsPerCentroid' => 5, + ], + + 'two dimensions, 3 clusters, 50 points per cluster' => [ + 'dimension' => 2, + 'expected' => [ + [20, 10], + [40, 20], + [60, 15], + ], + 'initialClusterCentroids' => [ + [12, 10], + [33, 20], + [60, 10], + ], + 'nbPointsPerCentroid' => 50, + ], + ]; + } + + // ------------------------------------------------------------------------ + // helpers + /** * @param array> $centroids * @param int<0, max> $nbPointsPerCentroid From a7555cd6191944db4c9d51fdd7b0ddb452f83045 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: 
Wed, 4 May 2022 00:06:34 +0200 Subject: [PATCH 13/20] starting GPS clustering algorithm implementation --- src/Gps/Algorithm.php | 38 ++++++++++++++++++++++++++++++++++++++ src/Gps/Point.php | 41 +++++++++++++++++++++++++++++++++++++++++ src/Gps/Space.php | 13 +++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 src/Gps/Algorithm.php create mode 100644 src/Gps/Point.php create mode 100644 src/Gps/Space.php diff --git a/src/Gps/Algorithm.php b/src/Gps/Algorithm.php new file mode 100644 index 0000000..36a4ca1 --- /dev/null +++ b/src/Gps/Algorithm.php @@ -0,0 +1,38 @@ +getCoordinates(), $pointB->getCoordinates()); + } + + protected function findCentroid(PointCollectionInterface $points): PointInterface + { + if (! $points->getSpace() instanceof Space) { + throw new \InvalidArgumentException( + "Point collection should consist of GPS coordinates" + ); + } + + /** @var array $points */ + $points = iterator_to_array($points); + + return new Point(Math::gpsCentroid( + array_map(fn (Point $point) => $point->getCoordinates(), $points) + )); + } +} diff --git a/src/Gps/Point.php b/src/Gps/Point.php new file mode 100644 index 0000000..89fcc49 --- /dev/null +++ b/src/Gps/Point.php @@ -0,0 +1,41 @@ + $coordinates + */ + public function __construct(array $coordinates) + { + $this->validateCoordinates($coordinates); + + parent::__construct(new Space(), $coordinates); + } + + /** + * @param array $coordinates + */ + private function validateCoordinates(array $coordinates): void + { + if (count($coordinates) != 2) { + throw new \InvalidArgumentException( + "Invalid GPS coordinates" + ); + } + + list($lat, $long) = $coordinates; + + if ($lat < -90 || $lat > 90 || $long < -180 || $long > 180) { + throw new \InvalidArgumentException( + "Invalid GPS coordinates" + ); + } + } +} diff --git a/src/Gps/Space.php b/src/Gps/Space.php new file mode 100644 index 0000000..4311088 --- /dev/null +++ b/src/Gps/Space.php @@ -0,0 +1,13 @@ + Date: Tue, 12 Apr 2022 18:50:02 +0200 
Subject: [PATCH 14/20] adding tests for Math GPS formulas --- src/Math.php | 10 +-- tests/Data/boundaries_2d.csv | 100 ------------------------ tests/Data/gps_centroid.csv | 3 + tests/Data/gps_centroid.py | 44 +++++++++++ tests/Data/haversine_distances.csv | 5 ++ tests/Unit/MathTest.php | 118 +++++++++++++++++++++-------- 6 files changed, 142 insertions(+), 138 deletions(-) delete mode 100644 tests/Data/boundaries_2d.csv create mode 100644 tests/Data/gps_centroid.csv create mode 100644 tests/Data/gps_centroid.py create mode 100644 tests/Data/haversine_distances.csv diff --git a/src/Math.php b/src/Math.php index eae57c9..416944e 100644 --- a/src/Math.php +++ b/src/Math.php @@ -66,11 +66,11 @@ public static function gaussianNoise(float $mu, float $sigma): array return [$z0, $z1]; } - public static int $earthRadius = 6371000; + public static int $earthRadius = 6371009; // meters /** - * Calculates the great-circle distance between two points, with - * the Haversine formula. + * Calculates the great-circle distance (in meters) between two points, + * with the Haversine formula. * * @see https://stackoverflow.com/a/14751773/17403258 * @@ -88,7 +88,7 @@ public static function haversine($from, $to): float } /** - * Calculates the centroid of GPS coordinates + * Calculates the centroid of GPS coordinates. 
* * @see https://stackoverflow.com/questions/6671183 * @@ -105,7 +105,7 @@ public static function gpsCentroid(array $points): array foreach ($points as $point) { $lat = deg2rad($point[0]); - $long = deg2rad($point[0]); + $long = deg2rad($point[1]); $x += cos($lat) * cos($long); $y += cos($lat) * sin($long); diff --git a/tests/Data/boundaries_2d.csv b/tests/Data/boundaries_2d.csv deleted file mode 100644 index 1b2a334..0000000 --- a/tests/Data/boundaries_2d.csv +++ /dev/null @@ -1,100 +0,0 @@ --13,-337,-762,291,-470,-31,-262,505,-613,399,-762,-337,-13,505 --951,-313,-537,-977,780,620,-182,244,142,554,-951,-977,780,620 -767,723,973,-648,142,-229,-156,682,-726,616,-726,-648,973,723 --793,-469,-254,759,283,118,31,197,-594,784,-793,-469,283,784 -359,562,-72,-517,392,-710,556,-636,-204,-414,-204,-710,556,562 --929,221,-411,584,737,-872,380,-101,315,46,-929,-872,737,584 --461,220,308,-656,-5,79,-781,363,648,-158,-781,-656,648,363 -428,673,359,770,985,560,-49,-237,-491,-416,-491,-416,985,770 -541,593,193,-636,133,571,661,213,617,843,133,-636,661,843 --690,660,657,-587,-939,-625,-532,-860,301,-315,-939,-860,657,660 -910,854,724,-954,-818,871,-146,972,-263,-47,-818,-954,910,972 --831,435,336,-982,884,-460,537,-559,609,-872,-831,-982,884,435 --987,253,482,-656,973,-839,-242,-920,-60,-180,-987,-920,973,253 --517,-862,571,417,419,-271,725,735,449,395,-517,-862,725,735 -942,296,-256,-745,587,-54,-546,941,-258,827,-546,-745,942,941 --310,-569,-290,595,649,437,-410,-78,-73,168,-410,-569,649,595 -916,864,-210,660,-866,-314,877,-44,-692,796,-866,-314,916,864 --181,352,-448,731,339,-850,248,210,55,-542,-448,-850,339,731 --994,523,-644,145,924,944,-246,604,989,-388,-994,-388,989,944 --754,-386,152,920,510,578,-766,800,735,748,-766,-386,735,920 --869,684,277,333,-869,-437,-364,55,-359,312,-869,-437,277,684 --460,-509,392,818,-369,-133,-410,758,-249,-989,-460,-989,392,818 -15,-419,856,-280,-142,311,555,975,502,-254,-142,-419,856,975 
-378,-417,-356,-532,-783,438,-90,908,245,-36,-783,-532,378,908 -461,258,-139,-100,427,-717,44,-556,103,-146,-139,-717,461,258 -591,-928,58,964,-483,-252,-665,-479,-935,502,-935,-928,591,964 -396,-590,-359,-198,365,30,686,-841,-724,461,-724,-841,686,461 -945,-797,-933,-295,-377,748,682,461,537,318,-933,-797,945,748 --536,858,-889,-564,-871,-835,17,-395,-28,300,-889,-835,17,858 -131,365,873,859,493,-860,362,-358,230,988,131,-860,873,988 -928,375,249,874,297,-682,769,-495,755,-816,249,-816,928,874 -35,711,-382,-621,-971,837,-108,663,459,924,-971,-621,459,924 --809,862,805,504,481,886,-136,700,-213,253,-809,253,805,886 -901,245,434,-839,774,-290,-613,-834,-847,-218,-847,-839,901,245 --254,739,892,-319,-55,854,117,717,295,796,-254,-319,892,854 --211,583,418,130,-414,845,116,704,-561,-609,-561,-609,418,845 -777,-133,-896,-592,-830,712,-276,297,119,697,-896,-592,777,712 -91,710,32,691,-714,-446,58,85,-487,512,-714,-446,91,710 --406,231,705,-543,-210,-661,205,-621,925,-483,-406,-661,925,231 -943,339,-237,-997,993,726,533,-845,-903,740,-903,-997,993,740 -539,137,233,-681,-301,-607,-27,-352,534,-818,-301,-818,539,137 -648,-174,-163,293,441,-117,-516,901,-600,-884,-600,-884,648,901 -685,929,605,626,-649,190,-87,829,-120,355,-649,190,685,929 -541,181,368,-652,-624,-437,-951,-757,-316,-36,-951,-757,541,181 --257,869,617,46,624,279,-99,-826,-980,-23,-980,-826,624,869 --509,946,-413,-678,-140,320,711,812,-712,430,-712,-678,711,946 --823,-565,935,13,540,-226,214,-973,-171,540,-823,-973,935,540 --815,418,28,-14,-584,-956,-24,-856,281,423,-815,-956,281,423 -361,692,537,180,607,247,-419,185,269,552,-419,180,607,692 -698,889,666,322,26,879,913,-639,391,248,26,-639,913,889 -614,101,201,-280,-111,-7,-133,-281,-639,-733,-639,-733,614,101 --518,949,880,-675,-265,-329,254,-777,805,51,-518,-777,880,949 --181,-184,771,-231,205,-387,-231,628,-133,-344,-231,-387,771,628 --324,-295,189,-415,-218,-304,-865,-925,-852,-503,-865,-925,189,-295 
-337,-255,-235,-159,-62,-788,436,-87,709,-231,-235,-788,709,-87 -854,-143,143,-473,-308,-662,-71,574,52,-362,-308,-662,854,574 --561,-667,-589,-233,318,-157,-63,3,-975,269,-975,-667,318,269 --659,855,478,649,173,-672,-199,-584,15,-251,-659,-672,478,855 --529,-592,789,-287,569,239,-422,695,977,534,-529,-592,977,695 --979,-322,-505,-657,733,-648,-603,-324,485,70,-979,-657,733,70 --477,-256,-256,-391,-477,843,-873,622,635,-108,-873,-391,635,843 --941,-534,597,-462,-696,458,-332,103,-534,-683,-941,-683,597,458 -783,735,-160,277,455,-851,788,-960,-821,907,-821,-960,788,907 --162,-584,-883,597,64,-997,799,-496,448,-357,-883,-997,799,597 -257,-282,127,-847,328,-418,513,-900,727,-381,127,-900,727,-282 -225,220,443,277,129,-75,-315,-192,-888,48,-888,-192,443,277 --477,253,-494,931,-205,127,-31,849,409,-639,-494,-639,409,931 --339,-739,461,905,-420,297,376,-227,660,-117,-420,-739,660,905 -310,-886,-238,-928,89,-217,76,-59,-620,-223,-620,-928,310,-59 -740,-629,-301,836,207,844,-501,-32,82,903,-501,-629,740,903 --507,-602,-984,979,140,-725,344,-971,204,494,-984,-971,344,979 -895,205,-162,221,530,350,-313,692,-669,-360,-669,-360,895,692 -168,-879,839,137,-408,314,974,-913,-38,565,-408,-913,974,565 -696,-110,-93,-934,317,-496,-94,-483,-365,419,-365,-934,696,419 -710,-241,-787,150,-535,277,-630,-544,-549,-980,-787,-980,710,277 -513,455,550,565,-65,-77,288,-72,671,-388,-65,-388,671,565 -709,-604,-655,-420,787,905,282,-949,-676,-603,-676,-949,787,905 -93,-464,305,54,-107,534,-565,-441,736,-142,-565,-464,736,534 --597,-242,665,-222,549,100,919,800,-51,-217,-597,-242,919,800 --218,-956,63,-359,-500,-802,-769,162,-922,-573,-922,-956,63,162 -524,325,909,142,183,-172,18,12,-751,168,-751,-172,909,325 -951,927,424,-476,-457,-776,-769,883,554,-277,-769,-776,951,927 --750,-675,-928,-927,-750,831,-52,-606,-903,731,-928,-927,-52,831 --285,982,817,280,47,-139,-615,-40,990,-823,-615,-823,990,982 --952,-990,-874,-93,178,-8,-140,-295,325,612,-952,-990,325,612 
--386,446,599,230,173,448,-264,-905,-351,-574,-386,-905,599,448 --916,-153,-781,642,986,-449,283,468,-295,555,-916,-449,986,642 --15,-692,-679,123,514,825,189,450,912,376,-679,-692,912,825 -287,-270,-440,-372,193,696,-333,618,-36,-593,-440,-593,287,696 -253,433,424,-183,-679,-268,220,-663,869,-284,-679,-663,869,433 --797,-970,843,-348,642,-230,429,-938,376,857,-797,-970,843,857 --917,649,-648,-264,-494,-595,-52,-976,976,-606,-917,-976,976,649 -249,-737,745,689,-327,367,-18,439,-969,-304,-969,-737,745,689 --587,327,63,-981,7,-379,-312,906,844,954,-587,-981,844,954 --423,793,471,318,840,-981,697,670,39,-867,-423,-981,840,793 --418,202,145,-953,24,-447,-6,-250,453,-907,-418,-953,453,202 -863,401,-621,-319,-236,-632,694,-39,-181,638,-621,-632,863,638 -187,-745,-182,967,718,709,44,-128,574,650,-182,-745,718,967 -495,336,929,994,-792,165,-966,-55,-407,-518,-966,-518,929,994 -391,672,729,-450,-209,-370,624,785,-473,-52,-473,-450,729,785 diff --git a/tests/Data/gps_centroid.csv b/tests/Data/gps_centroid.csv new file mode 100644 index 0000000..4ff47c3 --- /dev/null +++ b/tests/Data/gps_centroid.csv @@ -0,0 +1,3 @@ +"Paris, Lyon, Marseille",45.9784058082879,4.226770011911983,48.85889,2.32004,45.75781,4.83201,43.29617,5.36995 +"Single point",48.85889,2.32004,48.85889,2.32004 +"5 close points",43.29619000000861,5.369947999981911,43.29617,5.36995,43.29616,5.36987,43.29625,5.36998,43.29621,5.37000,43.29616,5.36994 diff --git a/tests/Data/gps_centroid.py b/tests/Data/gps_centroid.py new file mode 100644 index 0000000..def69d2 --- /dev/null +++ b/tests/Data/gps_centroid.py @@ -0,0 +1,44 @@ +import numpy as np +import numpy.linalg as lin + +E = np.array([[0, 0, 1], + [0, 1, 0], + [-1, 0, 0]]) + +def lat_long2n_E(latitude,longitude): + res = [np.sin(np.deg2rad(latitude)), + np.sin(np.deg2rad(longitude)) * np.cos(np.deg2rad(latitude)), + -np.cos(np.deg2rad(longitude)) * np.cos(np.deg2rad(latitude))] + return np.dot(E.T,np.array(res)) + +def n_E2lat_long(n_E): + n_E = np.dot(E, n_E) + 
longitude=np.arctan2(n_E[1],-n_E[2]); + equatorial_component = np.sqrt(n_E[1]**2 + n_E[2]**2 ); + latitude=np.arctan2(n_E[0],equatorial_component); + return np.rad2deg(latitude), np.rad2deg(longitude) + +def average(coords): + res = [] + for lat,lon in coords: + res.append(lat_long2n_E(lat,lon)) + res = np.array(res) + m = np.mean(res,axis=0) + m = m / lin.norm(m) + return n_E2lat_long(m) + + +#paris = [48.85889,2.32004] +#lyon = [45.75781,4.83201] +#marseille = [43.29617,5.36995] +# +## 45.9784058082879, 4.226770011911983 +#print (average([paris, lyon, marseille])) + +print(average([ + [43.29617,5.36995], + [43.29616,5.36987], + [43.29625,5.36998], + [43.29621,5.37000], + [43.29616,5.36994] +])) diff --git a/tests/Data/haversine_distances.csv b/tests/Data/haversine_distances.csv new file mode 100644 index 0000000..9f0598e --- /dev/null +++ b/tests/Data/haversine_distances.csv @@ -0,0 +1,5 @@ +"Paris - New York",48.864716,2.349014,40.7128,74.0060,5514741.115351569 +"Paris - Neuilly",48.864716,2.349014,48.8848,2.2685,6297.56948974873 +"Paris - Paris",48.864716,2.349014,48.864716,2.349014,0.0 +"North Pole - South Pole",90.0,0.0,-90.0,0.0,20015114.442035925 +"Two very close points",48.85323,2.34903,48.85321,2.34902,2.3411651390339396 diff --git a/tests/Unit/MathTest.php b/tests/Unit/MathTest.php index f3ea16a..1a76c86 100644 --- a/tests/Unit/MathTest.php +++ b/tests/Unit/MathTest.php @@ -11,6 +11,9 @@ */ class MathTest extends TestCase { + // ------------------------------------------------------------------------ + // Euclidean Distance + /** * @covers ::euclideanDist * @dataProvider euclidianDistanceDataProvider @@ -29,25 +32,28 @@ public function testEuclideanDist(array $a, array $b, float $dist): void public function euclidianDistanceDataProvider(): \Generator { /** @var array $row */ - foreach ($this->openCsv('euclidean_distances_2d.csv') as $row) { + foreach ($this->readCsv('euclidean_distances_2d') as $row) { list($x1, $y1, $x2, $y2, $dist) = 
array_map('floatval', $row); yield [[$x1, $y1], [$x2, $y2], $dist]; } /** @var array $row */ - foreach ($this->openCsv('euclidean_distances_3d.csv') as $row) { + foreach ($this->readCsv('euclidean_distances_3d') as $row) { list($x1, $y1, $z1, $x2, $y2, $z2, $dist) = array_map('floatval', $row); yield [[$x1, $y1, $z1], [$x2, $y2, $z2], $dist]; } } + // ------------------------------------------------------------------------ + // Centroid + /** * @covers ::centroid * @dataProvider centroidDataProvider * @param array $centroid * @param array ...$points */ - public function testFindCentroid(array $centroid, array ...$points): void + public function testCentroid(array $centroid, array ...$points): void { $this->assertEquals($centroid, Math::centroid($points)); } @@ -58,43 +64,20 @@ public function testFindCentroid(array $centroid, array ...$points): void public function centroidDataProvider(): \Generator { /** @var array $row */ - foreach ($this->openCsv('centroids_2d.csv') as $row) { + foreach ($this->readCsv('centroids_2d') as $row) { list($x1, $y1, $x2, $y2, $x3, $y3, $x4, $y4, $cx, $cy) = array_map('floatval', $row); yield [[$cx, $cy], [$x1, $y1], [$x2, $y2], [$x3, $y3], [$x4, $y4]]; } } - /** - * @return \Generator>> - */ - public function boundariesDataProvider(): \Generator - { - /** @var array $row */ - foreach ($this->openCsv('boundaries_2d.csv') as $row) { - list($x1, $y1, $x2, $y2, $x3, $y3, $x4, $y4, $x5, $y5, $ax, $ay, $bx, $by) = array_map('floatval', $row); - yield [[$ax, $ay], [$bx, $by], [$x1, $y1], [$x2, $y2], [$x3, $y3], [$x4, $y4], [$x5, $y5]]; - } - } - - /** - * @return array - */ - public function frandDataProvider(): array - { - return [ - ['min' => 0, 'max' => 1], - ['min' => 10, 'max' => 20], - ['min' => 0, 'max' => 100], - ['min' => -100, 'max' => 100], - ['min' => -1e6, 'max' => 1e6], - ]; - } + // ------------------------------------------------------------------------ + // Gaussian Noise /** * @covers ::gaussianNoise * @dataProvider 
gaussianNoiseDataProvider */ - public function testGenerateGaussianNoise(float $mu, float $sigma = 1, float $nb = 1e3): void + public function testGaussianNoise(float $mu, float $sigma = 1, float $nb = 1e3): void { // let's generate $nb numbers and sum them for ($sum = 0, $i = 0; $i < $nb; $i++) { @@ -125,10 +108,79 @@ public function gaussianNoiseDataProvider(): array ]; } - private static function openCsv(string $path): \SplFileObject + // ------------------------------------------------------------------------ + // Haversine + + /** + * @covers ::haversine + * @dataProvider haversineDataProvider + * @param array{0: float, 1: float} $from + * @param array{0: float, 1: float} $to + */ + public function testHaversine(string $label, array $from, array $to, float $expected): void + { + $obtained = Math::haversine($from, $to); + + $this->assertLessThan( + 1, // meter + $obtained - $expected, + "Haversine distance for $label should be around $expected meters", + ); + } + + public function haversineDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->readCsv('haversine_distances') as $row) { + $label = array_shift($row); + $row = array_map('floatval', $row); + yield [$label, [$row[0], $row[1]], [$row[2], $row[3]], $row[4]]; + } + } + + // ------------------------------------------------------------------------ + // GPS Centroid + + /** + * @covers ::gpsCentroid + * @uses \Kmeans\Math::haversine + * @dataProvider gpsCentroidDataProvider + * @param array{0: float, 1: float} $expected + * @param array $points + */ + public function testGpsCentroid(string $label, array $expected, array $points): void { - $csv = new \SplFileObject(__DIR__ . '/../Data/' . $path); - $csv->setFlags(\SplFileObject::READ_CSV | \SplFileObject::SKIP_EMPTY | \SplFileObject::READ_AHEAD); + $obtained = Math::gpsCentroid($points); + + $this->assertLessThan( + 1, + Math::haversine($expected, $obtained), + "Centroid of $label should be near " . 
implode(', ', $expected), + ); + } + + public function gpsCentroidDataProvider(): \Generator + { + /** @var array $row */ + foreach ($this->readCsv('gps_centroid') as $row) { + $label = array_shift($row); + $points = array_chunk(array_map('floatval', $row), 2); + yield [$label, array_shift($points), $points]; + } + } + + // ------------------------------------------------------------------------ + // Helpers + + private static function readCsv(string $path): \SplFileObject + { + $csv = new \SplFileObject(__DIR__ . "/../Data/{$path}.csv"); + + $csv->setFlags( + \SplFileObject::READ_CSV | + \SplFileObject::SKIP_EMPTY | + \SplFileObject::READ_AHEAD + ); return $csv; } From 2b55e72f68d3557368b846f8aa0456eb9e9ad2d1 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Tue, 12 Apr 2022 23:50:06 +0200 Subject: [PATCH 15/20] moving default implementation into Euclidean namespace --- src/Algorithm.php | 20 +-- src/Euclidean/Algorithm.php | 23 +++ src/{ => Euclidean}/Point.php | 9 +- src/{ => Euclidean}/Space.php | 9 +- src/Gps/Point.php | 2 +- src/Gps/Space.php | 8 +- src/Interfaces/SpaceInterface.php | 5 - tests/Unit/ClusterCollectionTest.php | 41 +---- tests/Unit/ClusterTest.php | 144 +++++------------ tests/Unit/Concerns/HasSpaceTraitTest.php | 15 +- tests/Unit/{ => Euclidean}/AlgorithmTest.php | 41 ++--- tests/Unit/{ => Euclidean}/PointTest.php | 29 +--- tests/Unit/{ => Euclidean}/SpaceTest.php | 18 +-- tests/Unit/MathTest.php | 9 +- tests/Unit/PointCollectionTest.php | 157 +++++++------------ tests/Unit/RandomInitializationTest.php | 10 +- 16 files changed, 178 insertions(+), 362 deletions(-) create mode 100644 src/Euclidean/Algorithm.php rename src/{ => Euclidean}/Point.php (87%) rename src/{ => Euclidean}/Space.php (65%) rename tests/Unit/{ => Euclidean}/AlgorithmTest.php (88%) rename tests/Unit/{ => Euclidean}/PointTest.php (70%) rename tests/Unit/{ => Euclidean}/SpaceTest.php (73%) diff --git a/src/Algorithm.php b/src/Algorithm.php index 7b38d6f..00f59c0 100644 
--- a/src/Algorithm.php +++ b/src/Algorithm.php @@ -11,7 +11,7 @@ use Kmeans\Interfaces\PointCollectionInterface; use Kmeans\Interfaces\PointInterface; -class Algorithm implements AlgorithmInterface +abstract class Algorithm implements AlgorithmInterface { private InitializationSchemeInterface $initScheme; @@ -25,6 +25,10 @@ public function __construct(InitializationSchemeInterface $initScheme) $this->initScheme = $initScheme; } + abstract protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float; + + abstract protected function findCentroid(PointCollectionInterface $points): PointInterface; + public function registerIterationCallback(callable $callback): void { $this->iterationCallbacks[] = $callback; @@ -48,18 +52,6 @@ public function clusterize(PointCollectionInterface $points, int $nbClusters): C return $clusters; } - protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float - { - return Math::euclideanDist($pointA->getCoordinates(), $pointB->getCoordinates()); - } - - protected function findCentroid(PointCollectionInterface $points): PointInterface - { - return new Point($points->getSpace(), Math::centroid( - array_map(fn (PointInterface $point) => $point->getCoordinates(), iterator_to_array($points)) - )); - } - private function iterate(ClusterCollectionInterface $clusters): bool { /** @var \SplObjectStorage */ @@ -110,7 +102,7 @@ private function getClosestCluster(ClusterCollectionInterface $clusters, PointIn return $closest; } - protected function invokeIterationCallbacks(ClusterCollectionInterface $clusters): void + private function invokeIterationCallbacks(ClusterCollectionInterface $clusters): void { foreach ($this->iterationCallbacks as $callback) { $callback($this, $clusters); diff --git a/src/Euclidean/Algorithm.php b/src/Euclidean/Algorithm.php new file mode 100644 index 0000000..741c63f --- /dev/null +++ b/src/Euclidean/Algorithm.php @@ -0,0 +1,23 @@ +getCoordinates(), 
$pointB->getCoordinates()); + } + + protected function findCentroid(PointCollectionInterface $points): PointInterface + { + return new Point($points->getSpace(), Math::centroid( + array_map(fn (PointInterface $point) => $point->getCoordinates(), iterator_to_array($points)) + )); + } +} diff --git a/src/Point.php b/src/Euclidean/Point.php similarity index 87% rename from src/Point.php rename to src/Euclidean/Point.php index 140f652..e9e35f3 100644 --- a/src/Point.php +++ b/src/Euclidean/Point.php @@ -1,6 +1,6 @@ setSpace($space); $this->coordinates = $this->sanitizeCoordinates($coordinates); } @@ -50,6 +56,7 @@ public function setData($data): void */ private function sanitizeCoordinates(array $coordinates): array { + assert($this->space instanceof Space); if (count($coordinates) != $this->space->getDimensions()) { throw new \InvalidArgumentException(sprintf( "Invalid set of coordinates: %d coordinates expected, %d given", diff --git a/src/Space.php b/src/Euclidean/Space.php similarity index 65% rename from src/Space.php rename to src/Euclidean/Space.php index e114821..f902631 100644 --- a/src/Space.php +++ b/src/Euclidean/Space.php @@ -1,6 +1,6 @@ dimensions = $dimensions; @@ -27,6 +29,7 @@ public function getDimensions(): int public function isEqualTo(SpaceInterface $space): bool { - return $this->getDimensions() == $space->getDimensions(); + return $space instanceof self + && $this->dimensions == $space->dimensions; } } diff --git a/src/Gps/Point.php b/src/Gps/Point.php index 89fcc49..e9a75a7 100644 --- a/src/Gps/Point.php +++ b/src/Gps/Point.php @@ -2,7 +2,7 @@ namespace Kmeans\Gps; -use Kmeans\Point as BasePoint; +use Kmeans\Euclidean\Point as BasePoint; /** * @method array{0: float, 1: float} getCoordinates() diff --git a/src/Gps/Space.php b/src/Gps/Space.php index 4311088..c6f3829 100644 --- a/src/Gps/Space.php +++ b/src/Gps/Space.php @@ -2,12 +2,12 @@ namespace Kmeans\Gps; -use Kmeans\Space as BaseSpace; +use Kmeans\Interfaces\SpaceInterface; -class Space 
extends BaseSpace +class Space implements SpaceInterface { - public function __construct() + public function isEqualTo(SpaceInterface $other): bool { - parent::__construct(2); + return $other instanceof self; } } diff --git a/src/Interfaces/SpaceInterface.php b/src/Interfaces/SpaceInterface.php index 37359bb..913224f 100644 --- a/src/Interfaces/SpaceInterface.php +++ b/src/Interfaces/SpaceInterface.php @@ -4,10 +4,5 @@ interface SpaceInterface { - /** - * @return int<1, max> - */ - public function getDimensions(): int; - public function isEqualTo(self $space): bool; } diff --git a/tests/Unit/ClusterCollectionTest.php b/tests/Unit/ClusterCollectionTest.php index 08440c9..04bc451 100644 --- a/tests/Unit/ClusterCollectionTest.php +++ b/tests/Unit/ClusterCollectionTest.php @@ -4,26 +4,20 @@ use Kmeans\Cluster; use Kmeans\ClusterCollection; +use Kmeans\Euclidean\Point; +use Kmeans\Euclidean\Space; use Kmeans\Interfaces\ClusterInterface; -use Kmeans\Point; -use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Kmeans\ClusterCollection - * @uses \Kmeans\Space + * @covers \Kmeans\ClusterCollection * @uses \Kmeans\Cluster - * @uses \Kmeans\Point + * @uses \Kmeans\Euclidean\Point + * @uses \Kmeans\Euclidean\Space * @uses \Kmeans\PointCollection */ class ClusterCollectionTest extends TestCase { - /** - * @covers ::__construct - * @covers ::getSpace - * @covers ::attach - * @covers ::contains - */ public function testConstructingClusterWithPoints(): void { $space = new Space(1); @@ -40,12 +34,6 @@ public function testConstructingClusterWithPoints(): void ); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::contains - * @covers ::detach - */ public function testAddingAndRemovingClustersFromCollection(): void { $space = new Space(4); @@ -77,10 +65,6 @@ public function testAddingAndRemovingClustersFromCollection(): void ); } - /** - * @covers ::__construct - * @covers ::attach - */ public function testAddingInvalidClusterToCollection(): 
void { $this->expectException(\InvalidArgumentException::class); @@ -94,12 +78,6 @@ public function testAddingInvalidClusterToCollection(): void $collection->attach($cluster); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::detach - * @covers ::count - */ public function testCount(): void { $space = new Space(4); @@ -125,15 +103,6 @@ public function testCount(): void $this->assertEquals(0, count($collection)); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::current - * @covers ::key - * @covers ::next - * @covers ::rewind - * @covers ::valid - */ public function testIterator(): void { $space = new Space(4); diff --git a/tests/Unit/ClusterTest.php b/tests/Unit/ClusterTest.php index 6b1a4fc..349f6cd 100644 --- a/tests/Unit/ClusterTest.php +++ b/tests/Unit/ClusterTest.php @@ -3,160 +3,100 @@ namespace Tests\Unit; use Kmeans\Cluster; -use Kmeans\Point; +use Kmeans\Euclidean\Point; +use Kmeans\Euclidean\Space; use Kmeans\PointCollection; -use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Kmeans\Cluster - * @uses \Kmeans\Space - * @uses \Kmeans\Point + * @covers \Kmeans\Cluster + * @uses \Kmeans\Euclidean\Point + * @uses \Kmeans\Euclidean\Space * @uses \Kmeans\PointCollection */ class ClusterTest extends TestCase { - private Space $space; - /** @var array */ - private array $pointsArray; - private Point $centroid; - private PointCollection $points; - private Cluster $cluster; - - public function setUp(): void + public static function makeCluster(): Cluster { - $this->space = new Space(2); - - $this->pointsArray = array_map( - fn ($i) => new Point($this->space, [$i, $i]), - range(1, 10) - ); - - $this->points = new PointCollection( - $this->space, - $this->pointsArray - ); - - $this->centroid = new Point($this->space, [0, 0]); - - $this->cluster = new Cluster( - $this->centroid, - $this->points - ); - } - - public function tearDown(): void - { - unset( - $this->space, - $this->pointsArray, - 
$this->points, - $this->centroid, - $this->cluster, + return new Cluster( + new Point(new Space(2), [3,3]), + PointCollectionTest::makePointCollection() ); } - /** - * @covers ::__construct - * @covers ::getSpace - * @covers ::setCentroid - * @covers ::belongsTo - */ public function testBelongsTo(): void { + $cluster = self::makeCluster(); + $this->assertTrue( - $this->cluster->belongsTo($this->space) + $cluster->belongsTo(new Space(2)) ); } - /** - * @covers ::__construct - * @covers ::getSpace - * @covers ::setCentroid - * @covers ::getCentroid - */ public function testGetCentroid(): void { + $cluster = self::makeCluster(); + $this->assertSame( - $this->centroid, - $this->cluster->getCentroid() + [3.0,3.0], + $cluster->getCentroid()->getCoordinates() ); } - /** - * @covers ::__construct - * @covers ::getSpace - * @covers ::setCentroid - * @covers ::getCentroid - */ public function testSetCentroid(): void { - $this->cluster->setCentroid( - $centroid = new Point($this->space, [1, 1]) + $cluster = self::makeCluster(); + + $cluster->setCentroid( + $centroid = new Point(new Space(2), [1,1]) ); $this->assertSame( $centroid, - $this->cluster->getCentroid() + $cluster->getCentroid() ); } - /** - * @covers ::__construct - * @covers ::getSpace - * @covers ::setCentroid - * @covers ::getCentroid - */ public function testSetCentroidFailsWithInvalidCentroid(): void { + $cluster = self::makeCluster(); + $this->expectException(\LogicException::class); $this->expectExceptionMessageMatches('/^Cannot set centroid/'); - $this->cluster->setCentroid( - new Point(new Space(3), [2, 2, 2]) + $cluster->setCentroid( + new Point(new Space(3), [6,6,6]) ); } - /** - * @covers ::__construct - * @covers ::getSpace - * @covers ::setCentroid - * @covers ::getPoints - */ public function testGetPoints(): void { - $this->assertCount(10, $this->cluster->getPoints()); + $cluster = self::makeCluster(); + + $this->assertCount(5, $cluster->getPoints()); } - /** - * @covers ::__construct - * @covers 
::getSpace - * @covers ::setCentroid - * @covers ::attach - * @covers ::getPoints - */ - public function testAttach(): void + public function testAttach(): Cluster { - $this->cluster->attach( - new Point($this->space, [11, 11]) + $cluster = self::makeCluster(); + + $cluster->attach( + new Point(new Space(2), [6,6]) ); - $this->assertCount(11, $this->cluster->getPoints()); + $this->assertCount(6, $cluster->getPoints()); + + return $cluster; } - /** - * @covers ::__construct - * @covers ::getSpace - * @covers ::setCentroid - * @covers ::detach - * @covers ::getPoints - */ public function testDetach(): void { - $this->cluster->detach( - $this->pointsArray[array_rand($this->pointsArray)] - ); + $cluster = self::makeCluster(); + $points = iterator_to_array($cluster->getPoints()); + $point = $points[array_rand($points)]; + + $cluster->detach($point); - $this->assertCount(9, $this->cluster->getPoints()); + $this->assertCount(4, $cluster->getPoints()); } } diff --git a/tests/Unit/Concerns/HasSpaceTraitTest.php b/tests/Unit/Concerns/HasSpaceTraitTest.php index 2897a08..89406af 100644 --- a/tests/Unit/Concerns/HasSpaceTraitTest.php +++ b/tests/Unit/Concerns/HasSpaceTraitTest.php @@ -3,14 +3,14 @@ namespace Tests\Unit\Concerns; use Kmeans\Concerns\HasSpaceTrait; +use Kmeans\Euclidean\Space; use Kmeans\Interfaces\SpaceBoundInterface; use Kmeans\Interfaces\SpaceInterface; -use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Kmeans\Concerns\HasSpaceTrait - * @uses \Kmeans\Space + * @covers \Kmeans\Concerns\HasSpaceTrait + * @uses \Kmeans\Euclidean\Space */ class HasSpaceTraitTest extends TestCase { @@ -31,20 +31,11 @@ public function __construct(SpaceInterface $space) }; } - /** - * @covers ::setSpace - * @covers ::getSpace - */ public function testGetSpace(): void { $this->assertSame($this->space, $this->point->getSpace()); } - /** - * @covers ::setSpace - * @covers ::getSpace - * @covers ::belongsTo - */ public function testBelongsTo(): void { 
$this->assertTrue($this->point->belongsTo($this->space)); diff --git a/tests/Unit/AlgorithmTest.php b/tests/Unit/Euclidean/AlgorithmTest.php similarity index 88% rename from tests/Unit/AlgorithmTest.php rename to tests/Unit/Euclidean/AlgorithmTest.php index 95e4510..f503edc 100644 --- a/tests/Unit/AlgorithmTest.php +++ b/tests/Unit/Euclidean/AlgorithmTest.php @@ -1,30 +1,31 @@ $dimensions * @param array> $expected * @param array> $initialClusterCentroids @@ -105,10 +96,6 @@ public function testClusterize( } } - /** - * @covers ::__construct - * @covers ::clusterize - */ public function testClusterizeFailsWhenClusterInitializationFails(): void { /** @var InitializationSchemeInterface */ @@ -126,16 +113,6 @@ public function testClusterizeFailsWhenClusterInitializationFails(): void (new Algorithm($initScheme))->clusterize(new PointCollection(new Space(1)), 1); } - /** - * @covers ::__construct - * @covers ::registerIterationCallback - * @covers ::clusterize - * @covers ::iterate - * @covers ::invokeIterationCallbacks - * @covers ::getClosestCluster - * @covers ::getDistanceBetween - * @covers \Kmeans\Math::euclideanDist - */ public function testIterationCallback(): void { $space = new Space(1); diff --git a/tests/Unit/PointTest.php b/tests/Unit/Euclidean/PointTest.php similarity index 70% rename from tests/Unit/PointTest.php rename to tests/Unit/Euclidean/PointTest.php index 29c074b..ee2f83d 100644 --- a/tests/Unit/PointTest.php +++ b/tests/Unit/Euclidean/PointTest.php @@ -1,23 +1,18 @@ assertSame([1.2, 3.4], $point->getCoordinates()); } - /** - * @covers ::__construct - * @covers ::sanitizeCoordinates - */ public function testInvalidCoordinates(): void { $this->expectException(\LogicException::class); @@ -39,10 +30,6 @@ public function testInvalidCoordinates(): void $point = new Point($space, [0.0, 0.0]); // 2d space point } - /** - * @covers ::__construct - * @covers ::sanitizeCoordinates - */ public function testInvalidCoordinatesValues(): void { 
$this->expectException(\LogicException::class); @@ -52,12 +39,6 @@ public function testInvalidCoordinatesValues(): void $point = new Point($space, [NAN, 1.0, "hello!"]); /** @phpstan-ignore-line */ } - /** - * @covers ::__construct - * @covers ::sanitizeCoordinates - * @covers ::getData - * @covers ::setData - */ public function testAssociateData(): void { $space = new Space(2); diff --git a/tests/Unit/SpaceTest.php b/tests/Unit/Euclidean/SpaceTest.php similarity index 73% rename from tests/Unit/SpaceTest.php rename to tests/Unit/Euclidean/SpaceTest.php index 1efbf0e..fcb411e 100644 --- a/tests/Unit/SpaceTest.php +++ b/tests/Unit/Euclidean/SpaceTest.php @@ -1,19 +1,15 @@ assertEquals(3, $space->getDimensions()); } - /** - * @covers ::__construct - */ public function testInvalidSpaceDimensions(): void { $this->expectException(\InvalidArgumentException::class); @@ -39,11 +32,6 @@ public function testInvalidSpaceDimensions(): void $space = new Space(0); } - /** - * @covers ::__construct - * @covers ::isEqualTo - * @covers ::getDimensions - */ public function testIsEqualTo(): void { $this->assertTrue( diff --git a/tests/Unit/MathTest.php b/tests/Unit/MathTest.php index 1a76c86..a385d92 100644 --- a/tests/Unit/MathTest.php +++ b/tests/Unit/MathTest.php @@ -3,11 +3,10 @@ namespace Tests\Unit; use Kmeans\Math; -use Kmeans\findCentroid; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Kmeans\Math + * @covers \Kmeans\Math */ class MathTest extends TestCase { @@ -15,7 +14,6 @@ class MathTest extends TestCase // Euclidean Distance /** - * @covers ::euclideanDist * @dataProvider euclidianDistanceDataProvider * @param array $a * @param array $b @@ -48,7 +46,6 @@ public function euclidianDistanceDataProvider(): \Generator // Centroid /** - * @covers ::centroid * @dataProvider centroidDataProvider * @param array $centroid * @param array ...$points @@ -74,7 +71,6 @@ public function centroidDataProvider(): \Generator // Gaussian Noise /** - * @covers ::gaussianNoise * 
@dataProvider gaussianNoiseDataProvider */ public function testGaussianNoise(float $mu, float $sigma = 1, float $nb = 1e3): void @@ -112,7 +108,6 @@ public function gaussianNoiseDataProvider(): array // Haversine /** - * @covers ::haversine * @dataProvider haversineDataProvider * @param array{0: float, 1: float} $from * @param array{0: float, 1: float} $to @@ -142,8 +137,6 @@ public function haversineDataProvider(): \Generator // GPS Centroid /** - * @covers ::gpsCentroid - * @uses \Kmeans\Math::haversine * @dataProvider gpsCentroidDataProvider * @param array{0: float, 1: float} $expected * @param array $points diff --git a/tests/Unit/PointCollectionTest.php b/tests/Unit/PointCollectionTest.php index 42f0873..8a90cac 100644 --- a/tests/Unit/PointCollectionTest.php +++ b/tests/Unit/PointCollectionTest.php @@ -2,165 +2,122 @@ namespace Tests\Unit; +use Kmeans\Euclidean\Point; +use Kmeans\Euclidean\Space; use Kmeans\Interfaces\PointCollectionInterface; use Kmeans\Interfaces\PointInterface; -use Kmeans\Point; use Kmeans\PointCollection; -use Kmeans\Space; use PHPUnit\Framework\TestCase; /** - * @coversDefaultClass \Kmeans\PointCollection - * @uses \Kmeans\Space - * @uses \Kmeans\Point + * @covers \Kmeans\PointCollection + * @uses \Kmeans\Euclidean\Point + * @uses \Kmeans\Euclidean\Space */ class PointCollectionTest extends TestCase { - private Space $space; - /** @var array */ - private array $pointsArray; - private PointCollection $points; - - public function setUp(): void + public static function makePointCollection(): PointCollection { - $this->space = new Space(2); - - $this->pointsArray = array_map( - fn ($i) => new Point($this->space, [$i, $i]), - range(1, 10) - ); - - $this->points = new PointCollection( - $this->space, - $this->pointsArray + $space = new Space(2); + + return new PointCollection( + $space, + [ + new Point($space, [1,1]), + new Point($space, [2,2]), + new Point($space, [3,3]), + new Point($space, [4,4]), + new Point($space, [5,5]), + ] ); } - 
public function tearDown(): void - { - unset( - $this->space, - $this->pointsArray, - $this->points, - ); - } - - /** - * @covers ::__construct - * @covers ::attach - * @covers ::count - */ public function testAttach(): void { - $this->points->attach( - new Point($this->space, [11, 11]) + $points = self::makePointCollection(); + + $points->attach( + new Point(new Space(2), [6,6]) ); - $this->assertCount(11, $this->points); + $this->assertCount(6, $points); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::count - */ public function testAttachTwiceHasNoEffect(): void { - $this->points->attach( - $point = new Point($this->space, [11, 11]) + $points = self::makePointCollection(); + + $points->attach( + $point = new Point(new Space(2), [6,6]) ); - $this->points->attach($point); + $points->attach($point); - $this->assertCount(11, $this->points); + $this->assertCount(6, $points); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::count - */ public function testAttachInvalidPointFails(): void { + $points = self::makePointCollection(); + $this->expectException(\InvalidArgumentException::class); $this->expectExceptionMessageMatches('/^Cannot add point to collection/'); - $this->points->attach( - $point = new Point(new Space(3), [11, 11, 11]) + $points->attach( + $point = new Point(new Space(3), [6,6,6]) ); - $this->points->attach($point); + $points->attach($point); - $this->assertCount(11, $this->points); + $this->assertCount(11, $points); } - /** - * @covers ::__construct - * @covers ::contains - * @covers ::attach - */ public function testContains(): void { + $points = self::makePointCollection(); + $arr = iterator_to_array($points); + $point = $arr[array_rand($arr)]; + $this->assertTrue( - $this->points->contains( - $this->pointsArray[array_rand($this->pointsArray)] - ) + $points->contains($point) ); $this->assertFalse( - $this->points->contains( - new Point($this->space, [11, 11]) + $points->contains( + new Point(new Space(2), [6,6]) 
) ); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::detach - * @covers ::count - */ public function testDetach(): void { - $this->points->detach( - $this->pointsArray[array_rand($this->pointsArray)] - ); + $points = self::makePointCollection(); + $arr = iterator_to_array($points); + $point = $arr[array_rand($arr)]; + + $points->detach($point); - $this->assertCount(9, $this->points); + $this->assertCount(4, $points); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::detach - * @covers ::count - */ public function testDetachTwiceHasNoEffect(): void { - $this->points->detach( - $point = $this->pointsArray[array_rand($this->pointsArray)] - ); + $points = self::makePointCollection(); + $arr = iterator_to_array($points); + $point = $arr[array_rand($arr)]; - $this->points->detach($point); + $points->detach($point); + $points->detach($point); - $this->assertCount(9, $this->points); + $this->assertCount(4, $points); } - /** - * @covers ::__construct - * @covers ::attach - * @covers ::current - * @covers ::key - * @covers ::next - * @covers ::rewind - * @covers ::valid - */ public function testIteration(): void { - foreach ($this->points as $key => $point) { - $this->assertTrue( - array_search($point, $this->pointsArray, true) !== false - ); + $points = self::makePointCollection(); + + foreach ($points as $key => $point) { + $this->assertInstanceof(PointInterface::class, $point); } } } diff --git a/tests/Unit/RandomInitializationTest.php b/tests/Unit/RandomInitializationTest.php index 8aff71b..fbd4ae4 100644 --- a/tests/Unit/RandomInitializationTest.php +++ b/tests/Unit/RandomInitializationTest.php @@ -2,22 +2,22 @@ namespace Tests\Unit; +use Kmeans\Euclidean\Point; +use Kmeans\Euclidean\Space; use Kmeans\Interfaces\InitializationSchemeInterface; use Kmeans\Interfaces\PointCollectionInterface; use Kmeans\Interfaces\SpaceInterface; -use Kmeans\Point; use Kmeans\PointCollection; use Kmeans\RandomInitialization; -use Kmeans\Space; use 
PHPUnit\Framework\TestCase; /** * @coversDefaultClass \Kmeans\RandomInitialization - * @uses \Kmeans\Space - * @uses \Kmeans\Point - * @uses \Kmeans\PointCollection * @uses \Kmeans\Cluster * @uses \Kmeans\ClusterCollection + * @uses \Kmeans\Euclidean\Point + * @uses \Kmeans\Euclidean\Space + * @uses \Kmeans\PointCollection */ class RandomInitializationTest extends TestCase { From 44c34fea48118e70848f255be7d643109636d151 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Tue, 3 May 2022 23:58:46 +0200 Subject: [PATCH 16/20] adding GPS clustering algorithm using Haversine formula --- src/Algorithm.php | 12 +- src/ClusterCollection.php | 11 +- src/Concerns/HasDataTrait.php | 27 +++ src/Euclidean/Algorithm.php | 18 +- src/Euclidean/Point.php | 19 +- src/Euclidean/Space.php | 9 + src/Gps/Algorithm.php | 14 +- src/Gps/Point.php | 40 ++-- src/Gps/Space.php | 16 ++ src/Interfaces/AlgorithmInterface.php | 4 + src/Interfaces/SpaceInterface.php | 5 + src/RandomInitialization.php | 2 +- tests/Unit/AlgorithmTest.php | 174 +++++++++++++++++ tests/Unit/Euclidean/AlgorithmTest.php | 257 ++++++++----------------- tests/Unit/Euclidean/PointTest.php | 11 ++ tests/Unit/Euclidean/SpaceTest.php | 10 + tests/Unit/Gps/AlgorithmTest.php | 108 +++++++++++ tests/Unit/Gps/PointTest.php | 60 ++++++ tests/Unit/Gps/SpaceTest.php | 48 +++++ 19 files changed, 613 insertions(+), 232 deletions(-) create mode 100644 src/Concerns/HasDataTrait.php create mode 100644 tests/Unit/AlgorithmTest.php create mode 100644 tests/Unit/Gps/AlgorithmTest.php create mode 100644 tests/Unit/Gps/PointTest.php create mode 100644 tests/Unit/Gps/SpaceTest.php diff --git a/src/Algorithm.php b/src/Algorithm.php index 00f59c0..986534d 100644 --- a/src/Algorithm.php +++ b/src/Algorithm.php @@ -25,10 +25,6 @@ public function __construct(InitializationSchemeInterface $initScheme) $this->initScheme = $initScheme; } - abstract protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float; - - 
abstract protected function findCentroid(PointCollectionInterface $points): PointInterface; - public function registerIterationCallback(callable $callback): void { $this->iterationCallbacks[] = $callback; @@ -36,12 +32,8 @@ public function registerIterationCallback(callable $callback): void public function clusterize(PointCollectionInterface $points, int $nbClusters): ClusterCollectionInterface { - try { - // initialize clusters - $clusters = $this->initScheme->initializeClusters($points, $nbClusters); - } catch (\Exception $e) { - throw new \RuntimeException("Cannot initialize clusters", 0, $e); - } + // initialize clusters + $clusters = $this->initScheme->initializeClusters($points, $nbClusters); // iterate until convergence is reached do { diff --git a/src/ClusterCollection.php b/src/ClusterCollection.php index 18b7218..07569aa 100644 --- a/src/ClusterCollection.php +++ b/src/ClusterCollection.php @@ -29,6 +29,9 @@ public function __construct(SpaceInterface $space, array $clusters = []) } } + // ------------------------------------------------------------------------ + // ClusterCollectionInterface + public function contains(ClusterInterface $cluster): bool { return $this->clusters->contains($cluster); @@ -36,7 +39,7 @@ public function contains(ClusterInterface $cluster): bool public function attach(ClusterInterface $cluster): void { - if ($cluster->getCentroid()->getSpace() !== $this->getSpace()) { + if (! 
$this->getSpace()->isEqualTo($cluster->getSpace())) { throw new \InvalidArgumentException( "Cannot add cluster to collection: cluster space is not same as collection space" ); @@ -50,6 +53,9 @@ public function detach(ClusterInterface $cluster): void $this->clusters->detach($cluster); } + // ------------------------------------------------------------------------ + // Iterator + public function current() { return $this->clusters->current(); @@ -75,6 +81,9 @@ public function valid(): bool return $this->clusters->valid(); } + // ------------------------------------------------------------------------ + // Countable + public function count(): int { return count($this->clusters); diff --git a/src/Concerns/HasDataTrait.php b/src/Concerns/HasDataTrait.php new file mode 100644 index 0000000..885a975 --- /dev/null +++ b/src/Concerns/HasDataTrait.php @@ -0,0 +1,27 @@ +data; + } + + /** + * @param mixed $data + */ + public function setData($data): void + { + $this->data = $data; + } +} diff --git a/src/Euclidean/Algorithm.php b/src/Euclidean/Algorithm.php index 741c63f..a9813a7 100644 --- a/src/Euclidean/Algorithm.php +++ b/src/Euclidean/Algorithm.php @@ -9,14 +9,26 @@ class Algorithm extends BaseAlgorithm { - protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float + public function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float { + if (! $pointA instanceof Point || ! $pointB instanceof Point) { + throw new \InvalidArgumentException( + "Euclidean Algorithm can only calculate distance between euclidean points" + ); + } + return Math::euclideanDist($pointA->getCoordinates(), $pointB->getCoordinates()); } - protected function findCentroid(PointCollectionInterface $points): PointInterface + public function findCentroid(PointCollectionInterface $points): PointInterface { - return new Point($points->getSpace(), Math::centroid( + if (! 
$points->getSpace() instanceof Space) { + throw new \InvalidArgumentException( + "Point collection should consist of Euclidean points" + ); + } + + return $points->getSpace()->makePoint(Math::centroid( array_map(fn (PointInterface $point) => $point->getCoordinates(), iterator_to_array($points)) )); } diff --git a/src/Euclidean/Point.php b/src/Euclidean/Point.php index e9e35f3..361dca8 100644 --- a/src/Euclidean/Point.php +++ b/src/Euclidean/Point.php @@ -2,6 +2,7 @@ namespace Kmeans\Euclidean; +use Kmeans\Concerns\HasDataTrait; use Kmeans\Concerns\HasSpaceTrait; use Kmeans\Interfaces\PointInterface; use Kmeans\Interfaces\SpaceInterface; @@ -9,17 +10,13 @@ class Point implements PointInterface { use HasSpaceTrait; + use HasDataTrait; /** * @var array */ private array $coordinates; - /** - * @var mixed - */ - private $data; - /** * @param array $coordinates */ @@ -27,7 +24,7 @@ public function __construct(SpaceInterface $space, array $coordinates) { if (! $space instanceof Space) { throw new \LogicException( - "An euclidean point must belong to an euclidean space." 
+ "An euclidean point must belong to an euclidean space" ); } @@ -40,16 +37,6 @@ public function getCoordinates(): array return $this->coordinates; } - public function getData() - { - return $this->data; - } - - public function setData($data): void - { - $this->data = $data; - } - /** * @param array $coordinates * @return array diff --git a/src/Euclidean/Space.php b/src/Euclidean/Space.php index f902631..dba53a6 100644 --- a/src/Euclidean/Space.php +++ b/src/Euclidean/Space.php @@ -2,6 +2,7 @@ namespace Kmeans\Euclidean; +use Kmeans\Interfaces\PointInterface; use Kmeans\Interfaces\SpaceInterface; class Space implements SpaceInterface @@ -32,4 +33,12 @@ public function isEqualTo(SpaceInterface $space): bool return $space instanceof self && $this->dimensions == $space->dimensions; } + + /** + * @param array $coordinates + */ + public function makePoint(array $coordinates): PointInterface + { + return new Point($this, $coordinates); + } } diff --git a/src/Gps/Algorithm.php b/src/Gps/Algorithm.php index 36a4ca1..b9eb4e6 100644 --- a/src/Gps/Algorithm.php +++ b/src/Gps/Algorithm.php @@ -9,18 +9,18 @@ class Algorithm extends BaseAlgorithm { - protected function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float + public function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float { if (! $pointA instanceof Point || ! $pointB instanceof Point) { throw new \InvalidArgumentException( - "Expecting \\Kmeans\\GPS\\Point" + "GPS algorithm can only calculate distance from GPS locations" ); } return Math::haversine($pointA->getCoordinates(), $pointB->getCoordinates()); } - protected function findCentroid(PointCollectionInterface $points): PointInterface + public function findCentroid(PointCollectionInterface $points): PointInterface { if (! 
$points->getSpace() instanceof Space) { throw new \InvalidArgumentException( @@ -28,11 +28,11 @@ protected function findCentroid(PointCollectionInterface $points): PointInterfac ); } - /** @var array $points */ - $points = iterator_to_array($points); + /** @var array $pointsArray */ + $pointsArray = iterator_to_array($points); - return new Point(Math::gpsCentroid( - array_map(fn (Point $point) => $point->getCoordinates(), $points) + return $points->getSpace()->makePoint(Math::gpsCentroid( + array_map(fn (Point $point) => $point->getCoordinates(), $pointsArray) )); } } diff --git a/src/Gps/Point.php b/src/Gps/Point.php index e9a75a7..d2069e3 100644 --- a/src/Gps/Point.php +++ b/src/Gps/Point.php @@ -2,36 +2,40 @@ namespace Kmeans\Gps; -use Kmeans\Euclidean\Point as BasePoint; +use Kmeans\Concerns\HasDataTrait; +use Kmeans\Concerns\HasSpaceTrait; +use Kmeans\Interfaces\PointInterface; /** * @method array{0: float, 1: float} getCoordinates() */ -class Point extends BasePoint +class Point implements PointInterface { - /** - * @param array $coordinates - */ - public function __construct(array $coordinates) - { - $this->validateCoordinates($coordinates); + use HasDataTrait; + use HasSpaceTrait; + + private float $lat; - parent::__construct(new Space(), $coordinates); + private float $long; + + public function __construct(float $lat, float $long) + { + $this->validateCoordinates($lat, $long); + $this->setSpace(Space::singleton()); + $this->lat = $lat; + $this->long = $long; } /** - * @param array $coordinates + * @return array{0: float, 1: float} */ - private function validateCoordinates(array $coordinates): void + public function getCoordinates(): array { - if (count($coordinates) != 2) { - throw new \InvalidArgumentException( - "Invalid GPS coordinates" - ); - } - - list($lat, $long) = $coordinates; + return [$this->lat, $this->long]; + } + private function validateCoordinates(float $lat, float $long): void + { if ($lat < -90 || $lat > 90 || $long < -180 || $long > 180) 
{ throw new \InvalidArgumentException( "Invalid GPS coordinates" diff --git a/src/Gps/Space.php b/src/Gps/Space.php index c6f3829..1edc687 100644 --- a/src/Gps/Space.php +++ b/src/Gps/Space.php @@ -2,12 +2,28 @@ namespace Kmeans\Gps; +use Kmeans\Interfaces\PointInterface; use Kmeans\Interfaces\SpaceInterface; class Space implements SpaceInterface { + public static function singleton(): self + { + static $space = new self(); + + return $space; + } + public function isEqualTo(SpaceInterface $other): bool { return $other instanceof self; } + + /** + * @param array{0: float, 1: float} $coordinates + */ + public function makePoint(array $coordinates): PointInterface + { + return new Point(...$coordinates); + } } diff --git a/src/Interfaces/AlgorithmInterface.php b/src/Interfaces/AlgorithmInterface.php index 79210d9..ddb00bd 100644 --- a/src/Interfaces/AlgorithmInterface.php +++ b/src/Interfaces/AlgorithmInterface.php @@ -5,4 +5,8 @@ interface AlgorithmInterface { public function clusterize(PointCollectionInterface $points, int $nbClusters): ClusterCollectionInterface; + + public function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float; + + public function findCentroid(PointCollectionInterface $points): PointInterface; } diff --git a/src/Interfaces/SpaceInterface.php b/src/Interfaces/SpaceInterface.php index 913224f..5486221 100644 --- a/src/Interfaces/SpaceInterface.php +++ b/src/Interfaces/SpaceInterface.php @@ -5,4 +5,9 @@ interface SpaceInterface { public function isEqualTo(self $space): bool; + + /** + * @param array $coordinates + */ + public function makePoint(array $coordinates): PointInterface; } diff --git a/src/RandomInitialization.php b/src/RandomInitialization.php index da841f7..c1debda 100644 --- a/src/RandomInitialization.php +++ b/src/RandomInitialization.php @@ -21,7 +21,7 @@ public function initializeClusters(PointCollectionInterface $points, int $nbClus $clusters = new ClusterCollection($points->getSpace()); - // initialize N 
clusters with a random point within space boundaries + // initialize N clusters with a random point for ($n = 0; $n < $nbClusters; $n++) { // assign all points to the first cluster only $clusters->attach(new Cluster($this->getRandomPoint($points), $n == 0 ? $points : null)); diff --git a/tests/Unit/AlgorithmTest.php b/tests/Unit/AlgorithmTest.php new file mode 100644 index 0000000..758f361 --- /dev/null +++ b/tests/Unit/AlgorithmTest.php @@ -0,0 +1,174 @@ + + */ + abstract public function clusterizeDataProvider(): array; + + /** + * @param array $center + * @return array + */ + abstract protected function random(array $center, float $radius): array; + + // ------------------------------------------------------------------------ + // tests + + /** + * @dataProvider clusterizeDataProvider + */ + public function testClusterize( + SpaceInterface $space, + float $radius, + PointCollectionInterface $points, + PointCollectionInterface $initialCentroids, + PointCollectionInterface $expectedCentroids, + ): void { + $algorithm = $this->makeAlgorithm( + $this->mockInitScheme( + $this->makeClusters($points, $initialCentroids) + ) + ); + + $result = iterator_to_array( + $algorithm->clusterize($points, count($expectedCentroids)) + ); + + foreach ($expectedCentroids as $i => $expectedCentroid) { + $this->assertLessThan( + $radius, + $algorithm->getDistanceBetween( + $expectedCentroid, + $result[$i]->getCentroid() + ) + ); + + if ( + is_array($expectedCentroid->getData()) + && isset($expectedCentroid->getData()['count']) + ) { + $this->assertCount( + $expectedCentroid->getData()['count'], + $result[$i]->getPoints() + ); + } + } + } + + // ------------------------------------------------------------------------ + // helpers + + /** + * @param array> $centers + * @return ClusterizeScenarioData + */ + protected function makeClusterizeScenarioData( + SpaceInterface $space, + array $centers, + float $radius, + int $count + ): array { + $points = new PointCollection($space); + for ($i = 
0; $i < count($centers); $i++) { + for ($j = 0; $j < $count; $j++) { + $point = $space->makePoint($this->random($centers[$i], $radius)); + $points->attach($point); + } + } + + $initialCentroids = new PointCollection($space); + for ($i = 0; $i < count($centers); $i++) { + $point = $space->makePoint($centers[$i]); + $initialCentroids->attach($point); + } + + $expectedCentroids = new PointCollection($space); + for ($i = 0; $i < count($centers); $i++) { + $point = $space->makePoint($centers[$i]); + $point->setData(['count' => $count]); + $expectedCentroids->attach($point); + } + + return compact( + 'space', + 'radius', + 'points', + 'initialCentroids', + 'expectedCentroids' + ); + } + + protected function makeClusters( + PointCollectionInterface $points, + PointCollectionInterface $centroids + ): ClusterCollectionInterface { + $clusters = new ClusterCollection($points->getSpace()); + + foreach ($centroids as $n => $centroid) { + // attach all points to the first cluster + $clusters->attach(new Cluster($centroid, $n == 0 ? 
$points : null)); + } + + return $clusters; + } + + protected function mockInitScheme( + ClusterCollectionInterface $clusters + ): InitializationSchemeInterface { + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + /** @phpstan-ignore-next-line */ + $initScheme + ->shouldReceive('initializeClusters') + ->with(PointCollectionInterface::class, Mockery::type('integer')) + ->andReturn($clusters); + + return $initScheme; + } +} diff --git a/tests/Unit/Euclidean/AlgorithmTest.php b/tests/Unit/Euclidean/AlgorithmTest.php index f503edc..8e74f1d 100644 --- a/tests/Unit/Euclidean/AlgorithmTest.php +++ b/tests/Unit/Euclidean/AlgorithmTest.php @@ -2,243 +2,148 @@ namespace Tests\Unit\Euclidean; -use Kmeans\Cluster; -use Kmeans\ClusterCollection; use Kmeans\Euclidean\Algorithm; use Kmeans\Euclidean\Point; use Kmeans\Euclidean\Space; use Kmeans\Interfaces\AlgorithmInterface; -use Kmeans\Interfaces\ClusterCollectionInterface; use Kmeans\Interfaces\InitializationSchemeInterface; use Kmeans\Interfaces\PointCollectionInterface; use Kmeans\Interfaces\SpaceInterface; use Kmeans\Math; use Kmeans\PointCollection; -use Mockery; -use PHPUnit\Framework\TestCase; +use Tests\Unit\AlgorithmTest as BaseAlgorithmTest; /** * @covers \Kmeans\Euclidean\Algorithm * @covers \Kmeans\Algorithm * @uses \Kmeans\Cluster * @uses \Kmeans\ClusterCollection + * @uses \Kmeans\Concerns\HasDataTrait + * @uses \Kmeans\Concerns\HasSpaceTrait * @uses \Kmeans\Euclidean\Point * @uses \Kmeans\Euclidean\Space * @uses \Kmeans\Math * @uses \Kmeans\PointCollection + * @phpstan-import-type ClusterizeScenarioData from BaseAlgorithmTest */ -class AlgorithmTest extends TestCase +class AlgorithmTest extends BaseAlgorithmTest { - private const MT_RAND_SEED = 123456; - - public static function setUpBeforeClass(): void - { - mt_srand(self::MT_RAND_SEED); - } - - public function tearDown(): void - { - Mockery::close(); - } - - // 
------------------------------------------------------------------------ - // tests - /** * @dataProvider clusterizeDataProvider - * @param int<0, max> $dimensions - * @param array> $expected - * @param array> $initialClusterCentroids - * @param int<0, max> $nbPointsPerCentroid */ - public function testClusterize( - int $dimensions, - array $expected, - array $initialClusterCentroids, - int $nbPointsPerCentroid + public function testIterationCallback( + SpaceInterface $space, + float $radius, + PointCollectionInterface $points, + PointCollectionInterface $initialCentroids, + PointCollectionInterface $expectedCentroids, ): void { - $space = new Space($dimensions); - $radius = 1; - - $points = $this->makePointsAround( - $space, - $expected, - $radius, - $nbPointsPerCentroid, + /** @var \Kmeans\Algorithm $algorithm */ + $algorithm = $this->makeAlgorithm( + $this->mockInitScheme( + $this->makeClusters($points, $initialCentroids) + ) ); - $clusters = $this->makeClusters( - $points, - $initialClusterCentroids - ); + $called = false; + $algorithm->registerIterationCallback(function () use (&$called) { + $called = true; + }); - $algo = new Algorithm( - $this->mockInitScheme($clusters) - ); + $algorithm->clusterize($points, count($expectedCentroids)); - $resultClusters = iterator_to_array( - $algo->clusterize($points, count($expected)) - ); - - foreach ($expected as $n => $expectedCentroid) { - // assert found cluster centroids are in the vicinity - // of expected centroids - $this->assertLessThan(1, Math::euclideanDist( - $expectedCentroid, - $resultClusters[$n]->getCentroid()->getCoordinates() - )); - - // assert found cluster has $nbPoints points attached - $this->assertCount( - $nbPointsPerCentroid, - $resultClusters[$n]->getPoints() - ); - } + $this->assertTrue($called); } - public function testClusterizeFailsWhenClusterInitializationFails(): void - { - /** @var InitializationSchemeInterface */ - $initScheme = Mockery::mock(InitializationSchemeInterface::class); - - 
/** @phpstan-ignore-next-line */ - $initScheme - ->shouldReceive('initializeClusters') - ->with(PointCollectionInterface::class, Mockery::type('integer')) - ->andThrow(new \Exception('n/a')); - - $this->expectException(\RuntimeException::class); - $this->expectExceptionMessage("Cannot initialize clusters"); - - (new Algorithm($initScheme))->clusterize(new PointCollection(new Space(1)), 1); + protected function makeAlgorithm( + InitializationSchemeInterface $initScheme + ): AlgorithmInterface { + return new Algorithm($initScheme); } - public function testIterationCallback(): void - { - $space = new Space(1); - - $points = new PointCollection($space, array_map( - fn ($coordinates) => new Point($space, $coordinates), - [[1],[2],[3],[4],[5]] - )); - - $clusters = new ClusterCollection($space, [ - new Cluster(new Point($space, [6]), $points) - ]); - - $callbackCalled = false; - - $algo = new Algorithm($this->mockInitScheme($clusters)); - $algo->registerIterationCallback( - function (AlgorithmInterface $algo, ClusterCollectionInterface $cluster) use (&$callbackCalled) { - $callbackCalled = true; - } - ); - - $algo->clusterize($points, 1); - - $this->assertTrue($callbackCalled); - } - - // ------------------------------------------------------------------------ - // data-providers - /** - * @return array + * @return array */ public function clusterizeDataProvider(): array { return [ - 'one dimension, 3 clusters, 5 points per cluster' => [ - 'dimension' => 1, - 'expected' => [ - [-50], + '1D' => $this->makeClusterizeScenarioData( + new Space(1), + [ + [-100], [0], - [50], + [100] ], - 'initialClusterCentroids' => [ - [-10], - [0], - [10] + 2, // radius + 10, // points per clusters + ), + '2D' => $this->makeClusterizeScenarioData( + new Space(2), + [ + [-100, -100], + [0, 0], + [100, 100], ], - 'nbPointsPerCentroid' => 5, - ], - - 'two dimensions, 3 clusters, 50 points per cluster' => [ - 'dimension' => 2, - 'expected' => [ - [20, 10], - [40, 20], - [60, 15], + 2, // radius 
+ 10, // points per clusters + ), + '3D' => $this->makeClusterizeScenarioData( + new Space(3), + [ + [-100, -100, -100], + [0, 0, 0], + [100, 100, 100], ], - 'initialClusterCentroids' => [ - [12, 10], - [33, 20], - [60, 10], - ], - 'nbPointsPerCentroid' => 50, - ], + 2, // radius + 10, // points per clusters + ), ]; } - // ------------------------------------------------------------------------ - // helpers - /** - * @param array> $centroids - * @param int<0, max> $nbPointsPerCentroid + * @param array $center + * @return array */ - private function makePointsAround( - SpaceInterface $space, - array $centroids, - float $radius, - int $nbPointsPerCentroid - ): PointCollectionInterface { - $points = new PointCollection($space); - - foreach ($centroids as $centroid) { - for ($i = 0; $i < $nbPointsPerCentroid; $i++) { - $coordinates = $centroid; - - foreach ($coordinates as &$n) { - list($n) = Math::gaussianNoise($n, $radius); - } + protected function random(array $center, float $radius): array + { + $point = $center; - $points->attach(new Point($space, $coordinates)); - } + foreach ($point as &$c) { + $blur = Math::gaussianNoise($c, $radius); + $c = $blur[array_rand($blur)]; } - return $points; + return $point; } /** - * @param array> $centroids + * @uses \Kmeans\Gps\Point + * @uses \Kmeans\Gps\Space */ - private function makeClusters(PointCollectionInterface $points, array $centroids): ClusterCollectionInterface + public function testGetDistanceBetweenException(): void { - $clusters = new ClusterCollection($points->getSpace()); + $this->expectException(\InvalidArgumentException::class); - foreach ($centroids as $n => $centroid) { - $clusters->attach(new Cluster( - new Point($points->getSpace(), $centroid), - $n == 0 ? 
$points : null - )); - } + /** @var InitializationSchemeInterface */ + $initScheme = \Mockery::mock(InitializationSchemeInterface::class); - return $clusters; + $algorithm = new Algorithm($initScheme); + $algorithm->getDistanceBetween( + new \Kmeans\Gps\Point(0, 0), + new \Kmeans\Gps\Point(0, 0) + ); } - private function mockInitScheme(ClusterCollectionInterface $clusters): InitializationSchemeInterface + public function testFindCentroidException(): void { - /** @var InitializationSchemeInterface */ - $initScheme = Mockery::mock(InitializationSchemeInterface::class); + $this->expectException(\InvalidArgumentException::class); - /** @phpstan-ignore-next-line */ - $initScheme - ->shouldReceive('initializeClusters') - ->with(PointCollectionInterface::class, Mockery::type('integer')) - ->andReturn($clusters); + /** @var InitializationSchemeInterface */ + $initScheme = \Mockery::mock(InitializationSchemeInterface::class); - return $initScheme; + $algorithm = new Algorithm($initScheme); + $algorithm->findCentroid( + new PointCollection(new \Kmeans\Gps\Space(), []) + ); } } diff --git a/tests/Unit/Euclidean/PointTest.php b/tests/Unit/Euclidean/PointTest.php index ee2f83d..fa0e4ee 100644 --- a/tests/Unit/Euclidean/PointTest.php +++ b/tests/Unit/Euclidean/PointTest.php @@ -4,15 +4,26 @@ use Kmeans\Euclidean\Point; use Kmeans\Euclidean\Space; +use Kmeans\Gps\Space as GpsSpace; use PHPUnit\Framework\TestCase; /** * @covers \Kmeans\Euclidean\Point * @uses \Kmeans\Concerns\HasSpaceTrait * @uses \Kmeans\Euclidean\Space + * @uses \Kmeans\Gps\Space */ class PointTest extends TestCase { + public function testInvalidSpace(): void + { + $this->expectException(\LogicException::class); + $this->expectExceptionMessage("An euclidean point must belong to an euclidean space"); + + $space = new GpsSpace(); + $point = new Point($space, [48.85889, 2.32004]); + } + public function testCoordinates(): void { $space = new Space(2); diff --git a/tests/Unit/Euclidean/SpaceTest.php 
b/tests/Unit/Euclidean/SpaceTest.php index fcb411e..f76bd2c 100644 --- a/tests/Unit/Euclidean/SpaceTest.php +++ b/tests/Unit/Euclidean/SpaceTest.php @@ -2,11 +2,13 @@ namespace Tests\Unit\Euclidean; +use Kmeans\Euclidean\Point; use Kmeans\Euclidean\Space; use PHPUnit\Framework\TestCase; /** * @covers \Kmeans\Euclidean\Space + * @uses \Kmeans\Euclidean\Point */ class SpaceTest extends TestCase { @@ -42,4 +44,12 @@ public function testIsEqualTo(): void (new Space(1))->isEqualTo(new Space(2)) ); } + + public function testMakePoint(): void + { + $this->assertInstanceof( + Point::class, + (new Space(1))->makePoint([1]) + ); + } } diff --git a/tests/Unit/Gps/AlgorithmTest.php b/tests/Unit/Gps/AlgorithmTest.php new file mode 100644 index 0000000..9583650 --- /dev/null +++ b/tests/Unit/Gps/AlgorithmTest.php @@ -0,0 +1,108 @@ + + */ + public function clusterizeDataProvider(): array + { + return [ + 'French cities' => $this->makeClusterizeScenarioData( + new Space(), + [ + [48.85889, 2.32004], // Paris + [45.75781, 4.83201], // Lyon + [43.29617, 5.36995], // Marseille + ], + 10e3, // 10km radius + 10, // points per cluster + ), + ]; + } + + /** + * @param array{0: float, 1: float} $center + * @return array{0: float, 1: float} + */ + protected function random(array $center, float $radius): array + { + //about 111300 meters in one degree + $rd = $radius / 111300; + + $u = mt_rand() / mt_getrandmax(); + $v = mt_rand() / mt_getrandmax(); + + $w = $rd * sqrt($u); + $t = 2 * pi() * $v; + $x = $w * cos($t); + $y = $w * sin($t); + + return [$y + $center[0], $x + $center[1]]; + } + + public function testGetDistanceBetweenWithInvalidPoints(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^GPS algorithm can only calculate distance from GPS locations/'); + + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + $algorithm = new Algorithm($initScheme); + 
$algorithm->getDistanceBetween( + new EuclideanPoint(new EuclideanSpace(2), [0, 1]), + new EuclideanPoint(new EuclideanSpace(2), [1, 0]) + ); + } + + public function testGetCentroiWithInvalidPoins(): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessageMatches('/^Point collection should consist of GPS coordinates/'); + + /** @var InitializationSchemeInterface */ + $initScheme = Mockery::mock(InitializationSchemeInterface::class); + + $algorithm = new Algorithm($initScheme); + $algorithm->findCentroid(new PointCollection(new EuclideanSpace(2), [ + new EuclideanPoint(new EuclideanSpace(2), [0, 1]), + new EuclideanPoint(new EuclideanSpace(2), [1, 0]) + ])); + } +} diff --git a/tests/Unit/Gps/PointTest.php b/tests/Unit/Gps/PointTest.php new file mode 100644 index 0000000..6185ba5 --- /dev/null +++ b/tests/Unit/Gps/PointTest.php @@ -0,0 +1,60 @@ +assertTrue( + $point->getSpace()->isEqualTo(new Space()) + ); + + return $point; + } + + /** + * @depends testConstruct + */ + public function testGetCoordinates(Point $point): void + { + $this->assertEquals( + [48.85889, 2.32004], + $point->getCoordinates(), + ); + } + + /** + * @dataProvider invalidGpsCoordinatesDataProvider + */ + public function testConstructWithInvalidCoordinates(float $lat, float $long): void + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessage("Invalid GPS coordinates"); + + $point = new Point($lat, $long); + } + + /** + * @return array + */ + public function invalidGpsCoordinatesDataProvider(): array + { + return [ + 'invalid lat (-91)' => [-91,0], + 'invalid lat (91)' => [91,0], + 'invalid long (-181)' => [-181,0], + 'invalid long (181)' => [181,0], + ]; + } +} diff --git a/tests/Unit/Gps/SpaceTest.php b/tests/Unit/Gps/SpaceTest.php new file mode 100644 index 0000000..9301596 --- /dev/null +++ b/tests/Unit/Gps/SpaceTest.php @@ -0,0 +1,48 @@ +assertInstanceof( + Space::class, + Space::singleton(), + ); + + 
$this->assertSame( + Space::singleton(), + Space::singleton(), + ); + } + + public function testIsEqualTo(): void + { + $this->assertTrue( + (new Space())->isEqualTo(new Space()) + ); + + $this->assertFalse( + (new Space())->isEqualTo(new EuclideanSpace(1)) + ); + } + + public function testMakePoint(): void + { + $this->assertInstanceof( + Point::class, + (new Space())->makePoint([48.85889, 2.32004]) + ); + } +} From 071ebd29ffff0a63d773552b0521cc33f5732193 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Wed, 4 May 2022 01:02:00 +0200 Subject: [PATCH 17/20] fixing Scrutinizer issues --- src/Euclidean/Point.php | 2 +- src/Gps/Point.php | 3 --- src/Math.php | 4 ++-- src/RandomInitialization.php | 11 ++--------- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/Euclidean/Point.php b/src/Euclidean/Point.php index 361dca8..31794c1 100644 --- a/src/Euclidean/Point.php +++ b/src/Euclidean/Point.php @@ -56,7 +56,7 @@ private function sanitizeCoordinates(array $coordinates): array assert(is_array($coordinates)); $errors = array_keys($coordinates, false, true); - if ($errors) { + if (! 
empty($errors)) { throw new \InvalidArgumentException(sprintf( "Invalid set of coordinates: values at offsets [%s] could not be converted to numbers", implode(',', $errors) diff --git a/src/Gps/Point.php b/src/Gps/Point.php index d2069e3..b66db67 100644 --- a/src/Gps/Point.php +++ b/src/Gps/Point.php @@ -6,9 +6,6 @@ use Kmeans\Concerns\HasSpaceTrait; use Kmeans\Interfaces\PointInterface; -/** - * @method array{0: float, 1: float} getCoordinates() - */ class Point implements PointInterface { use HasDataTrait; diff --git a/src/Math.php b/src/Math.php index 416944e..cec14be 100644 --- a/src/Math.php +++ b/src/Math.php @@ -12,7 +12,7 @@ public static function euclideanDist(array $a, array $b): float { assert(count($a) == count($b)); - for ($dist = 0, $n = 0; $n < count($a); $n++) { + for ($dist = 0, $n = 0, $c = count($a); $n < $c; $n++) { $dist += pow($a[$n] - $b[$n], 2); } @@ -46,7 +46,7 @@ public static function centroid(array $points): array * * @see https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform * - * @return array{float, float} + * @return array{0: float, 1: float} */ public static function gaussianNoise(float $mu, float $sigma): array { diff --git a/src/RandomInitialization.php b/src/RandomInitialization.php index c1debda..e997894 100644 --- a/src/RandomInitialization.php +++ b/src/RandomInitialization.php @@ -36,14 +36,7 @@ protected function getRandomPoint(PointCollectionInterface $points): PointInterf throw new \LogicException("Unable to pick a random point out of an empty point collection"); } - $num = mt_rand(0, count($points) - 1); - foreach ($points as $i => $point) { - if ($i > $num) { - break; - } - } - - assert(isset($point)); - return $point; + $arr = iterator_to_array($points); + return $arr[array_rand($arr)]; } } From 44f5a4a5a3330d5c80000c94c56a1522f20ca8b4 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Wed, 4 May 2022 12:19:36 +0200 Subject: [PATCH 18/20] updating demo.php --- demo.php | 11 ++++------- 1 file changed, 4 
insertions(+), 7 deletions(-) diff --git a/demo.php b/demo.php index 7458f59..6cc231e 100644 --- a/demo.php +++ b/demo.php @@ -16,17 +16,13 @@ ]; // create a 2-dimentions space -$space = new Kmeans\Space(2); +$space = new Kmeans\Euclidean\Space(2); // prepare the points -$points = new Kmeans\PointCollection($space); - -foreach ($data as $coordinates) { - $points->attach(new Kmeans\Point($space, $coordinates)); -} +$points = new Kmeans\PointCollection($space, array_map([$space, 'makePoint'], $data)); // prepare the algorithm -$algorithm = new Kmeans\Algorithm(new Kmeans\RandomInitialization()); +$algorithm = new Kmeans\Euclidean\Algorithm(new Kmeans\RandomInitialization()); // cluster these 50 points in 3 clusters $clusters = $algorithm->clusterize($points, 3); @@ -34,6 +30,7 @@ // display the cluster centers and attached points foreach ($clusters as $num => $cluster) { $coordinates = $cluster->getCentroid()->getCoordinates(); + assert(is_int($num)); printf( "Cluster #%s [%d,%d] has %d points\n", $num, From d11b3b99418c68cee6e44ad423ea81c5b64ac891 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Thu, 5 May 2022 02:38:16 +0200 Subject: [PATCH 19/20] adding max iterations threshold --- src/Algorithm.php | 25 +++++++++++----- tests/Unit/Euclidean/AlgorithmTest.php | 41 ++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/src/Algorithm.php b/src/Algorithm.php index 986534d..db76ef5 100644 --- a/src/Algorithm.php +++ b/src/Algorithm.php @@ -30,21 +30,32 @@ public function registerIterationCallback(callable $callback): void $this->iterationCallbacks[] = $callback; } - public function clusterize(PointCollectionInterface $points, int $nbClusters): ClusterCollectionInterface - { + public function clusterize( + PointCollectionInterface $points, + int $nClusters, + ?int $maxIter = null + ): ClusterCollectionInterface { + $maxIter ??= INF; + + if ($maxIter < 1) { + throw new \UnexpectedValueException( + "Invalid maximum number of 
iterations: {$maxIter}" + ); + } + // initialize clusters - $clusters = $this->initScheme->initializeClusters($points, $nbClusters); + $clusters = $this->initScheme->initializeClusters($points, $nClusters); // iterate until convergence is reached do { $this->invokeIterationCallbacks($clusters); - } while ($this->iterate($clusters)); + } while ($this->iterate($clusters) && --$maxIter); // clustering is done. return $clusters; } - private function iterate(ClusterCollectionInterface $clusters): bool + protected function iterate(ClusterCollectionInterface $clusters): bool { /** @var \SplObjectStorage */ $changed = new \SplObjectStorage(); @@ -78,13 +89,13 @@ private function iterate(ClusterCollectionInterface $clusters): bool private function getClosestCluster(ClusterCollectionInterface $clusters, PointInterface $point): ClusterInterface { - $min = null; + $min = INF; $closest = null; foreach ($clusters as $cluster) { $distance = $this->getDistanceBetween($point, $cluster->getCentroid()); - if (is_null($min) || $distance < $min) { + if ($distance < $min) { $min = $distance; $closest = $cluster; } diff --git a/tests/Unit/Euclidean/AlgorithmTest.php b/tests/Unit/Euclidean/AlgorithmTest.php index 8e74f1d..4c50155 100644 --- a/tests/Unit/Euclidean/AlgorithmTest.php +++ b/tests/Unit/Euclidean/AlgorithmTest.php @@ -6,11 +6,13 @@ use Kmeans\Euclidean\Point; use Kmeans\Euclidean\Space; use Kmeans\Interfaces\AlgorithmInterface; +use Kmeans\Interfaces\ClusterCollectionInterface; use Kmeans\Interfaces\InitializationSchemeInterface; use Kmeans\Interfaces\PointCollectionInterface; use Kmeans\Interfaces\SpaceInterface; use Kmeans\Math; use Kmeans\PointCollection; +use Kmeans\RandomInitialization; use Tests\Unit\AlgorithmTest as BaseAlgorithmTest; /** @@ -24,6 +26,7 @@ * @uses \Kmeans\Euclidean\Space * @uses \Kmeans\Math * @uses \Kmeans\PointCollection + * @uses \Kmeans\RandomInitialization * @phpstan-import-type ClusterizeScenarioData from BaseAlgorithmTest */ class AlgorithmTest 
extends BaseAlgorithmTest @@ -146,4 +149,42 @@ public function testFindCentroidException(): void new PointCollection(new \Kmeans\Gps\Space(), []) ); } + + public function testMaxIterations(): void + { + $algorithm = new class (new RandomInitialization()) extends Algorithm { + protected function iterate(ClusterCollectionInterface $clusters): bool + { + // do nothing and iterate indefinitely + return true; + } + }; + + $iterations = 0; + $algorithm->registerIterationCallback(function () use (&$iterations) { + $iterations++; + }); + + $space = new Space(1); + $points = new PointCollection( + $space, + array_map([$space, 'makePoint'], [[1],[2],[3]]) + ); + + $algorithm->clusterize($points, 3, 300); + + $this->assertEquals( + 300, + $iterations + ); + } + + public function testMaxIterationsException(): void + { + $this->expectException(\UnexpectedValueException::class); + $this->expectExceptionMessageMatches('/^Invalid maximum number of iterations/'); + + $algorithm = new Algorithm(new RandomInitialization()); + $algorithm->clusterize(new PointCollection(new Space(1), []), 3, 0); + } } From 8c3eefe06163efd65d9db8bbcd789f53155c2843 Mon Sep 17 00:00:00 2001 From: Benjamin Delespierre Date: Fri, 6 May 2022 02:54:16 +0200 Subject: [PATCH 20/20] replacing 'clusterize' by 'fit' --- demo.php | 2 +- src/Algorithm.php | 2 +- src/Interfaces/AlgorithmInterface.php | 2 +- tests/Unit/AlgorithmTest.php | 16 ++++++++-------- tests/Unit/Euclidean/AlgorithmTest.php | 20 ++++++++++---------- tests/Unit/Gps/AlgorithmTest.php | 8 ++++---- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/demo.php b/demo.php index 6cc231e..3e9c385 100644 --- a/demo.php +++ b/demo.php @@ -25,7 +25,7 @@ $algorithm = new Kmeans\Euclidean\Algorithm(new Kmeans\RandomInitialization()); // cluster these 50 points in 3 clusters -$clusters = $algorithm->clusterize($points, 3); +$clusters = $algorithm->fit($points, 3); // display the cluster centers and attached points foreach ($clusters as $num => 
$cluster) { diff --git a/src/Algorithm.php b/src/Algorithm.php index db76ef5..8e83045 100644 --- a/src/Algorithm.php +++ b/src/Algorithm.php @@ -30,7 +30,7 @@ public function registerIterationCallback(callable $callback): void $this->iterationCallbacks[] = $callback; } - public function clusterize( + public function fit( PointCollectionInterface $points, int $nClusters, ?int $maxIter = null diff --git a/src/Interfaces/AlgorithmInterface.php b/src/Interfaces/AlgorithmInterface.php index ddb00bd..793c3bc 100644 --- a/src/Interfaces/AlgorithmInterface.php +++ b/src/Interfaces/AlgorithmInterface.php @@ -4,7 +4,7 @@ interface AlgorithmInterface { - public function clusterize(PointCollectionInterface $points, int $nbClusters): ClusterCollectionInterface; + public function fit(PointCollectionInterface $points, int $nbClusters): ClusterCollectionInterface; public function getDistanceBetween(PointInterface $pointA, PointInterface $pointB): float; diff --git a/tests/Unit/AlgorithmTest.php b/tests/Unit/AlgorithmTest.php index 758f361..37f3a9b 100644 --- a/tests/Unit/AlgorithmTest.php +++ b/tests/Unit/AlgorithmTest.php @@ -18,7 +18,7 @@ * @covers \Kmeans\Algorithm * @uses \Kmeans\Cluster * @uses \Kmeans\ClusterCollection - * @phpstan-type ClusterizeScenarioData array{ + * @phpstan-type ScenarioData array{ * space: SpaceInterface, * radius: float, * points: PointCollectionInterface, @@ -46,9 +46,9 @@ public function tearDown(): void abstract protected function makeAlgorithm(InitializationSchemeInterface $initScheme): AlgorithmInterface; /** - * @return array + * @return array */ - abstract public function clusterizeDataProvider(): array; + abstract public function fitDataProvider(): array; /** * @param array $center @@ -60,9 +60,9 @@ abstract protected function random(array $center, float $radius): array; // tests /** - * @dataProvider clusterizeDataProvider + * @dataProvider fitDataProvider */ - public function testClusterize( + public function testFit( SpaceInterface $space, 
float $radius, PointCollectionInterface $points, @@ -76,7 +76,7 @@ public function testClusterize( ); $result = iterator_to_array( - $algorithm->clusterize($points, count($expectedCentroids)) + $algorithm->fit($points, count($expectedCentroids)) ); foreach ($expectedCentroids as $i => $expectedCentroid) { @@ -105,9 +105,9 @@ public function testClusterize( /** * @param array> $centers - * @return ClusterizeScenarioData + * @return ScenarioData */ - protected function makeClusterizeScenarioData( + protected function makeScenarioData( SpaceInterface $space, array $centers, float $radius, diff --git a/tests/Unit/Euclidean/AlgorithmTest.php b/tests/Unit/Euclidean/AlgorithmTest.php index 4c50155..023480e 100644 --- a/tests/Unit/Euclidean/AlgorithmTest.php +++ b/tests/Unit/Euclidean/AlgorithmTest.php @@ -27,12 +27,12 @@ * @uses \Kmeans\Math * @uses \Kmeans\PointCollection * @uses \Kmeans\RandomInitialization - * @phpstan-import-type ClusterizeScenarioData from BaseAlgorithmTest + * @phpstan-import-type ScenarioData from BaseAlgorithmTest */ class AlgorithmTest extends BaseAlgorithmTest { /** - * @dataProvider clusterizeDataProvider + * @dataProvider fitDataProvider */ public function testIterationCallback( SpaceInterface $space, @@ -53,7 +53,7 @@ public function testIterationCallback( $called = true; }); - $algorithm->clusterize($points, count($expectedCentroids)); + $algorithm->fit($points, count($expectedCentroids)); $this->assertTrue($called); } @@ -65,12 +65,12 @@ protected function makeAlgorithm( } /** - * @return array + * @return array */ - public function clusterizeDataProvider(): array + public function fitDataProvider(): array { return [ - '1D' => $this->makeClusterizeScenarioData( + '1D' => $this->makeScenarioData( new Space(1), [ [-100], @@ -80,7 +80,7 @@ public function clusterizeDataProvider(): array 2, // radius 10, // points per clusters ), - '2D' => $this->makeClusterizeScenarioData( + '2D' => $this->makeScenarioData( new Space(2), [ [-100, -100], @@ 
-90,7 +90,7 @@ public function clusterizeDataProvider(): array 2, // radius 10, // points per clusters ), - '3D' => $this->makeClusterizeScenarioData( + '3D' => $this->makeScenarioData( new Space(3), [ [-100, -100, -100], @@ -171,7 +171,7 @@ protected function iterate(ClusterCollectionInterface $clusters): bool array_map([$space, 'makePoint'], [[1],[2],[3]]) ); - $algorithm->clusterize($points, 3, 300); + $algorithm->fit($points, 3, 300); $this->assertEquals( 300, @@ -185,6 +185,6 @@ public function testMaxIterationsException(): void $this->expectExceptionMessageMatches('/^Invalid maximum number of iterations/'); $algorithm = new Algorithm(new RandomInitialization()); - $algorithm->clusterize(new PointCollection(new Space(1), []), 3, 0); + $algorithm->fit(new PointCollection(new Space(1), []), 3, 0); } } diff --git a/tests/Unit/Gps/AlgorithmTest.php b/tests/Unit/Gps/AlgorithmTest.php index 9583650..26aef51 100644 --- a/tests/Unit/Gps/AlgorithmTest.php +++ b/tests/Unit/Gps/AlgorithmTest.php @@ -27,7 +27,7 @@ * @uses \Kmeans\Gps\Space * @uses \Kmeans\Math * @uses \Kmeans\PointCollection - * @phpstan-import-type ClusterizeScenarioData from BaseAlgorithmTest + * @phpstan-import-type ScenarioData from BaseAlgorithmTest */ class AlgorithmTest extends BaseAlgorithmTest { @@ -38,12 +38,12 @@ protected function makeAlgorithm( } /** - * @return array + * @return array */ - public function clusterizeDataProvider(): array + public function fitDataProvider(): array { return [ - 'French cities' => $this->makeClusterizeScenarioData( + 'French cities' => $this->makeScenarioData( new Space(), [ [48.85889, 2.32004], // Paris