Skip to content

Commit 426312a

Browse files
authored
Merge branch 'development' into add-contribution-guides
2 parents 4eb747a + 764309e commit 426312a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+4947
-176
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,24 @@ name: CI
22

33
on:
44
push:
5-
branches: [ main, development ]
5+
branches: [main, development]
66
pull_request:
7-
branches: [ development ]
7+
branches: [development]
88

99
jobs:
1010
tests:
1111
runs-on: "${{ matrix.platform.os }}-latest"
1212
strategy:
1313
matrix:
14-
platform: [
15-
{ os: "windows", target: "x86_64-pc-windows-msvc" },
16-
{ os: "windows", target: "i686-pc-windows-msvc" },
17-
{ os: "ubuntu", target: "x86_64-unknown-linux-gnu" },
18-
{ os: "ubuntu", target: "i686-unknown-linux-gnu" },
19-
{ os: "ubuntu", target: "wasm32-unknown-unknown" },
20-
{ os: "macos", target: "aarch64-apple-darwin" },
21-
]
14+
platform:
15+
[
16+
{ os: "windows", target: "x86_64-pc-windows-msvc" },
17+
{ os: "windows", target: "i686-pc-windows-msvc" },
18+
{ os: "ubuntu", target: "x86_64-unknown-linux-gnu" },
19+
{ os: "ubuntu", target: "i686-unknown-linux-gnu" },
20+
{ os: "ubuntu", target: "wasm32-unknown-unknown" },
21+
{ os: "macos", target: "aarch64-apple-darwin" },
22+
]
2223
env:
2324
TZ: "/usr/share/zoneinfo/your/location"
2425
steps:
@@ -40,7 +41,7 @@ jobs:
4041
default: true
4142
- name: Install test runner for wasm
4243
if: matrix.platform.target == 'wasm32-unknown-unknown'
43-
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
44+
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
4445
- name: Stable Build
4546
uses: actions-rs/cargo@v1
4647
with:

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased]
88

9+
## Added
10+
- Seeds to multiple algorithms that depend on random number generation.
11+
- Added feature `js` to use WASM in browser
12+
13+
## BREAKING CHANGE
14+
- Added a new parameter to `train_test_split` to define the seed.
15+
16+
## [0.2.1] - 2022-05-10
17+
918
## Added
1019
- L2 regularization penalty to the Logistic Regression
1120
- Getters for the naive bayes structs

Cargo.toml

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,29 @@ categories = ["science"]
1616
default = ["datasets"]
1717
ndarray-bindings = ["ndarray"]
1818
nalgebra-bindings = ["nalgebra"]
19-
datasets = []
19+
datasets = ["rand_distr", "std"]
20+
fp_bench = ["itertools"]
21+
std = ["rand/std", "rand/std_rng"]
22+
# wasm32 only
23+
js = ["getrandom/js"]
2024

2125
[dependencies]
2226
ndarray = { version = "0.15", optional = true }
23-
nalgebra = { version = "0.23.0", optional = true }
24-
num-traits = "0.2.12"
25-
num = "0.4.0"
26-
rand = "0.8.3"
27-
rand_distr = "0.4.0"
28-
serde = { version = "1.0.115", features = ["derive"], optional = true }
27+
nalgebra = { version = "0.31", optional = true }
28+
num-traits = "0.2"
29+
num = "0.4"
30+
rand = { version = "0.8", default-features = false, features = ["small_rng"] }
31+
rand_distr = { version = "0.4", optional = true }
32+
serde = { version = "1", features = ["derive"], optional = true }
33+
itertools = { version = "0.10.3", optional = true }
34+
cfg-if = "1.0.0"
2935

3036
[target.'cfg(target_arch = "wasm32")'.dependencies]
31-
getrandom = { version = "0.2", features = ["js"] }
37+
getrandom = { version = "0.2", optional = true }
3238

3339
[dev-dependencies]
34-
criterion = "0.3"
40+
smartcore = { path = ".", features = ["fp_bench"] }
41+
criterion = { version = "0.4", default-features = false }
3542
serde_json = "1.0"
3643
bincode = "1.3.1"
3744

@@ -46,3 +53,8 @@ harness = false
4653
name = "naive_bayes"
4754
harness = false
4855
required-features = ["ndarray-bindings", "nalgebra-bindings"]
56+
57+
[[bench]]
58+
name = "fastpair"
59+
harness = false
60+
required-features = ["fp_bench"]

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@
186186
same "printed page" as the copyright notice for easier
187187
identification within third-party archives.
188188

189-
Copyright [yyyy] [name of copyright owner]
189+
Copyright 2019-present at SmartCore developers (smartcorelib.org)
190190

191191
Licensed under the Apache License, Version 2.0 (the "License");
192192
you may not use this file except in compliance with the License.

benches/fastpair.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
2+
3+
// to run this bench you have to change the declaration in mod.rs ---> pub mod fastpair;
4+
use smartcore::algorithm::neighbour::fastpair::FastPair;
5+
use smartcore::linalg::naive::dense_matrix::*;
6+
use std::time::Duration;
7+
8+
fn closest_pair_bench(n: usize, m: usize) -> () {
9+
let x = DenseMatrix::<f64>::rand(n, m);
10+
let fastpair = FastPair::new(&x);
11+
let result = fastpair.unwrap();
12+
13+
result.closest_pair();
14+
}
15+
16+
fn closest_pair_brute_bench(n: usize, m: usize) -> () {
17+
let x = DenseMatrix::<f64>::rand(n, m);
18+
let fastpair = FastPair::new(&x);
19+
let result = fastpair.unwrap();
20+
21+
result.closest_pair_brute();
22+
}
23+
24+
/// Registers the "FastPair" benchmark group on the given Criterion instance.
///
/// For every combination of `n_samples` in {100, 1000} and `n_features` in
/// {10, 100, 1000}, two benchmarks are registered: one calling
/// `closest_pair_bench` and one calling `closest_pair_brute_bench`, so both
/// can be compared on the same input sizes.
fn bench_fastpair(c: &mut Criterion) {
25+
let mut group = c.benchmark_group("FastPair");
26+
27+
// with the full sample size (100) the benchmark would take too long
28+
group.significance_level(0.1).sample_size(30);
29+
// increase from default 5.0 secs
30+
group.measurement_time(Duration::from_secs(60));
31+
32+
for n_samples in [100_usize, 1000_usize].iter() {
33+
for n_features in [10_usize, 100_usize, 1000_usize].iter() {
34+
// The input handed to `bench_with_input` is ignored by the closure (`_`);
// the sizes are captured from the enclosing loops instead.
group.bench_with_input(
35+
BenchmarkId::from_parameter(format!(
36+
"fastpair --- n_samples: {}, n_features: {}",
37+
n_samples, n_features
38+
)),
39+
n_samples,
40+
|b, _| b.iter(|| closest_pair_bench(*n_samples, *n_features)),
41+
);
42+
// Same sizes, brute-force variant.
group.bench_with_input(
43+
BenchmarkId::from_parameter(format!(
44+
"brute --- n_samples: {}, n_features: {}",
45+
n_samples, n_features
46+
)),
47+
n_samples,
48+
|b, _| b.iter(|| closest_pair_brute_bench(*n_samples, *n_features)),
49+
);
50+
}
51+
}
52+
group.finish();
53+
}
54+
55+
criterion_group!(benches, bench_fastpair);
56+
criterion_main!(benches);

src/algorithm/neighbour/bbd_tree.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ impl<T: RealNumber> BBDTree<T> {
5959
tree
6060
}
6161

62-
pub(in crate) fn clustering(
62+
pub(crate) fn clustering(
6363
&self,
6464
centroids: &[Vec<T>],
6565
sums: &mut Vec<Vec<T>>,
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
//!
2+
//! Dissimilarities for vector-vector distance
3+
//!
4+
//! Represents distances as pairwise dissimilarities in order to build a
5+
//! graph of closest neighbours. This representation can be reused for
6+
//! different implementations (initially used in this library for FastPair).
7+
use std::cmp::{Eq, Ordering, PartialOrd};
8+
9+
#[cfg(feature = "serde")]
10+
use serde::{Deserialize, Serialize};
11+
12+
use crate::math::num::RealNumber;
13+
14+
///
15+
/// The edge of the subgraph is defined by `PairwiseDistance`.
16+
/// The calling algorithm can store a list of distances as
17+
/// a list of these structures.
18+
///
19+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
20+
#[derive(Debug, Clone, Copy)]
21+
pub struct PairwiseDistance<T: RealNumber> {
22+
/// Index of the vector in the original `Matrix` or list.
23+
pub node: usize,
24+
25+
/// Index of the closest neighbour in the original `Matrix` or the same list;
/// optional, so an edge may have no neighbour recorded.
26+
pub neighbour: Option<usize>,
27+
28+
/// Measure of distance, according to the algorithm's distance function.
29+
/// If the distance is `None`, the edge has value "infinite" or max distance;
30+
/// each algorithm has to handle that case itself.
31+
pub distance: Option<T>,
32+
}
33+
34+
// Marker impl declaring `==` on `PairwiseDistance` a full equivalence relation.
// NOTE(review): presumably `T` can be a floating-point type; a NaN `distance`
// would then compare unequal to itself — confirm such values never occur.
impl<T: RealNumber> Eq for PairwiseDistance<T> {}
35+
36+
// Equality is field-wise: two edges are equal only when `node`, `neighbour`
// and `distance` all match.
impl<T: RealNumber> PartialEq for PairwiseDistance<T> {
37+
fn eq(&self, other: &Self) -> bool {
38+
self.node == other.node
39+
&& self.neighbour == other.neighbour
40+
&& self.distance == other.distance
41+
}
42+
}
43+
44+
// Orders edges by `distance` only; `node` and `neighbour` are ignored.
// `Option`'s own ordering applies, so a `None` distance sorts before any `Some`.
// NOTE(review): `eq` also compares `node` and `neighbour`, so two values can be
// `Equal` here yet `!=` — confirm callers do not rely on the two agreeing.
// NOTE(review): the `distance` field doc describes `None` as "infinite", which
// this ordering does not reflect — confirm this is intended.
impl<T: RealNumber> PartialOrd for PairwiseDistance<T> {
45+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
46+
self.distance.partial_cmp(&other.distance)
47+
}
48+
}

0 commit comments

Comments
 (0)