diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fd7e80..9cdf9b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,8 @@ target_link_options(gp_image_test PRIVATE -Wall -Wextra -Werror -Wpedantic -Wno- target_link_libraries(gp_image_test BLT_WITH_GRAPHICS) +target_link_libraries(gp_image_test ${CMAKE_SOURCE_DIR}/extern/bindings/target/release/libbindings.so) + if (${ENABLE_ADDRSAN} MATCHES ON) target_compile_options(gp_image_test PRIVATE -fsanitize=address) target_link_options(gp_image_test PRIVATE -fsanitize=address) diff --git a/extern/bindings/.gitignore b/extern/bindings/.gitignore new file mode 100644 index 0000000..2f7896d --- /dev/null +++ b/extern/bindings/.gitignore @@ -0,0 +1 @@ +target/ diff --git a/extern/bindings/Cargo.lock b/extern/bindings/Cargo.lock new file mode 100644 index 0000000..9a299fc --- /dev/null +++ b/extern/bindings/Cargo.lock @@ -0,0 +1,257 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "approx" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f2a05fd1bd10b2527e20a2cd32d8873d115b8b39fe219ee25f42a8aca6ba278" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bindings" +version = "0.1.0" +dependencies = [ + "stattest", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "matrixmultiply" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916806ba0031cd542105d916a97c8572e1fa6dd79c9c51e7eb43a09ec2dd84c1" +dependencies = [ + "rawpointer", +] + +[[package]] +name = "nalgebra" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9bd52e9c9922d5702d5c2a105ceacc3c1206e781d3d130f4d2b7ad5f43c144b" +dependencies = [ + "approx", + "generic-array", + "matrixmultiply", + "num-complex", + "num-rational", + "num-traits", + "rand", + "rand_distr", + "simba", + "typenum", +] + +[[package]] +name = "num-complex" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747d632c0c558b87dbabbe6a82f3b4ae03720d0646ac5b7b4dae89394be5f2c5" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12ac428b1cb17fce6f731001d307d351ec70a6d202fc2e60f7d4c5e42d8f4f07" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom", + "libc", + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_distr" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e9532ada3929fb8b2e9dbe28d1e06c9b2cc65813f074fcb6bd5fbefeff9d56" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "simba" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17bfe642b1728a6e89137ad428ef5d4738eca4efaba9590f9e110b8944028621" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", +] + +[[package]] +name = "statrs" +version = "0.13.0" +source = "git+https://github.com/larsgw/statrs?branch=patch-1#79f638351f70590ec44a1b57e660b1dcb009ac33" +dependencies = [ + "approx", + "lazy_static", + "nalgebra", + "num-traits", + "rand", +] + +[[package]] +name = "stattest" +version = "0.0.0" +source = "git+https://github.com/Tri11Paragon/stattest#d05d11f1515278904c1be74a76311e9c4550eeae" +dependencies = [ + "rand", + "statrs", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" diff --git a/extern/bindings/Cargo.toml b/extern/bindings/Cargo.toml new file mode 100644 index 0000000..a1e2564 --- /dev/null +++ b/extern/bindings/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "bindings" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["cdylib"] +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +stattest = { git = "https://github.com/Tri11Paragon/stattest" } diff --git a/extern/bindings/src/lib.rs b/extern/bindings/src/lib.rs new file mode 100644 index 0000000..b38c55f --- /dev/null +++ b/extern/bindings/src/lib.rs @@ -0,0 +1,6 @@ +#![crate_type="cdylib"] + +#[no_mangle] +pub extern "C" fn test() { + println!("Hello"); +} diff --git a/extern/stattest/.github/workflows/rust.yml b/extern/stattest/.github/workflows/rust.yml new file mode 100644 index 0000000..7ae98f3 --- /dev/null +++ b/extern/stattest/.github/workflows/rust.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/extern/stattest/.gitignore b/extern/stattest/.gitignore new file mode 100644 index 0000000..a13b9ba --- /dev/null +++ b/extern/stattest/.gitignore @@ -0,0 +1,3 @@ +/dev +/target +Cargo.lock diff --git a/extern/stattest/Cargo.toml b/extern/stattest/Cargo.toml new file mode 100644 index 0000000..a2c8dd9 --- /dev/null +++ b/extern/stattest/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "stattest" +version = "0.0.0" +authors = ["Lars Willighagen "] +edition = "2018" + +[lib] +doctest = false + +[dependencies] +statrs = { git = "https://github.com/larsgw/statrs", branch = "patch-1" } +rand = "0.7.3" diff --git a/extern/stattest/LICENSE b/extern/stattest/LICENSE new file mode 100644 index 0000000..5926a00 --- /dev/null +++ b/extern/stattest/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Lars Willighagen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extern/stattest/README.md b/extern/stattest/README.md new file mode 100644 index 0000000..3b53a4b --- /dev/null +++ b/extern/stattest/README.md @@ -0,0 +1,50 @@ +# Stat-Test + +This crate provides statistical tests and relevant utilities. + +--- + +## Status + +Currently a number of tests are implemented for comparing the means of two +independent samples, and for checking assumptions regarding the former tests. +However, these tests are all two-sample tests that do not yet support specifying +alternative hypotheses. + +### Comparison of independent means + + - **Student's t-test** + `stattest::test::StudentsTTest` + *Assumptions:* normality, homogeneity of variances + + - **Welch's t-test** + `stattest::test::WelchsTTest` + *Assumptions:* normality + + - **Mann-Whitney U-test/Wilcoxon rank-sum test** + `stattest::test::MannWhitneyUTest` + *Assumptions:* – + +### Comparison of paired observations + + - **Student's t-test** + `stattest::test::StudentsTTest` + *Assumptions:* normality, homogeneity of variances + + - **Wilcoxon signed rank test** + `stattest::test::WilcoxonWTest` + *Assumptions:* – + +### Assumption tests + + - **Levene's test** + `stattest::test::LevenesTest` + *Tests:* homogeneity of variances + + - **F-test of equality of variances** + `stattest::test::FTest` + *Tests:* homogeneity of variances + + - **Shapiro-Wilk test** + `stattest::test::ShapiroWilkTest` + *Tests:* normality diff --git a/extern/stattest/src/distribution/mod.rs b/extern/stattest/src/distribution/mod.rs new file mode 100644 index 0000000..4ab736e --- /dev/null +++ b/extern/stattest/src/distribution/mod.rs @@ -0,0 +1,7 @@ +//! Defines interfaces for creating and approximating statistical distributions. + +pub use self::shapiro_wilk::*; +pub use self::signed_rank::*; + +mod shapiro_wilk; +mod signed_rank; diff --git a/extern/stattest/src/distribution/rank_sum.rs b/extern/stattest/src/distribution/rank_sum.rs new file mode 100644 index 0000000..e69de29 diff --git a/extern/stattest/src/distribution/shapiro_wilk.rs b/extern/stattest/src/distribution/shapiro_wilk.rs new file mode 100644 index 0000000..b4473ca --- /dev/null +++ b/extern/stattest/src/distribution/shapiro_wilk.rs @@ -0,0 +1,139 @@ +use rand::Rng; +use statrs::distribution::{Continuous, ContinuousCDF, Normal}; +use statrs::function::evaluate::polynomial; +use statrs::statistics::*; +use statrs::Result; + +// Polynomials used to calculate the mean and standard deviation +// of a normal distribution that approximates the distribution of +// the W statistic of the Shapiro-Wilk test (Royston, 1992). +static C3: [f64; 4] = [0.5440, -0.39978, 0.025054, -0.0006714]; +static C4: [f64; 4] = [1.3822, -0.77857, 0.062767, -0.0020322]; +static C5: [f64; 4] = [-1.5861, -0.31082, -0.083751, 0.0038915]; +static C6: [f64; 3] = [-0.4803, -0.082676, 0.0030302]; + +/// Implements an approximation of the distribution of the W +/// statistic of the [Shapiro-Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test). +/// +/// # References +/// +/// Royston, P. (1992). Approximating the Shapiro-Wilk W-test for non-normality. +/// Statistics and Computing, 2(3), 117–119. +/// +/// Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality +/// (complete samples)†. Biometrika, 52(3–4), 591–611. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct ShapiroWilk { + normal: Normal, + n: usize, + mean: f64, + std_dev: f64, +} + +impl ShapiroWilk { + /// Create a new approximation for the distribution of W for a given sample size `n`. + /// + /// # Examples + /// + /// ``` + /// use stattest::distribution::ShapiroWilk; + /// + /// let result = ShapiroWilk::new(25); + /// assert!(result.is_ok()); + /// ``` + pub fn new(n: usize) -> Result { + let float_n = n as f64; + + let (mean, std_dev) = if n <= 11 { + (polynomial(float_n, &C3), polynomial(float_n, &C4).exp()) + } else { + let log_n = float_n.ln(); + (polynomial(log_n, &C5), polynomial(log_n, &C6).exp()) + }; + + let result = Normal::new(mean, std_dev); + match result { + Ok(normal) => Ok(ShapiroWilk { + normal, + n, + mean, + std_dev, + }), + Err(error) => Err(error), + } + } + + /// Return the sample size associated with this distribution. + pub fn n(&self) -> usize { + self.n + } +} + +impl ::rand::distributions::Distribution for ShapiroWilk { + fn sample(&self, r: &mut R) -> f64 { + ::rand::distributions::Distribution::sample(&self.normal, r) + } +} + +impl ContinuousCDF for ShapiroWilk { + fn cdf(&self, x: f64) -> f64 { + self.normal.cdf(x) + } + + fn inverse_cdf(&self, x: f64) -> f64 { + self.normal.inverse_cdf(x) + } +} + +impl Min for ShapiroWilk { + fn min(&self) -> f64 { + self.normal.min() + } +} + +impl Max for ShapiroWilk { + fn max(&self) -> f64 { + self.normal.max() + } +} + +impl Distribution for ShapiroWilk { + fn mean(&self) -> Option { + self.normal.mean() + } + fn variance(&self) -> Option { + self.normal.variance() + } + fn entropy(&self) -> Option { + self.normal.entropy() + } + fn skewness(&self) -> Option { + self.normal.skewness() + } +} + +impl Mode> for ShapiroWilk { + fn mode(&self) -> Option { + self.normal.mode() + } +} + +impl Continuous for ShapiroWilk { + fn pdf(&self, x: f64) -> f64 { + self.normal.pdf(x) + } + + fn ln_pdf(&self, x: f64) -> f64 { + self.normal.ln_pdf(x) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn n_25() { + let distribution = super::ShapiroWilk::new(25).unwrap(); + assert_eq!(distribution.mean, -3.3245620722111475); + assert_eq!(distribution.std_dev, 0.4891787150186814); + } +} diff --git a/extern/stattest/src/distribution/signed_rank.rs b/extern/stattest/src/distribution/signed_rank.rs new file mode 100644 index 0000000..a846b73 --- /dev/null +++ b/extern/stattest/src/distribution/signed_rank.rs @@ -0,0 +1,313 @@ +use rand::Rng; +use statrs::distribution::{Continuous, ContinuousCDF, Normal}; +use statrs::function::factorial::binomial; +use statrs::statistics::*; +use statrs::Result; + +/// Implements an approximation of the distribution of the W +/// statistic of the [Wilcoxon signed rank test](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test). +/// +/// # References +/// +/// Wilcoxon, F. (1945). Individual Comparisons by Ranking Methods. +/// Biometrics Bulletin, 1(6), 80–83. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct SignedRank { + approximation: Approximation, + n: usize, +} + +#[derive(Debug, Copy, Clone, PartialEq)] +enum Approximation { + Normal(Normal), + Exact, +} + +impl SignedRank { + /// Determine whether to use an approximation or an exact distribution of W for a given sample size `n` + /// and given a number of paired observations with a non-zero difference `non_zero`. + /// + /// # Examples + /// + /// ``` + /// use stattest::distribution::SignedRank; + /// + /// let result = SignedRank::new(25, 25); + /// assert!(result.is_ok()); + /// ``` + pub fn new(n: usize, zeroes: usize, tie_correction: usize) -> Result { + if zeroes > 0 || tie_correction > 0 || (n - zeroes) >= 20 { + SignedRank::approximate(n - zeroes, tie_correction) + } else { + SignedRank::exact(n) + } + } + + /// Create a new approximation for the distribution of W for a given sample size `n` + /// and given a number of paired observations with a non-zero difference `non_zero`. + /// + /// # Examples + /// + /// ``` + /// use stattest::distribution::SignedRank; + /// + /// let result = SignedRank::approximate(25, 25); + /// assert!(result.is_ok()); + /// ``` + pub fn approximate(n: usize, tie_correction: usize) -> Result { + let mean = (n * (n + 1)) as f64 / 4.0; + let var = mean * (2 * n + 1) as f64 / 6.0 - tie_correction as f64 / 48.0; + let normal = Normal::new(mean, var.sqrt())?; + + Ok(SignedRank { + n, + approximation: Approximation::Normal(normal), + }) + } + + /// Create a new exact distribution of W for a given sample size `n`. + /// + /// # Examples + /// + /// ``` + /// use stattest::distribution::SignedRank; + /// + /// let result = SignedRank::exact(25); + /// assert!(result.is_ok()); + /// ``` + pub fn exact(n: usize) -> Result { + Ok(SignedRank { + n, + approximation: Approximation::Exact, + }) + } +} + +fn partitions(number: usize, parts: usize, size: usize) -> usize { + if number == parts { + 1 + } else if number == 0 || parts == 0 || size == 0 { + 0 + } else { + (1..=std::cmp::min(number, size)) + .map(|highest_part| partitions(number - highest_part, parts - 1, highest_part)) + .sum() + } +} + +impl ::rand::distributions::Distribution for SignedRank { + fn sample(&self, r: &mut R) -> f64 { + match self.approximation { + Approximation::Normal(normal) => { + ::rand::distributions::Distribution::sample(&normal, r) + } + Approximation::Exact => { + todo!(); + } + } + } +} + +impl ContinuousCDF for SignedRank { + fn cdf(&self, x: f64) -> f64 { + match self.approximation { + Approximation::Normal(normal) => 2.0 * normal.cdf(x), + Approximation::Exact => { + let r = x.round() as usize; + let mut sum = 1; + + for n in 1..=self.n { + let n_choose_2 = binomial(n as u64, 2) as usize; + if n_choose_2 > r { + continue; + } + for i in n..=(r - n_choose_2) { + sum += partitions(i, n, self.n - n + 1); + } + } + + sum as f64 / 2_f64.powi(self.n as i32 - 1) + } + } + } + + fn inverse_cdf(&self, x: f64) -> f64 { + match self.approximation { + Approximation::Normal(normal) => normal.inverse_cdf(x), + Approximation::Exact => { + todo!(); + } + } + } +} + +impl Min for SignedRank { + fn min(&self) -> f64 { + 0.0 + } +} + +impl Max for SignedRank { + fn max(&self) -> f64 { + (self.n * (self.n + 1) / 2) as f64 + } +} + +impl Distribution for SignedRank { + fn mean(&self) -> Option { + match self.approximation { + Approximation::Normal(normal) => normal.mean(), + Approximation::Exact => Some((self.n * (self.n + 1)) as f64 / 4.0), + } + } + + fn variance(&self) -> Option { + match self.approximation { + Approximation::Normal(normal) => normal.variance(), + Approximation::Exact => Some((self.n * (self.n + 1) * (2 * self.n + 1)) as f64 / 24.0), + } + } + + fn entropy(&self) -> Option { + match self.approximation { + Approximation::Normal(normal) => normal.entropy(), + Approximation::Exact => None, + } + } + + fn skewness(&self) -> Option { + match self.approximation { + Approximation::Normal(normal) => normal.skewness(), + Approximation::Exact => Some(0.0), + } + } +} + +impl Median for SignedRank { + fn median(&self) -> f64 { + match self.approximation { + Approximation::Normal(normal) => normal.median(), + Approximation::Exact => self.mean().unwrap(), + } + } +} + +impl Mode> for SignedRank { + fn mode(&self) -> Option { + match self.approximation { + Approximation::Normal(normal) => normal.mode(), + Approximation::Exact => self.mean(), + } + } +} + +impl Continuous for SignedRank { + fn pdf(&self, x: f64) -> f64 { + match self.approximation { + Approximation::Normal(normal) => normal.pdf(x), + Approximation::Exact => { + let r = x.round() as usize; + let mut sum = 0; + + for n in 1..=self.n { + let n_choose_2 = binomial(n as u64, 2) as usize; + if n_choose_2 > r { + break; + } + sum += partitions(r - n_choose_2, n, self.n - n + 1); + } + + sum as f64 / 2_f64.powi(self.n as i32) + } + } + } + + fn ln_pdf(&self, x: f64) -> f64 { + match self.approximation { + Approximation::Normal(normal) => normal.ln_pdf(x), + Approximation::Exact => self.pdf(x).ln(), + } + } +} + +#[cfg(test)] +mod tests { + use statrs::distribution::{Continuous, ContinuousCDF}; + use statrs::statistics::Distribution; + + #[test] + fn n_20() { + let distribution = super::SignedRank::new(20, 0, 0).unwrap(); + assert_eq!(distribution.mean(), Some(105.0)); + assert_eq!(distribution.std_dev(), Some(26.78619047195775)); + assert_eq!(distribution.cdf(50.0), 0.04004379807046464); + } + + #[test] + fn n_30() { + let distribution = super::SignedRank::new(30, 0, 0).unwrap(); + assert_eq!(distribution.mean(), Some(232.5)); + assert_eq!(distribution.std_dev(), Some(48.61841215013094)); + assert_eq!(distribution.cdf(150.0), 0.08971783777126728); + } + + #[test] + fn n_20_exact() { + let distribution = super::SignedRank::exact(20).unwrap(); + assert_eq!(distribution.cdf(50.0), 0.039989471435546875); + } + + #[test] + fn n_11() { + let distribution = super::SignedRank::exact(11).unwrap(); + assert_eq!(distribution.cdf(11.0), 0.0537109375); + assert_eq!(distribution.cdf(7.0), 0.0185546875); + assert_eq!(distribution.cdf(5.0), 0.009765625); + } + + #[test] + fn n_10() { + let distribution = super::SignedRank::exact(10).unwrap(); + assert_eq!(distribution.cdf(8.0), 0.048828125); + assert_eq!(distribution.cdf(5.0), 0.01953125); + assert_eq!(distribution.cdf(3.0), 0.009765625); + } + + #[test] + fn n_9() { + let distribution = super::SignedRank::exact(9).unwrap(); + assert_eq!(distribution.cdf(6.0), 0.0546875); + assert_eq!(distribution.cdf(3.0), 0.01953125); + assert_eq!(distribution.cdf(2.0), 0.01171875); + } + + #[test] + fn n_8() { + let distribution = super::SignedRank::exact(8).unwrap(); + assert_eq!(distribution.cdf(4.0), 0.0546875); + assert_eq!(distribution.cdf(3.0), 0.0390625); + assert_eq!(distribution.cdf(2.5), 0.0390625); + assert_eq!(distribution.cdf(2.0), 0.0234375); + } + + #[test] + fn n_8_pdf() { + let distribution = super::SignedRank::exact(8).unwrap(); + assert_eq!(distribution.pdf(4.0), 0.0078125); + assert_eq!(distribution.pdf(3.0), 0.0078125); + assert_eq!(distribution.pdf(2.0), 0.00390625); + } + + #[test] + fn n_8_ties() { + let distribution = super::SignedRank::new(8, 0, 7).unwrap(); + assert_eq!(distribution.cdf(3.0), 0.03542823032427003); + assert_eq!(distribution.cdf(2.5), 0.029739401378297385); + assert_eq!(distribution.cdf(2.0), 0.024854396634115632); + } + + #[test] + fn partition() { + assert_eq!(super::partitions(7, 3, 5), 4); + } +} diff --git a/extern/stattest/src/lib.rs b/extern/stattest/src/lib.rs new file mode 100644 index 0000000..a7cf23d --- /dev/null +++ b/extern/stattest/src/lib.rs @@ -0,0 +1,3 @@ +pub mod distribution; +pub mod statistics; +pub mod test; diff --git a/extern/stattest/src/statistics/iter_statistics_ext.rs b/extern/stattest/src/statistics/iter_statistics_ext.rs new file mode 100644 index 0000000..16cb885 --- /dev/null +++ b/extern/stattest/src/statistics/iter_statistics_ext.rs @@ -0,0 +1,73 @@ +use crate::statistics::StatisticsExt; +use statrs::statistics::Statistics; +use std::borrow::Borrow; +use std::f64; + +impl StatisticsExt for T +where + T: IntoIterator + Clone, + T::Item: Borrow, +{ + fn n(self) -> f64 { + self.into_iter().count() as f64 + } + + fn df(self) -> f64 { + self.n() - 1.0 + } + + fn pooled_variance(self, other: Self) -> f64 { + let df_x = self.clone().df(); + let df_y = other.clone().df(); + let var_x = self.variance(); + let var_y = other.variance(); + + (df_x * var_x + df_y * var_y) / (df_x + df_y) + } + + fn pooled_std_dev(self, other: Self) -> f64 { + self.pooled_variance(other).sqrt() + } + + fn variance_ratio(self, other: Self) -> f64 { + self.variance() / other.variance() + } +} + +#[cfg(test)] +mod tests { + #[allow(dead_code)] + fn round(number: f64, digits: Option) -> f64 { + match digits { + Some(digits) => { + let factor = 10_f64.powi(digits); + (number * factor).round() / factor + } + None => number.round(), + } + } + + #[test] + fn pooled_variance() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + assert_eq!( + round(super::StatisticsExt::pooled_variance(&x, &y), Some(3)), + 446.118 + ); + } + + #[test] + fn pooled_std_dev() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + assert_eq!( + round(super::StatisticsExt::pooled_std_dev(&x, &y), Some(3)), + 21.121 + ); + } +} diff --git a/extern/stattest/src/statistics/mod.rs b/extern/stattest/src/statistics/mod.rs new file mode 100644 index 0000000..acda8d8 --- /dev/null +++ b/extern/stattest/src/statistics/mod.rs @@ -0,0 +1,9 @@ +//! Provides traits for statistical computation. + +pub use self::iter_statistics_ext::*; +pub use self::ranks::*; +pub use self::statistics_ext::*; + +mod iter_statistics_ext; +mod ranks; +mod statistics_ext; diff --git a/extern/stattest/src/statistics/ranks.rs b/extern/stattest/src/statistics/ranks.rs new file mode 100644 index 0000000..0399435 --- /dev/null +++ b/extern/stattest/src/statistics/ranks.rs @@ -0,0 +1,155 @@ +use std::borrow::Borrow; +use std::iter::FusedIterator; + +/// For the ranking of groups of variables. +pub trait Ranks { + /// Returns a vector of ranks. + fn ranks(self) -> (Vec, usize); +} + +impl Ranks for T +where + T: IntoIterator, + T::Item: Borrow, +{ + fn ranks(self) -> (Vec, usize) { + let mut observations: Vec<_> = self.into_iter().map(|x| *x.borrow()).enumerate().collect(); + observations + .sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + + let (sorted_indices, sorted_values): (Vec<_>, Vec<_>) = observations.into_iter().unzip(); + let groups = DedupWithCount::new(sorted_values.iter()); + + let mut resolved_ties = ResolveTies::new(groups); + let mut ranks = vec![0.0; sorted_values.len()]; + + for (rank, old_index) in (&mut resolved_ties).zip(sorted_indices) { + ranks[old_index] = rank; + } + + (ranks, resolved_ties.tie_correction()) + } +} + +struct ResolveTies +where + I: Iterator, +{ + iter: I, + index: usize, + left: usize, + resolved: Option, + tie_correction: usize, +} + +impl ResolveTies +where + I: Iterator, +{ + fn new(iter: I) -> Self { + ResolveTies { + iter, + index: 0, + left: 0, + resolved: None, + tie_correction: 0, + } + } + + fn tie_correction(&self) -> usize { + self.tie_correction + } +} + +impl Iterator for ResolveTies +where + I: Iterator, +{ + type Item = f64; + + fn next(&mut self) -> Option { + if self.left > 0 { + self.left -= 1; + self.index += 1; + self.resolved + } else { + match self.iter.next() { + Some((count, _)) => { + self.resolved = Some(((1.0 + count as f64) / 2.0) + self.index as f64); + self.left = count - 1; + self.index += 1; + self.tie_correction += count.pow(3) - count; + self.resolved + } + None => None, + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (low, _high) = self.iter.size_hint(); + (low, None) + } +} + +impl FusedIterator for ResolveTies where I: Iterator {} + +struct DedupWithCount +where + I: Iterator, + I::Item: PartialEq, +{ + iter: I, + peek: Option, +} + +impl DedupWithCount +where + I: Iterator, + I::Item: PartialEq, +{ + fn new(mut iter: I) -> Self { + DedupWithCount { + peek: iter.next(), + iter, + } + } +} + +impl Iterator for DedupWithCount +where + I: Iterator, + I::Item: PartialEq, +{ + type Item = (usize, I::Item); + + fn next(&mut self) -> Option<(usize, I::Item)> { + let mut count = 1; + loop { + if self.peek.is_some() { + let next = self.iter.next(); + + if self.peek == next { + count += 1; + continue; + } + + let value = match next { + Some(value) => self.peek.replace(value).unwrap(), + None => self.peek.take().unwrap(), + }; + + break Some((count, value)); + } else { + break None; + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (low, high) = self.iter.size_hint(); + (if low == 0 { 0 } else { 1 }, high) + } +} + +impl FusedIterator for DedupWithCount where I::Item: PartialEq {} diff --git a/extern/stattest/src/statistics/statistics_ext.rs b/extern/stattest/src/statistics/statistics_ext.rs new file mode 100644 index 0000000..582d7f0 --- /dev/null +++ b/extern/stattest/src/statistics/statistics_ext.rs @@ -0,0 +1,15 @@ +/// Provides additional statistical utilities, complementing [statrs::statistics::Statistics]. +pub trait StatisticsExt { + /// Returns the amount of observations. + fn n(self) -> T; + /// Returns the degrees of freedom of the data. + fn df(self) -> T; + + /// Returns the pooled variance. + fn pooled_variance(self, other: Self) -> T; + /// Returns the pooled standard deviation. + fn pooled_std_dev(self, other: Self) -> T; + + /// Returns the ratio between two variances. + fn variance_ratio(self, other: Self) -> T; +} diff --git a/extern/stattest/src/test/f.rs b/extern/stattest/src/test/f.rs new file mode 100644 index 0000000..4e0eff0 --- /dev/null +++ b/extern/stattest/src/test/f.rs @@ -0,0 +1,59 @@ +use crate::statistics::StatisticsExt; +use statrs::distribution::{ContinuousCDF, FisherSnedecor}; + +/// Implements the [F-test of equality of variances](https://en.wikipedia.org/wiki/F-test_of_equality_of_variances). +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct FTest { + df: (f64, f64), + estimate: f64, + p_value: f64, +} + +impl FTest { + /// Carry out the F-test of equality of variances on the samples `x` and `y`. + pub fn new(x: &[f64], y: &[f64]) -> statrs::Result { + let f = x.variance_ratio(y); + let df = (x.df(), y.df()); + + let distribution = FisherSnedecor::new(df.0, df.1)?; + let probability = distribution.cdf(f); + let p_value = if f.gt(&1.0) { + 1.0 - probability + } else { + probability + }; + + Ok(FTest { + df, + estimate: f, + p_value, + }) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn f_test() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let result = super::FTest::new(&x, &y).unwrap(); + assert_eq!(result.df, (11.0, 6.0)); + assert_eq!(result.estimate, 1.0755200911940725); + assert_eq!(result.p_value, 0.4893961256182331); + } + + #[test] + fn f_test_reverse() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let result = super::FTest::new(&y, &x).unwrap(); + assert_eq!(result.df, (6.0, 11.0)); + assert_eq!(result.estimate, 0.9297827239003709); + assert_eq!(result.p_value, 0.48939612561823265); + } +} diff --git a/extern/stattest/src/test/levenes.rs b/extern/stattest/src/test/levenes.rs new file mode 100644 index 0000000..b84c689 --- /dev/null +++ b/extern/stattest/src/test/levenes.rs @@ -0,0 +1,64 @@ +use crate::statistics::StatisticsExt; +use statrs::distribution::{ContinuousCDF, FisherSnedecor}; +use statrs::statistics::Statistics; + +/// Implements [Levene's test](https://en.wikipedia.org/wiki/Levene%27s_test) (Brown & Forsythe, 1974). +/// +/// # References +/// +/// Brown, M. B., & Forsythe, A. B. (1974). Robust Tests for the Equality of Variances. +/// Journal of the American Statistical Association, 69(346), 364–367. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct LevenesTest { + df: f64, + estimate: f64, + p_value: f64, +} + +impl LevenesTest { + /// Run Levene's test on the samples `x` and `y`. + pub fn new(x: &[f64], y: &[f64]) -> statrs::Result { + let n_x = x.n(); + let n_y = y.n(); + let diff_x = x.iter().map(|xi| (xi - x.mean()).abs()); + let diff_y = y.iter().map(|yi| (yi - y.mean()).abs()); + + let mean_diff_x = diff_x.clone().mean(); + let mean_diff_y = diff_y.clone().mean(); + let mean_diff = Iterator::chain(diff_x.clone(), diff_y.clone()).mean(); + + let a: f64 = + n_x * (mean_diff_x - mean_diff).powi(2) + n_y * (mean_diff_y - mean_diff).powi(2); + let b: f64 = Iterator::chain( + diff_x.map(|diff| (diff - mean_diff_x).powi(2)), + diff_y.map(|diff| (diff - mean_diff_y).powi(2)), + ) + .sum(); + + let df = n_x + n_y - 2.0; + let estimate = df * a / b; + let distribution = FisherSnedecor::new(1.0, df)?; + let p_value = 1.0 - distribution.cdf(estimate); + + Ok(LevenesTest { + df, + estimate, + p_value, + }) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn levenes_test() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let result = super::LevenesTest::new(&x, &y).unwrap(); + assert_eq!(result.df, 17.0); + assert_eq!(result.estimate, 0.014721055064513417); + assert_eq!(result.p_value, 0.9048519802923365); + } +} diff --git a/extern/stattest/src/test/mann_whitney_u.rs b/extern/stattest/src/test/mann_whitney_u.rs new file mode 100644 index 0000000..ff46a03 --- /dev/null +++ b/extern/stattest/src/test/mann_whitney_u.rs @@ -0,0 +1,69 @@ +use crate::statistics::*; +use statrs::distribution::{ContinuousCDF, Normal}; + +/// Implements the [Mann-Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), +/// also known as the Wilcoxon rank-sum test. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct MannWhitneyUTest { + estimate: (f64, f64), + effect_size: f64, + p_value: f64, +} + +impl MannWhitneyUTest { + /// Run Mann-Whitney U test/Wilcoxon rank-sum test on samples `x` and `y`. + pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result { + let (ranks, tie_correction) = x.iter().chain(y).ranks(); + let n_x = x.n(); + let n_y = y.n(); + let n_xy = n_x * n_y; + + let estimate = (n_x * (n_x + 1.0)) / 2.0 - ranks[0..x.len()].iter().sum::(); + let estimate_x = n_xy + estimate; + let estimate_y = -estimate; + let estimate_small = if estimate < 0.0 { + estimate_x + } else { + estimate_y + }; + + let n = n_x + n_y; + let distribution_mean = n_xy / 2.0; + let distribution_var = (n_xy * (n + 1.0 - tie_correction as f64 / (n * (n - 1.0)))) / 12.0; + + let normal = Normal::new(distribution_mean, distribution_var.sqrt())?; + let p_value = 2.0 * normal.cdf(estimate_small); + let effect_size = 1.0 - (2.0 * estimate_small) / n_xy; + + Ok(MannWhitneyUTest { + effect_size, + estimate: (estimate_x, estimate_y), + p_value, + }) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn mann_whitney_u() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let test = super::MannWhitneyUTest::independent(&x, &y).unwrap(); + assert_eq!(test.estimate, (21.5, 62.5)); + assert_eq!(test.effect_size, 0.48809523809523814); + assert_eq!(test.p_value, 0.08303763193135497); + } + + #[test] + fn mann_whitney_u_2() { + let x = vec![68.0, 68.0, 59.0, 72.0, 64.0, 67.0, 70.0, 74.0]; + let y = vec![60.0, 67.0, 61.0, 62.0, 67.0, 63.0, 56.0, 58.0]; + let test = super::MannWhitneyUTest::independent(&x, &y).unwrap(); + assert_eq!(test.estimate, (9.0, 55.0)); + assert_eq!(test.effect_size, 0.71875); + assert_eq!(test.p_value, 0.01533316211294691); + } +} diff --git a/extern/stattest/src/test/mod.rs b/extern/stattest/src/test/mod.rs new file mode 100644 index 0000000..cda457c --- /dev/null +++ b/extern/stattest/src/test/mod.rs @@ -0,0 +1,24 @@ +//! Defines frequentist statistical tests. + +pub use self::f::*; +pub use self::levenes::*; +pub use self::mann_whitney_u::*; +pub use self::shapiro_wilk::*; +pub use self::students_t::*; +pub use self::welchs_t::*; +pub use self::wilcoxon_w::*; + +mod f; +mod levenes; +mod mann_whitney_u; +mod shapiro_wilk; +mod students_t; +mod welchs_t; +mod wilcoxon_w; + +/// Alternative hypothesis for comparing two means. +pub enum AlternativeHypothesis { + Greater, + Different, + Less, +} diff --git a/extern/stattest/src/test/shapiro_wilk.rs b/extern/stattest/src/test/shapiro_wilk.rs new file mode 100644 index 0000000..4684fbe --- /dev/null +++ b/extern/stattest/src/test/shapiro_wilk.rs @@ -0,0 +1,263 @@ +use crate::distribution::ShapiroWilk; +use statrs::distribution::{ContinuousCDF, Normal}; +use statrs::function::evaluate::polynomial; +use statrs::statistics::Statistics; +use std::cmp; +use std::f64::consts::{FRAC_1_SQRT_2, FRAC_PI_3}; + +/// Implements the [Shapiro-Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test) +/// (Shapiro & Wilk, 1965). A simplified port of the algorithm +/// described by Royston (1992, 1995). +/// +/// # References +/// +/// Royston, P. (1992). Approximating the Shapiro-Wilk W-test for non-normality. +/// Statistics and Computing, 2(3), 117–119. +/// +/// Royston, P. (1995). Remark AS R94: A Remark on Algorithm AS 181: +/// The W-test for Normality. Journal of the Royal Statistical Society. +/// Series C (Applied Statistics), 44(4), 547–551. +/// +/// Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality +/// (complete samples)†. Biometrika, 52(3–4), 591–611. +#[derive(Debug, PartialEq, Clone)] +pub struct ShapiroWilkTest { + p_value: f64, + estimate: f64, + weights: Vec, + status: ShapiroWilkStatus, +} + +/// Representation of non-fatal `IFAULT` codes (Royston, 1995). +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum ShapiroWilkStatus { + /// `IFAULT = 0` (no fault) + Ok, + /// `IFAULT = 2` (n > 5000) + TooMany, +} + +/// Representation of fatal `IFAULT` codes (Royston, 1995). +/// +/// As for the other codes not listed here or in [ShapiroWilkStatus]: +/// - `IFAULT = 3` (insufficient storage for A) --- A is now allocated within the method +/// - `IFAULT = 4` (censoring while n < 20) --- censoring is not implemented in this port +/// - `IFAULT = 5` (the proportion censored > 0.8) --- censoring is not implemented in this port +/// - `IFAULT = 7` (the data are not in ascending order) --- data are now sorted within the method +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum ShapiroWilkError { + /// `IFAULT = 1` (n < 3) + TooFew, + /// `IFAULT = 6` (the data have zero range) + NoDifference, + /// Should not happen + CannotMakeDistribution, +} + +static SMALL: f64 = 1E-19; // smaller for f64? +static FRAC_6_PI: f64 = 1.90985931710274; // 6/pi + +// Polynomials for estimating weights. +static C1: [f64; 6] = [0.0, 0.221157, -0.147981, -2.07119, 4.434685, -2.706056]; +static C2: [f64; 6] = [0.0, 0.042981, -0.293762, -1.752461, 5.682633, -3.582633]; +// Polynomial for estimating scaling factor. +static G: [f64; 2] = [-2.273, 0.459]; + +impl ShapiroWilkTest { + /// Run the Shapiro-Wilk test on the sample `x`. + pub fn new(x: &[f64]) -> Result { + let n = x.len(); + let mut sorted = x.to_owned(); + sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(cmp::Ordering::Equal)); + + let range = sorted.last().unwrap() - sorted[0]; + + if range.lt(&SMALL) { + return Err(ShapiroWilkError::NoDifference); + } else if n < 3 { + return Err(ShapiroWilkError::TooFew); + } + + let weights = Self::get_weights(n); + let mean = (&sorted).mean(); + + let (denominator, numerator): (f64, f64) = (0..n) + .map(|i| { + let distance = sorted[i] - mean; + (distance * distance, distance * weights[i]) + }) + .fold((0.0, 0.0), |sum, value| (sum.0 + value.0, sum.1 + value.1)); + + // Calculate complement (denominator - numerator) / denominator + // to keep precision when numerator / denominator is close to 1 + let complement = (denominator - numerator.powi(2)) / denominator; + let estimate = 1.0 - complement; + + let status = if n > 5000 { + ShapiroWilkStatus::TooMany + } else { + ShapiroWilkStatus::Ok + }; + let p_value = if n == 3 { + FRAC_6_PI * (estimate.sqrt().asin() - FRAC_PI_3).max(0.0) + } else { + let distribution = match ShapiroWilk::new(n) { + Ok(distribution) => distribution, + Err(_) => return Err(ShapiroWilkError::CannotMakeDistribution), + }; + 1.0 - distribution.cdf(if n <= 11 { + let gamma = polynomial(n as f64, &G); + -(gamma - complement.ln()).ln() + } else { + complement.ln() + }) + }; + + Ok(ShapiroWilkTest { + weights, + status, + estimate, + p_value, + }) + } + + fn get_weights(n: usize) -> Vec { + if n == 3 { + return vec![-FRAC_1_SQRT_2, 0.0, FRAC_1_SQRT_2]; + } + + let normal = Normal::new(0.0, 1.0).unwrap(); + let mut weights = vec![0.0; n]; + + let half = n / 2; + let float_n = n as f64; + let an_25 = float_n + 0.25; + + let mut sum_squared = 0_f64; + for i in 0..half { + weights[i] = normal.inverse_cdf((i as f64 + 0.625) / an_25); + weights[n - i - 1] = -weights[i]; + sum_squared += weights[i].powi(2); + } + sum_squared *= 2.0; + + let root_sum_squared = sum_squared.sqrt(); + let rsn = float_n.sqrt().recip(); + let weight0 = polynomial(rsn, &C1) - weights[0] / root_sum_squared; + + let (weights_set, scale) = if n > 5 { + let weight1 = polynomial(rsn, &C2) - weights[1] / root_sum_squared; + let scale = ((sum_squared - 2.0 * (weights[0].powi(2) + weights[1].powi(2))) + / (1.0 - 2.0 * (weight0.powi(2) + weight1.powi(2)))) + .sqrt(); + weights[1] = -weight1; + weights[n - 2] = weight1; + (2, scale) + } else { + let scale = + ((sum_squared - 2.0 * weights[0].powi(2)) / (1.0 - 2.0 * weight0.powi(2))).sqrt(); + (1, scale) + }; + + weights[0] = -weight0; + weights[n - 1] = weight0; + + for i in weights_set..half { + weights[i] /= scale; + weights[n - i - 1] = -weights[i]; + } + + weights + } +} + +#[cfg(test)] +mod tests { + #[test] + fn shapiro_wilk() { + let x = vec![ + 0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614, 0.655, 0.954, 1.392, + 1.557, 1.648, 1.690, 1.994, 2.174, 2.206, 3.245, 3.510, 3.571, 4.354, 4.980, 6.084, + 8.351, + ]; + let test = super::ShapiroWilkTest::new(&x).unwrap(); + assert_eq!(test.estimate, 0.8346662753181684); + assert_eq!(test.p_value, 0.0009134904817755807); + } + + #[test] + fn shapiro_wilk_1() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let test = super::ShapiroWilkTest::new(&x).unwrap(); + assert_eq!(test.estimate, 0.9923657326481632); + assert_eq!(test.p_value, 0.9999699312420669); + } + + #[test] + fn shapiro_wilk_2() { + let x = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let test = super::ShapiroWilkTest::new(&x).unwrap(); + assert_eq!(test.estimate, 0.9980061683004456); + assert_eq!(test.p_value, 0.9999411393249124); + } + + #[test] + fn large_range() { + let x = vec![ + 0.139E100, 0.157E100, 0.175E100, 0.256E100, 0.344E100, 0.413E100, 0.503E100, 0.577E100, + 0.614E100, 0.655E100, 0.954E100, 1.392E100, 1.557E100, 1.648E100, 1.690E100, 1.994E100, + 2.174E100, 2.206E100, 3.245E100, 3.510E100, 3.571E100, 4.354E100, 4.980E100, 6.084E100, + 8.351E100, + ]; + let test = super::ShapiroWilkTest::new(&x).unwrap(); + assert_eq!(test.estimate, 0.8346662753181684); + assert_eq!(test.p_value, 0.0009134904817755807); + } + + #[test] + fn nearly_normally_distributed() { + let x = vec![ + -0.44, -0.31, -0.25, -0.21, -0.18, -0.15, -0.12, -0.10, -0.08, -0.06, -0.04, -0.02, + 0.0, 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.15, 0.18, 0.21, 0.25, 0.31, 0.44, + ]; + let test = super::ShapiroWilkTest::new(&x).unwrap(); + assert_eq!(test.estimate, 0.9997987717271388); + assert_eq!(test.p_value, 1.0); + } + + #[test] + fn normally_distributed() { + let x = vec![ + -0.4417998157703872, + -0.310841224215176, + -0.2544758389229413, + -0.21509882047762266, + -0.18254731741828356, + -0.1541570107663562, + -0.12852819470327187, + -0.1048199468783084, + -0.08247628697708857, + -0.061100278634889656, + -0.040388574421146996, + -0.020093702456713224, + 0.0, + 0.020093702456713224, + 0.040388574421146996, + 0.061100278634889656, + 0.08247628697708857, + 0.1048199468783084, + 0.12852819470327187, + 0.1541570107663562, + 0.18254731741828356, + 0.21509882047762266, + 0.2544758389229413, + 0.310841224215176, + 0.4417998157703872, + ]; + let test = super::ShapiroWilkTest::new(&x).unwrap(); + assert_eq!(test.estimate, 0.9999999999999999); + assert_eq!(test.p_value, 1.0); + } +} diff --git a/extern/stattest/src/test/students_t.rs b/extern/stattest/src/test/students_t.rs new file mode 100644 index 0000000..9b5e245 --- /dev/null +++ b/extern/stattest/src/test/students_t.rs @@ -0,0 +1,89 @@ +use crate::statistics::StatisticsExt; +use statrs::distribution::{ContinuousCDF, StudentsT}; +use statrs::statistics::Statistics; + +/// Implements [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test). +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct StudentsTTest { + df: f64, + estimate: f64, + effect_size: f64, + p_value: f64, +} + +impl StudentsTTest { + /// Run Student's two-sample t-test on samples `x` and `y`. + pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result { + let n_x = x.n(); + let n_y = y.n(); + let df = n_x + n_y; + + let effect_size = (x.mean() - y.mean()) / x.pooled_variance(y).sqrt(); + let t = effect_size / (n_x.recip() + n_y.recip()).sqrt(); + + let t_distribution = StudentsT::new(0.0, 1.0, df)?; + let p_value = 2.0 * t_distribution.cdf(-t.abs()); + + Ok(StudentsTTest { + df, + effect_size: effect_size.abs(), + estimate: t, + p_value, + }) + } + + /// Run paired Student's t-test on samples `x` and `y`. + pub fn paired(x: &[f64], y: &[f64]) -> statrs::Result { + let d: Vec<_> = x.iter().zip(y).map(|(x, y)| x - y).collect(); + let df = x.df(); + let effect_size = (&d).mean() / (&d).std_dev(); + let t = effect_size * x.n().sqrt(); + + let t_distribution = StudentsT::new(0.0, 1.0, df)?; + let p_value = 2.0 * t_distribution.cdf(-t.abs()); + + Ok(StudentsTTest { + df, + effect_size: effect_size.abs(), + estimate: t, + p_value, + }) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn students_t() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let test = super::StudentsTTest::independent(&x, &y).unwrap(); + assert_eq!(test.estimate, 1.8914363974423305); + assert_eq!(test.effect_size, 0.8995574392432595); + assert_eq!(test.p_value, 0.073911127032672); + } + + #[test] + fn reverse() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let test = super::StudentsTTest::independent(&y, &x).unwrap(); + assert_eq!(test.estimate, -1.8914363974423305); + assert_eq!(test.effect_size, 0.8995574392432595); + assert_eq!(test.p_value, 0.073911127032672); + } + + #[test] + fn paired() { + let x = vec![8.0, 6.0, 5.5, 11.0, 8.5, 5.0, 6.0, 6.0]; + let y = vec![8.5, 9.0, 6.5, 10.5, 9.0, 7.0, 6.5, 7.0]; + let test = super::StudentsTTest::paired(&x, &y).unwrap(); + assert_eq!(test.estimate, -2.645751311064591); + assert_eq!(test.effect_size, 0.9354143466934856); + assert_eq!(test.p_value, 0.03314550026377362); + } +} diff --git a/extern/stattest/src/test/welchs_t.rs b/extern/stattest/src/test/welchs_t.rs new file mode 100644 index 0000000..7a76d95 --- /dev/null +++ b/extern/stattest/src/test/welchs_t.rs @@ -0,0 +1,73 @@ +use crate::statistics::StatisticsExt; +use statrs::distribution::{ContinuousCDF, StudentsT}; +use statrs::statistics::Statistics; + +/// Implements [Welch's t-test](https://en.wikipedia.org/wiki/Welch's_t-test) (Welch, 1947). +/// +/// # References +/// +/// Welch, B. L. (1947). The generalisation of student’s problems when several different population +/// variances are involved. Biometrika, 34(1–2), 28–35. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct WelchsTTest { + df: f64, + estimate: f64, + effect_size: f64, + p_value: f64, +} + +impl WelchsTTest { + /// Run Welch's two-sample t-test on samples `x` and `y`. + pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result { + let var_x = x.variance(); + let var_y = y.variance(); + let var_x_n = var_x / x.n(); + let var_y_n = var_y / y.n(); + let linear_combination = var_x_n + var_y_n; + + let df = linear_combination.powi(2) / (var_x_n.powi(2) / x.df() + var_y_n.powi(2) / y.df()); + + let mean_difference = x.mean() - y.mean(); + let effect_size = mean_difference.abs() / ((var_x + var_y) / 2.0).sqrt(); + let t = mean_difference / linear_combination.sqrt(); + + let t_distribution = StudentsT::new(0.0, 1.0, df)?; + let p_value = 2.0 * t_distribution.cdf(-t.abs()); + + Ok(WelchsTTest { + df, + effect_size, + estimate: t, + p_value, + }) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn students_t() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let test = super::WelchsTTest::independent(&x, &y).unwrap(); + assert_eq!(test.df, 13.081702113268564); + assert_eq!(test.estimate, 1.9107001042454415); + assert_eq!(test.effect_size, 0.904358069450997); + assert_eq!(test.p_value, 0.0782070409214568); + } + + #[test] + fn reverse() { + let x = vec![ + 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, + ]; + let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; + let test = super::WelchsTTest::independent(&y, &x).unwrap(); + assert_eq!(test.df, 13.081702113268564); + assert_eq!(test.estimate, -1.9107001042454415); + assert_eq!(test.effect_size, 0.904358069450997); + assert_eq!(test.p_value, 0.0782070409214568); + } +} diff --git a/extern/stattest/src/test/wilcoxon_w.rs b/extern/stattest/src/test/wilcoxon_w.rs new file mode 100644 index 0000000..db1d89f --- /dev/null +++ b/extern/stattest/src/test/wilcoxon_w.rs @@ -0,0 +1,70 @@ +use crate::distribution::SignedRank; +use crate::statistics::*; +use statrs::distribution::ContinuousCDF; + +/// Implements the [Wilcoxon signed rank test](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test). +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct WilcoxonWTest { + estimate: (f64, f64), + effect_size: f64, + p_value: f64, +} + +impl WilcoxonWTest { + /// Run Wilcoxon signed rank test on samples `x` and `y`. + pub fn paired(x: &[f64], y: &[f64]) -> statrs::Result { + let d: Vec<_> = x.iter().zip(y).map(|(x, y)| (x - y).abs()).collect(); + let (ranks, tie_correction) = (&d).ranks(); + let mut estimate = (0.0, 0.0); + let mut zeroes = 0; + + for ((x, y), rank) in x.iter().zip(y).zip(ranks) { + if x < y { + estimate.0 += rank; + } else if x > y { + estimate.1 += rank; + } else { + zeroes += 1; + } + } + + let estimate_small = if estimate.0 < estimate.1 { + estimate.0 + } else { + estimate.1 + }; + let distribution = SignedRank::new(d.len(), zeroes, tie_correction)?; + let p_value = distribution.cdf(estimate_small); + + let n = (&d).n(); + let rank_sum = n * (n + 1.0) / 2.0; + let effect_size = estimate_small / rank_sum; + + Ok(WilcoxonWTest { + effect_size, + estimate, + p_value, + }) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn paired() { + let x = vec![8.0, 6.0, 5.5, 11.0, 8.5, 5.0, 6.0, 6.0]; + let y = vec![8.5, 9.0, 6.5, 10.5, 9.0, 7.0, 6.5, 7.0]; + let test = super::WilcoxonWTest::paired(&x, &y).unwrap(); + assert_eq!(test.estimate, (33.5, 2.5)); + assert_eq!(test.p_value, 0.027785782704095215); + } + + #[test] + fn paired_2() { + let x = vec![209.0, 200.0, 177.0, 169.0, 159.0, 169.0, 187.0, 198.0]; + let y = vec![151.0, 168.0, 147.0, 164.0, 166.0, 163.0, 176.0, 188.0]; + let test = super::WilcoxonWTest::paired(&x, &y).unwrap(); + assert_eq!(test.estimate, (3.0, 33.0)); + assert_eq!(test.p_value, 0.0390625); + } +} diff --git a/extern/swilk.o b/extern/swilk.o new file mode 100644 index 0000000..17f9e34 Binary files /dev/null and b/extern/swilk.o differ diff --git a/src/main.cpp b/src/main.cpp index aea7fd6..f3dd49c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -21,6 +21,8 @@ blt::gfx::batch_renderer_2d renderer_2d(resources); constexpr blt::i32 MAX_DEPTH = 17; +extern "C" void test(); + struct node; blt::area_allocator node_allocator; @@ -360,6 +362,7 @@ void update(std::int32_t w, std::int32_t h) int main() { + test(); blt::gfx::init(blt::gfx::window_data{"Window of GP test", init, update}.setSyncInterval(1)); global_matrices.cleanup(); resources.cleanup(); diff --git a/src/shifting.cpp b/src/shifting.cpp index b43ea5c..17cc69a 100644 --- a/src/shifting.cpp +++ b/src/shifting.cpp @@ -142,7 +142,7 @@ std::vector out; bool has_intersection(Point p, char c) { - + return false; } void shift_right(int units)