From 03c8cbbfd3804713a10bf4a44de1561d980aaf3f Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Mon, 29 Jan 2024 11:02:13 -0500 Subject: [PATCH] no need for useless git repo in source --- extern/stattest/.github/workflows/rust.yml | 22 -- extern/stattest/.gitignore | 3 - extern/stattest/Cargo.toml | 12 - extern/stattest/LICENSE | 21 -- extern/stattest/README.md | 50 --- extern/stattest/src/distribution/mod.rs | 7 - extern/stattest/src/distribution/rank_sum.rs | 0 .../stattest/src/distribution/shapiro_wilk.rs | 139 -------- .../stattest/src/distribution/signed_rank.rs | 313 ------------------ extern/stattest/src/lib.rs | 3 - .../src/statistics/iter_statistics_ext.rs | 73 ---- extern/stattest/src/statistics/mod.rs | 9 - extern/stattest/src/statistics/ranks.rs | 155 --------- .../stattest/src/statistics/statistics_ext.rs | 15 - extern/stattest/src/test/f.rs | 59 ---- extern/stattest/src/test/levenes.rs | 64 ---- extern/stattest/src/test/mann_whitney_u.rs | 69 ---- extern/stattest/src/test/mod.rs | 24 -- extern/stattest/src/test/shapiro_wilk.rs | 263 --------------- extern/stattest/src/test/students_t.rs | 89 ----- extern/stattest/src/test/welchs_t.rs | 73 ---- extern/stattest/src/test/wilcoxon_w.rs | 70 ---- 22 files changed, 1533 deletions(-) delete mode 100644 extern/stattest/.github/workflows/rust.yml delete mode 100644 extern/stattest/.gitignore delete mode 100644 extern/stattest/Cargo.toml delete mode 100644 extern/stattest/LICENSE delete mode 100644 extern/stattest/README.md delete mode 100644 extern/stattest/src/distribution/mod.rs delete mode 100644 extern/stattest/src/distribution/rank_sum.rs delete mode 100644 extern/stattest/src/distribution/shapiro_wilk.rs delete mode 100644 extern/stattest/src/distribution/signed_rank.rs delete mode 100644 extern/stattest/src/lib.rs delete mode 100644 extern/stattest/src/statistics/iter_statistics_ext.rs delete mode 100644 extern/stattest/src/statistics/mod.rs delete mode 100644 extern/stattest/src/statistics/ranks.rs delete mode 100644 extern/stattest/src/statistics/statistics_ext.rs delete mode 100644 extern/stattest/src/test/f.rs delete mode 100644 extern/stattest/src/test/levenes.rs delete mode 100644 extern/stattest/src/test/mann_whitney_u.rs delete mode 100644 extern/stattest/src/test/mod.rs delete mode 100644 extern/stattest/src/test/shapiro_wilk.rs delete mode 100644 extern/stattest/src/test/students_t.rs delete mode 100644 extern/stattest/src/test/welchs_t.rs delete mode 100644 extern/stattest/src/test/wilcoxon_w.rs diff --git a/extern/stattest/.github/workflows/rust.yml b/extern/stattest/.github/workflows/rust.yml deleted file mode 100644 index 7ae98f3..0000000 --- a/extern/stattest/.github/workflows/rust.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Rust - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Build - run: cargo build --verbose - - name: Run tests - run: cargo test --verbose diff --git a/extern/stattest/.gitignore b/extern/stattest/.gitignore deleted file mode 100644 index a13b9ba..0000000 --- a/extern/stattest/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/dev -/target -Cargo.lock diff --git a/extern/stattest/Cargo.toml b/extern/stattest/Cargo.toml deleted file mode 100644 index a2c8dd9..0000000 --- a/extern/stattest/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "stattest" -version = "0.0.0" -authors = ["Lars Willighagen "] -edition = "2018" - -[lib] -doctest = false - -[dependencies] -statrs = { git = "https://github.com/larsgw/statrs", branch = "patch-1" } -rand = "0.7.3" diff --git a/extern/stattest/LICENSE b/extern/stattest/LICENSE deleted file mode 100644 index 5926a00..0000000 --- a/extern/stattest/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2021 Lars Willighagen - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/extern/stattest/README.md b/extern/stattest/README.md deleted file mode 100644 index 3b53a4b..0000000 --- a/extern/stattest/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# Stat-Test - -This crate provides statistical tests and relevant utilities. - ---- - -## Status - -Currently a number of tests are implemented for comparing the means of two -independent samples, and for checking assumptions regarding the former tests. -However, these tests are all two-sample tests that do not yet support specifying -alternative hypotheses. - -### Comparison of independent means - - - **Student's t-test** - `stattest::test::StudentsTTest` - *Assumptions:* normality, homogeneity of variances - - - **Welch's t-test** - `stattest::test::WelchsTTest` - *Assumptions:* normality - - - **Mann-Whitney U-test/Wilcoxon rank-sum test** - `stattest::test::MannWhitneyUTest` - *Assumptions:* – - -### Comparison of paired observations - - - **Student's t-test** - `stattest::test::StudentsTTest` - *Assumptions:* normality, homogeneity of variances - - - **Wilcoxon signed rank test** - `stattest::test::WilcoxonWTest` - *Assumptions:* – - -### Assumption tests - - - **Levene's test** - `stattest::test::LevenesTest` - *Tests:* homogeneity of variances - - - **F-test of equality of variances** - `stattest::test::FTest` - *Tests:* homogeneity of variances - - - **Shapiro-Wilk test** - `stattest::test::ShapiroWilkTest` - *Tests:* normality diff --git a/extern/stattest/src/distribution/mod.rs b/extern/stattest/src/distribution/mod.rs deleted file mode 100644 index 4ab736e..0000000 --- a/extern/stattest/src/distribution/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Defines interfaces for creating and approximating statistical distributions. - -pub use self::shapiro_wilk::*; -pub use self::signed_rank::*; - -mod shapiro_wilk; -mod signed_rank; diff --git a/extern/stattest/src/distribution/rank_sum.rs b/extern/stattest/src/distribution/rank_sum.rs deleted file mode 100644 index e69de29..0000000 diff --git a/extern/stattest/src/distribution/shapiro_wilk.rs b/extern/stattest/src/distribution/shapiro_wilk.rs deleted file mode 100644 index b4473ca..0000000 --- a/extern/stattest/src/distribution/shapiro_wilk.rs +++ /dev/null @@ -1,139 +0,0 @@ -use rand::Rng; -use statrs::distribution::{Continuous, ContinuousCDF, Normal}; -use statrs::function::evaluate::polynomial; -use statrs::statistics::*; -use statrs::Result; - -// Polynomials used to calculate the mean and standard deviation -// of a normal distribution that approximates the distribution of -// the W statistic of the Shapiro-Wilk test (Royston, 1992). -static C3: [f64; 4] = [0.5440, -0.39978, 0.025054, -0.0006714]; -static C4: [f64; 4] = [1.3822, -0.77857, 0.062767, -0.0020322]; -static C5: [f64; 4] = [-1.5861, -0.31082, -0.083751, 0.0038915]; -static C6: [f64; 3] = [-0.4803, -0.082676, 0.0030302]; - -/// Implements an approximation of the distribution of the W -/// statistic of the [Shapiro-Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test). -/// -/// # References -/// -/// Royston, P. (1992). Approximating the Shapiro-Wilk W-test for non-normality. -/// Statistics and Computing, 2(3), 117–119. -/// -/// Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality -/// (complete samples)†. Biometrika, 52(3–4), 591–611. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct ShapiroWilk { - normal: Normal, - n: usize, - mean: f64, - std_dev: f64, -} - -impl ShapiroWilk { - /// Create a new approximation for the distribution of W for a given sample size `n`. - /// - /// # Examples - /// - /// ``` - /// use stattest::distribution::ShapiroWilk; - /// - /// let result = ShapiroWilk::new(25); - /// assert!(result.is_ok()); - /// ``` - pub fn new(n: usize) -> Result { - let float_n = n as f64; - - let (mean, std_dev) = if n <= 11 { - (polynomial(float_n, &C3), polynomial(float_n, &C4).exp()) - } else { - let log_n = float_n.ln(); - (polynomial(log_n, &C5), polynomial(log_n, &C6).exp()) - }; - - let result = Normal::new(mean, std_dev); - match result { - Ok(normal) => Ok(ShapiroWilk { - normal, - n, - mean, - std_dev, - }), - Err(error) => Err(error), - } - } - - /// Return the sample size associated with this distribution. - pub fn n(&self) -> usize { - self.n - } -} - -impl ::rand::distributions::Distribution for ShapiroWilk { - fn sample(&self, r: &mut R) -> f64 { - ::rand::distributions::Distribution::sample(&self.normal, r) - } -} - -impl ContinuousCDF for ShapiroWilk { - fn cdf(&self, x: f64) -> f64 { - self.normal.cdf(x) - } - - fn inverse_cdf(&self, x: f64) -> f64 { - self.normal.inverse_cdf(x) - } -} - -impl Min for ShapiroWilk { - fn min(&self) -> f64 { - self.normal.min() - } -} - -impl Max for ShapiroWilk { - fn max(&self) -> f64 { - self.normal.max() - } -} - -impl Distribution for ShapiroWilk { - fn mean(&self) -> Option { - self.normal.mean() - } - fn variance(&self) -> Option { - self.normal.variance() - } - fn entropy(&self) -> Option { - self.normal.entropy() - } - fn skewness(&self) -> Option { - self.normal.skewness() - } -} - -impl Mode> for ShapiroWilk { - fn mode(&self) -> Option { - self.normal.mode() - } -} - -impl Continuous for ShapiroWilk { - fn pdf(&self, x: f64) -> f64 { - self.normal.pdf(x) - } - - fn ln_pdf(&self, x: f64) -> f64 { - self.normal.ln_pdf(x) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn n_25() { - let distribution = super::ShapiroWilk::new(25).unwrap(); - assert_eq!(distribution.mean, -3.3245620722111475); - assert_eq!(distribution.std_dev, 0.4891787150186814); - } -} diff --git a/extern/stattest/src/distribution/signed_rank.rs b/extern/stattest/src/distribution/signed_rank.rs deleted file mode 100644 index a846b73..0000000 --- a/extern/stattest/src/distribution/signed_rank.rs +++ /dev/null @@ -1,313 +0,0 @@ -use rand::Rng; -use statrs::distribution::{Continuous, ContinuousCDF, Normal}; -use statrs::function::factorial::binomial; -use statrs::statistics::*; -use statrs::Result; - -/// Implements an approximation of the distribution of the W -/// statistic of the [Wilcoxon signed rank test](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test). -/// -/// # References -/// -/// Wilcoxon, F. (1945). Individual Comparisons by Ranking Methods. -/// Biometrics Bulletin, 1(6), 80–83. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct SignedRank { - approximation: Approximation, - n: usize, -} - -#[derive(Debug, Copy, Clone, PartialEq)] -enum Approximation { - Normal(Normal), - Exact, -} - -impl SignedRank { - /// Determine whether to use an approximation or an exact distribution of W for a given sample size `n` - /// and given a number of paired observations with a non-zero difference `non_zero`. - /// - /// # Examples - /// - /// ``` - /// use stattest::distribution::SignedRank; - /// - /// let result = SignedRank::new(25, 25); - /// assert!(result.is_ok()); - /// ``` - pub fn new(n: usize, zeroes: usize, tie_correction: usize) -> Result { - if zeroes > 0 || tie_correction > 0 || (n - zeroes) >= 20 { - SignedRank::approximate(n - zeroes, tie_correction) - } else { - SignedRank::exact(n) - } - } - - /// Create a new approximation for the distribution of W for a given sample size `n` - /// and given a number of paired observations with a non-zero difference `non_zero`. - /// - /// # Examples - /// - /// ``` - /// use stattest::distribution::SignedRank; - /// - /// let result = SignedRank::approximate(25, 25); - /// assert!(result.is_ok()); - /// ``` - pub fn approximate(n: usize, tie_correction: usize) -> Result { - let mean = (n * (n + 1)) as f64 / 4.0; - let var = mean * (2 * n + 1) as f64 / 6.0 - tie_correction as f64 / 48.0; - let normal = Normal::new(mean, var.sqrt())?; - - Ok(SignedRank { - n, - approximation: Approximation::Normal(normal), - }) - } - - /// Create a new exact distribution of W for a given sample size `n`. - /// - /// # Examples - /// - /// ``` - /// use stattest::distribution::SignedRank; - /// - /// let result = SignedRank::exact(25); - /// assert!(result.is_ok()); - /// ``` - pub fn exact(n: usize) -> Result { - Ok(SignedRank { - n, - approximation: Approximation::Exact, - }) - } -} - -fn partitions(number: usize, parts: usize, size: usize) -> usize { - if number == parts { - 1 - } else if number == 0 || parts == 0 || size == 0 { - 0 - } else { - (1..=std::cmp::min(number, size)) - .map(|highest_part| partitions(number - highest_part, parts - 1, highest_part)) - .sum() - } -} - -impl ::rand::distributions::Distribution for SignedRank { - fn sample(&self, r: &mut R) -> f64 { - match self.approximation { - Approximation::Normal(normal) => { - ::rand::distributions::Distribution::sample(&normal, r) - } - Approximation::Exact => { - todo!(); - } - } - } -} - -impl ContinuousCDF for SignedRank { - fn cdf(&self, x: f64) -> f64 { - match self.approximation { - Approximation::Normal(normal) => 2.0 * normal.cdf(x), - Approximation::Exact => { - let r = x.round() as usize; - let mut sum = 1; - - for n in 1..=self.n { - let n_choose_2 = binomial(n as u64, 2) as usize; - if n_choose_2 > r { - continue; - } - for i in n..=(r - n_choose_2) { - sum += partitions(i, n, self.n - n + 1); - } - } - - sum as f64 / 2_f64.powi(self.n as i32 - 1) - } - } - } - - fn inverse_cdf(&self, x: f64) -> f64 { - match self.approximation { - Approximation::Normal(normal) => normal.inverse_cdf(x), - Approximation::Exact => { - todo!(); - } - } - } -} - -impl Min for SignedRank { - fn min(&self) -> f64 { - 0.0 - } -} - -impl Max for SignedRank { - fn max(&self) -> f64 { - (self.n * (self.n + 1) / 2) as f64 - } -} - -impl Distribution for SignedRank { - fn mean(&self) -> Option { - match self.approximation { - Approximation::Normal(normal) => normal.mean(), - Approximation::Exact => Some((self.n * (self.n + 1)) as f64 / 4.0), - } - } - - fn variance(&self) -> Option { - match self.approximation { - Approximation::Normal(normal) => normal.variance(), - Approximation::Exact => Some((self.n * (self.n + 1) * (2 * self.n + 1)) as f64 / 24.0), - } - } - - fn entropy(&self) -> Option { - match self.approximation { - Approximation::Normal(normal) => normal.entropy(), - Approximation::Exact => None, - } - } - - fn skewness(&self) -> Option { - match self.approximation { - Approximation::Normal(normal) => normal.skewness(), - Approximation::Exact => Some(0.0), - } - } -} - -impl Median for SignedRank { - fn median(&self) -> f64 { - match self.approximation { - Approximation::Normal(normal) => normal.median(), - Approximation::Exact => self.mean().unwrap(), - } - } -} - -impl Mode> for SignedRank { - fn mode(&self) -> Option { - match self.approximation { - Approximation::Normal(normal) => normal.mode(), - Approximation::Exact => self.mean(), - } - } -} - -impl Continuous for SignedRank { - fn pdf(&self, x: f64) -> f64 { - match self.approximation { - Approximation::Normal(normal) => normal.pdf(x), - Approximation::Exact => { - let r = x.round() as usize; - let mut sum = 0; - - for n in 1..=self.n { - let n_choose_2 = binomial(n as u64, 2) as usize; - if n_choose_2 > r { - break; - } - sum += partitions(r - n_choose_2, n, self.n - n + 1); - } - - sum as f64 / 2_f64.powi(self.n as i32) - } - } - } - - fn ln_pdf(&self, x: f64) -> f64 { - match self.approximation { - Approximation::Normal(normal) => normal.ln_pdf(x), - Approximation::Exact => self.pdf(x).ln(), - } - } -} - -#[cfg(test)] -mod tests { - use statrs::distribution::{Continuous, ContinuousCDF}; - use statrs::statistics::Distribution; - - #[test] - fn n_20() { - let distribution = super::SignedRank::new(20, 0, 0).unwrap(); - assert_eq!(distribution.mean(), Some(105.0)); - assert_eq!(distribution.std_dev(), Some(26.78619047195775)); - assert_eq!(distribution.cdf(50.0), 0.04004379807046464); - } - - #[test] - fn n_30() { - let distribution = super::SignedRank::new(30, 0, 0).unwrap(); - assert_eq!(distribution.mean(), Some(232.5)); - assert_eq!(distribution.std_dev(), Some(48.61841215013094)); - assert_eq!(distribution.cdf(150.0), 0.08971783777126728); - } - - #[test] - fn n_20_exact() { - let distribution = super::SignedRank::exact(20).unwrap(); - assert_eq!(distribution.cdf(50.0), 0.039989471435546875); - } - - #[test] - fn n_11() { - let distribution = super::SignedRank::exact(11).unwrap(); - assert_eq!(distribution.cdf(11.0), 0.0537109375); - assert_eq!(distribution.cdf(7.0), 0.0185546875); - assert_eq!(distribution.cdf(5.0), 0.009765625); - } - - #[test] - fn n_10() { - let distribution = super::SignedRank::exact(10).unwrap(); - assert_eq!(distribution.cdf(8.0), 0.048828125); - assert_eq!(distribution.cdf(5.0), 0.01953125); - assert_eq!(distribution.cdf(3.0), 0.009765625); - } - - #[test] - fn n_9() { - let distribution = super::SignedRank::exact(9).unwrap(); - assert_eq!(distribution.cdf(6.0), 0.0546875); - assert_eq!(distribution.cdf(3.0), 0.01953125); - assert_eq!(distribution.cdf(2.0), 0.01171875); - } - - #[test] - fn n_8() { - let distribution = super::SignedRank::exact(8).unwrap(); - assert_eq!(distribution.cdf(4.0), 0.0546875); - assert_eq!(distribution.cdf(3.0), 0.0390625); - assert_eq!(distribution.cdf(2.5), 0.0390625); - assert_eq!(distribution.cdf(2.0), 0.0234375); - } - - #[test] - fn n_8_pdf() { - let distribution = super::SignedRank::exact(8).unwrap(); - assert_eq!(distribution.pdf(4.0), 0.0078125); - assert_eq!(distribution.pdf(3.0), 0.0078125); - assert_eq!(distribution.pdf(2.0), 0.00390625); - } - - #[test] - fn n_8_ties() { - let distribution = super::SignedRank::new(8, 0, 7).unwrap(); - assert_eq!(distribution.cdf(3.0), 0.03542823032427003); - assert_eq!(distribution.cdf(2.5), 0.029739401378297385); - assert_eq!(distribution.cdf(2.0), 0.024854396634115632); - } - - #[test] - fn partition() { - assert_eq!(super::partitions(7, 3, 5), 4); - } -} diff --git a/extern/stattest/src/lib.rs b/extern/stattest/src/lib.rs deleted file mode 100644 index a7cf23d..0000000 --- a/extern/stattest/src/lib.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod distribution; -pub mod statistics; -pub mod test; diff --git a/extern/stattest/src/statistics/iter_statistics_ext.rs b/extern/stattest/src/statistics/iter_statistics_ext.rs deleted file mode 100644 index 16cb885..0000000 --- a/extern/stattest/src/statistics/iter_statistics_ext.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::statistics::StatisticsExt; -use statrs::statistics::Statistics; -use std::borrow::Borrow; -use std::f64; - -impl StatisticsExt for T -where - T: IntoIterator + Clone, - T::Item: Borrow, -{ - fn n(self) -> f64 { - self.into_iter().count() as f64 - } - - fn df(self) -> f64 { - self.n() - 1.0 - } - - fn pooled_variance(self, other: Self) -> f64 { - let df_x = self.clone().df(); - let df_y = other.clone().df(); - let var_x = self.variance(); - let var_y = other.variance(); - - (df_x * var_x + df_y * var_y) / (df_x + df_y) - } - - fn pooled_std_dev(self, other: Self) -> f64 { - self.pooled_variance(other).sqrt() - } - - fn variance_ratio(self, other: Self) -> f64 { - self.variance() / other.variance() - } -} - -#[cfg(test)] -mod tests { - #[allow(dead_code)] - fn round(number: f64, digits: Option) -> f64 { - match digits { - Some(digits) => { - let factor = 10_f64.powi(digits); - (number * factor).round() / factor - } - None => number.round(), - } - } - - #[test] - fn pooled_variance() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - assert_eq!( - round(super::StatisticsExt::pooled_variance(&x, &y), Some(3)), - 446.118 - ); - } - - #[test] - fn pooled_std_dev() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - assert_eq!( - round(super::StatisticsExt::pooled_std_dev(&x, &y), Some(3)), - 21.121 - ); - } -} diff --git a/extern/stattest/src/statistics/mod.rs b/extern/stattest/src/statistics/mod.rs deleted file mode 100644 index acda8d8..0000000 --- a/extern/stattest/src/statistics/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Provides traits for statistical computation. - -pub use self::iter_statistics_ext::*; -pub use self::ranks::*; -pub use self::statistics_ext::*; - -mod iter_statistics_ext; -mod ranks; -mod statistics_ext; diff --git a/extern/stattest/src/statistics/ranks.rs b/extern/stattest/src/statistics/ranks.rs deleted file mode 100644 index 0399435..0000000 --- a/extern/stattest/src/statistics/ranks.rs +++ /dev/null @@ -1,155 +0,0 @@ -use std::borrow::Borrow; -use std::iter::FusedIterator; - -/// For the ranking of groups of variables. -pub trait Ranks { - /// Returns a vector of ranks. - fn ranks(self) -> (Vec, usize); -} - -impl Ranks for T -where - T: IntoIterator, - T::Item: Borrow, -{ - fn ranks(self) -> (Vec, usize) { - let mut observations: Vec<_> = self.into_iter().map(|x| *x.borrow()).enumerate().collect(); - observations - .sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - - let (sorted_indices, sorted_values): (Vec<_>, Vec<_>) = observations.into_iter().unzip(); - let groups = DedupWithCount::new(sorted_values.iter()); - - let mut resolved_ties = ResolveTies::new(groups); - let mut ranks = vec![0.0; sorted_values.len()]; - - for (rank, old_index) in (&mut resolved_ties).zip(sorted_indices) { - ranks[old_index] = rank; - } - - (ranks, resolved_ties.tie_correction()) - } -} - -struct ResolveTies -where - I: Iterator, -{ - iter: I, - index: usize, - left: usize, - resolved: Option, - tie_correction: usize, -} - -impl ResolveTies -where - I: Iterator, -{ - fn new(iter: I) -> Self { - ResolveTies { - iter, - index: 0, - left: 0, - resolved: None, - tie_correction: 0, - } - } - - fn tie_correction(&self) -> usize { - self.tie_correction - } -} - -impl Iterator for ResolveTies -where - I: Iterator, -{ - type Item = f64; - - fn next(&mut self) -> Option { - if self.left > 0 { - self.left -= 1; - self.index += 1; - self.resolved - } else { - match self.iter.next() { - Some((count, _)) => { - self.resolved = Some(((1.0 + count as f64) / 2.0) + self.index as f64); - self.left = count - 1; - self.index += 1; - self.tie_correction += count.pow(3) - count; - self.resolved - } - None => None, - } - } - } - - fn size_hint(&self) -> (usize, Option) { - let (low, _high) = self.iter.size_hint(); - (low, None) - } -} - -impl FusedIterator for ResolveTies where I: Iterator {} - -struct DedupWithCount -where - I: Iterator, - I::Item: PartialEq, -{ - iter: I, - peek: Option, -} - -impl DedupWithCount -where - I: Iterator, - I::Item: PartialEq, -{ - fn new(mut iter: I) -> Self { - DedupWithCount { - peek: iter.next(), - iter, - } - } -} - -impl Iterator for DedupWithCount -where - I: Iterator, - I::Item: PartialEq, -{ - type Item = (usize, I::Item); - - fn next(&mut self) -> Option<(usize, I::Item)> { - let mut count = 1; - loop { - if self.peek.is_some() { - let next = self.iter.next(); - - if self.peek == next { - count += 1; - continue; - } - - let value = match next { - Some(value) => self.peek.replace(value).unwrap(), - None => self.peek.take().unwrap(), - }; - - break Some((count, value)); - } else { - break None; - } - } - } - - fn size_hint(&self) -> (usize, Option) { - let (low, high) = self.iter.size_hint(); - (if low == 0 { 0 } else { 1 }, high) - } -} - -impl FusedIterator for DedupWithCount where I::Item: PartialEq {} diff --git a/extern/stattest/src/statistics/statistics_ext.rs b/extern/stattest/src/statistics/statistics_ext.rs deleted file mode 100644 index 582d7f0..0000000 --- a/extern/stattest/src/statistics/statistics_ext.rs +++ /dev/null @@ -1,15 +0,0 @@ -/// Provides additional statistical utilities, complementing [statrs::statistics::Statistics]. -pub trait StatisticsExt { - /// Returns the amount of observations. - fn n(self) -> T; - /// Returns the degrees of freedom of the data. - fn df(self) -> T; - - /// Returns the pooled variance. - fn pooled_variance(self, other: Self) -> T; - /// Returns the pooled standard deviation. - fn pooled_std_dev(self, other: Self) -> T; - - /// Returns the ratio between two variances. - fn variance_ratio(self, other: Self) -> T; -} diff --git a/extern/stattest/src/test/f.rs b/extern/stattest/src/test/f.rs deleted file mode 100644 index 4e0eff0..0000000 --- a/extern/stattest/src/test/f.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::statistics::StatisticsExt; -use statrs::distribution::{ContinuousCDF, FisherSnedecor}; - -/// Implements the [F-test of equality of variances](https://en.wikipedia.org/wiki/F-test_of_equality_of_variances). -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct FTest { - df: (f64, f64), - estimate: f64, - p_value: f64, -} - -impl FTest { - /// Carry out the F-test of equality of variances on the samples `x` and `y`. - pub fn new(x: &[f64], y: &[f64]) -> statrs::Result { - let f = x.variance_ratio(y); - let df = (x.df(), y.df()); - - let distribution = FisherSnedecor::new(df.0, df.1)?; - let probability = distribution.cdf(f); - let p_value = if f.gt(&1.0) { - 1.0 - probability - } else { - probability - }; - - Ok(FTest { - df, - estimate: f, - p_value, - }) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn f_test() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let result = super::FTest::new(&x, &y).unwrap(); - assert_eq!(result.df, (11.0, 6.0)); - assert_eq!(result.estimate, 1.0755200911940725); - assert_eq!(result.p_value, 0.4893961256182331); - } - - #[test] - fn f_test_reverse() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let result = super::FTest::new(&y, &x).unwrap(); - assert_eq!(result.df, (6.0, 11.0)); - assert_eq!(result.estimate, 0.9297827239003709); - assert_eq!(result.p_value, 0.48939612561823265); - } -} diff --git a/extern/stattest/src/test/levenes.rs b/extern/stattest/src/test/levenes.rs deleted file mode 100644 index b84c689..0000000 --- a/extern/stattest/src/test/levenes.rs +++ /dev/null @@ -1,64 +0,0 @@ -use crate::statistics::StatisticsExt; -use statrs::distribution::{ContinuousCDF, FisherSnedecor}; -use statrs::statistics::Statistics; - -/// Implements [Levene's test](https://en.wikipedia.org/wiki/Levene%27s_test) (Brown & Forsythe, 1974). -/// -/// # References -/// -/// Brown, M. B., & Forsythe, A. B. (1974). Robust Tests for the Equality of Variances. -/// Journal of the American Statistical Association, 69(346), 364–367. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct LevenesTest { - df: f64, - estimate: f64, - p_value: f64, -} - -impl LevenesTest { - /// Run Levene's test on the samples `x` and `y`. - pub fn new(x: &[f64], y: &[f64]) -> statrs::Result { - let n_x = x.n(); - let n_y = y.n(); - let diff_x = x.iter().map(|xi| (xi - x.mean()).abs()); - let diff_y = y.iter().map(|yi| (yi - y.mean()).abs()); - - let mean_diff_x = diff_x.clone().mean(); - let mean_diff_y = diff_y.clone().mean(); - let mean_diff = Iterator::chain(diff_x.clone(), diff_y.clone()).mean(); - - let a: f64 = - n_x * (mean_diff_x - mean_diff).powi(2) + n_y * (mean_diff_y - mean_diff).powi(2); - let b: f64 = Iterator::chain( - diff_x.map(|diff| (diff - mean_diff_x).powi(2)), - diff_y.map(|diff| (diff - mean_diff_y).powi(2)), - ) - .sum(); - - let df = n_x + n_y - 2.0; - let estimate = df * a / b; - let distribution = FisherSnedecor::new(1.0, df)?; - let p_value = 1.0 - distribution.cdf(estimate); - - Ok(LevenesTest { - df, - estimate, - p_value, - }) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn levenes_test() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let result = super::LevenesTest::new(&x, &y).unwrap(); - assert_eq!(result.df, 17.0); - assert_eq!(result.estimate, 0.014721055064513417); - assert_eq!(result.p_value, 0.9048519802923365); - } -} diff --git a/extern/stattest/src/test/mann_whitney_u.rs b/extern/stattest/src/test/mann_whitney_u.rs deleted file mode 100644 index ff46a03..0000000 --- a/extern/stattest/src/test/mann_whitney_u.rs +++ /dev/null @@ -1,69 +0,0 @@ -use crate::statistics::*; -use statrs::distribution::{ContinuousCDF, Normal}; - -/// Implements the [Mann-Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), -/// also known as the Wilcoxon rank-sum test. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct MannWhitneyUTest { - estimate: (f64, f64), - effect_size: f64, - p_value: f64, -} - -impl MannWhitneyUTest { - /// Run Mann-Whitney U test/Wilcoxon rank-sum test on samples `x` and `y`. - pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result { - let (ranks, tie_correction) = x.iter().chain(y).ranks(); - let n_x = x.n(); - let n_y = y.n(); - let n_xy = n_x * n_y; - - let estimate = (n_x * (n_x + 1.0)) / 2.0 - ranks[0..x.len()].iter().sum::(); - let estimate_x = n_xy + estimate; - let estimate_y = -estimate; - let estimate_small = if estimate < 0.0 { - estimate_x - } else { - estimate_y - }; - - let n = n_x + n_y; - let distribution_mean = n_xy / 2.0; - let distribution_var = (n_xy * (n + 1.0 - tie_correction as f64 / (n * (n - 1.0)))) / 12.0; - - let normal = Normal::new(distribution_mean, distribution_var.sqrt())?; - let p_value = 2.0 * normal.cdf(estimate_small); - let effect_size = 1.0 - (2.0 * estimate_small) / n_xy; - - Ok(MannWhitneyUTest { - effect_size, - estimate: (estimate_x, estimate_y), - p_value, - }) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn mann_whitney_u() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let test = super::MannWhitneyUTest::independent(&x, &y).unwrap(); - assert_eq!(test.estimate, (21.5, 62.5)); - assert_eq!(test.effect_size, 0.48809523809523814); - assert_eq!(test.p_value, 0.08303763193135497); - } - - #[test] - fn mann_whitney_u_2() { - let x = vec![68.0, 68.0, 59.0, 72.0, 64.0, 67.0, 70.0, 74.0]; - let y = vec![60.0, 67.0, 61.0, 62.0, 67.0, 63.0, 56.0, 58.0]; - let test = super::MannWhitneyUTest::independent(&x, &y).unwrap(); - assert_eq!(test.estimate, (9.0, 55.0)); - assert_eq!(test.effect_size, 0.71875); - assert_eq!(test.p_value, 0.01533316211294691); - } -} diff --git a/extern/stattest/src/test/mod.rs b/extern/stattest/src/test/mod.rs deleted file mode 100644 index cda457c..0000000 --- a/extern/stattest/src/test/mod.rs +++ /dev/null @@ -1,24 +0,0 @@ -//! Defines frequentist statistical tests. - -pub use self::f::*; -pub use self::levenes::*; -pub use self::mann_whitney_u::*; -pub use self::shapiro_wilk::*; -pub use self::students_t::*; -pub use self::welchs_t::*; -pub use self::wilcoxon_w::*; - -mod f; -mod levenes; -mod mann_whitney_u; -mod shapiro_wilk; -mod students_t; -mod welchs_t; -mod wilcoxon_w; - -/// Alternative hypothesis for comparing two means. -pub enum AlternativeHypothesis { - Greater, - Different, - Less, -} diff --git a/extern/stattest/src/test/shapiro_wilk.rs b/extern/stattest/src/test/shapiro_wilk.rs deleted file mode 100644 index 4684fbe..0000000 --- a/extern/stattest/src/test/shapiro_wilk.rs +++ /dev/null @@ -1,263 +0,0 @@ -use crate::distribution::ShapiroWilk; -use statrs::distribution::{ContinuousCDF, Normal}; -use statrs::function::evaluate::polynomial; -use statrs::statistics::Statistics; -use std::cmp; -use std::f64::consts::{FRAC_1_SQRT_2, FRAC_PI_3}; - -/// Implements the [Shapiro-Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test) -/// (Shapiro & Wilk, 1965). A simplified port of the algorithm -/// described by Royston (1992, 1995). -/// -/// # References -/// -/// Royston, P. (1992). Approximating the Shapiro-Wilk W-test for non-normality. -/// Statistics and Computing, 2(3), 117–119. -/// -/// Royston, P. (1995). Remark AS R94: A Remark on Algorithm AS 181: -/// The W-test for Normality. Journal of the Royal Statistical Society. -/// Series C (Applied Statistics), 44(4), 547–551. -/// -/// Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality -/// (complete samples)†. Biometrika, 52(3–4), 591–611. -#[derive(Debug, PartialEq, Clone)] -pub struct ShapiroWilkTest { - p_value: f64, - estimate: f64, - weights: Vec, - status: ShapiroWilkStatus, -} - -/// Representation of non-fatal `IFAULT` codes (Royston, 1995). -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum ShapiroWilkStatus { - /// `IFAULT = 0` (no fault) - Ok, - /// `IFAULT = 2` (n > 5000) - TooMany, -} - -/// Representation of fatal `IFAULT` codes (Royston, 1995). -/// -/// As for the other codes not listed here or in [ShapiroWilkStatus]: -/// - `IFAULT = 3` (insufficient storage for A) --- A is now allocated within the method -/// - `IFAULT = 4` (censoring while n < 20) --- censoring is not implemented in this port -/// - `IFAULT = 5` (the proportion censored > 0.8) --- censoring is not implemented in this port -/// - `IFAULT = 7` (the data are not in ascending order) --- data are now sorted within the method -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum ShapiroWilkError { - /// `IFAULT = 1` (n < 3) - TooFew, - /// `IFAULT = 6` (the data have zero range) - NoDifference, - /// Should not happen - CannotMakeDistribution, -} - -static SMALL: f64 = 1E-19; // smaller for f64? -static FRAC_6_PI: f64 = 1.90985931710274; // 6/pi - -// Polynomials for estimating weights. -static C1: [f64; 6] = [0.0, 0.221157, -0.147981, -2.07119, 4.434685, -2.706056]; -static C2: [f64; 6] = [0.0, 0.042981, -0.293762, -1.752461, 5.682633, -3.582633]; -// Polynomial for estimating scaling factor. -static G: [f64; 2] = [-2.273, 0.459]; - -impl ShapiroWilkTest { - /// Run the Shapiro-Wilk test on the sample `x`. - pub fn new(x: &[f64]) -> Result { - let n = x.len(); - let mut sorted = x.to_owned(); - sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(cmp::Ordering::Equal)); - - let range = sorted.last().unwrap() - sorted[0]; - - if range.lt(&SMALL) { - return Err(ShapiroWilkError::NoDifference); - } else if n < 3 { - return Err(ShapiroWilkError::TooFew); - } - - let weights = Self::get_weights(n); - let mean = (&sorted).mean(); - - let (denominator, numerator): (f64, f64) = (0..n) - .map(|i| { - let distance = sorted[i] - mean; - (distance * distance, distance * weights[i]) - }) - .fold((0.0, 0.0), |sum, value| (sum.0 + value.0, sum.1 + value.1)); - - // Calculate complement (denominator - numerator) / denominator - // to keep precision when numerator / denominator is close to 1 - let complement = (denominator - numerator.powi(2)) / denominator; - let estimate = 1.0 - complement; - - let status = if n > 5000 { - ShapiroWilkStatus::TooMany - } else { - ShapiroWilkStatus::Ok - }; - let p_value = if n == 3 { - FRAC_6_PI * (estimate.sqrt().asin() - FRAC_PI_3).max(0.0) - } else { - let distribution = match ShapiroWilk::new(n) { - Ok(distribution) => distribution, - Err(_) => return Err(ShapiroWilkError::CannotMakeDistribution), - }; - 1.0 - distribution.cdf(if n <= 11 { - let gamma = polynomial(n as f64, &G); - -(gamma - complement.ln()).ln() - } else { - complement.ln() - }) - }; - - Ok(ShapiroWilkTest { - weights, - status, - estimate, - p_value, - }) - } - - fn get_weights(n: usize) -> Vec { - if n == 3 { - return vec![-FRAC_1_SQRT_2, 0.0, FRAC_1_SQRT_2]; - } - - let normal = Normal::new(0.0, 1.0).unwrap(); - let mut weights = vec![0.0; n]; - - let half = n / 2; - let float_n = n as f64; - let an_25 = float_n + 0.25; - - let mut sum_squared = 0_f64; - for i in 0..half { - weights[i] = normal.inverse_cdf((i as f64 + 0.625) / an_25); - weights[n - i - 1] = -weights[i]; - sum_squared += weights[i].powi(2); - } - sum_squared *= 2.0; - - let root_sum_squared = sum_squared.sqrt(); - let rsn = float_n.sqrt().recip(); - let weight0 = polynomial(rsn, &C1) - weights[0] / root_sum_squared; - - let (weights_set, scale) = if n > 5 { - let weight1 = polynomial(rsn, &C2) - weights[1] / root_sum_squared; - let scale = ((sum_squared - 2.0 * (weights[0].powi(2) + weights[1].powi(2))) - / (1.0 - 2.0 * (weight0.powi(2) + weight1.powi(2)))) - .sqrt(); - weights[1] = -weight1; - weights[n - 2] = weight1; - (2, scale) - } else { - let scale = - ((sum_squared - 2.0 * weights[0].powi(2)) / (1.0 - 2.0 * weight0.powi(2))).sqrt(); - (1, scale) - }; - - weights[0] = -weight0; - weights[n - 1] = weight0; - - for i in weights_set..half { - weights[i] /= scale; - weights[n - i - 1] = -weights[i]; - } - - weights - } -} - -#[cfg(test)] -mod tests { - #[test] - fn shapiro_wilk() { - let x = vec![ - 0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614, 0.655, 0.954, 1.392, - 1.557, 1.648, 1.690, 1.994, 2.174, 2.206, 3.245, 3.510, 3.571, 4.354, 4.980, 6.084, - 8.351, - ]; - let test = super::ShapiroWilkTest::new(&x).unwrap(); - assert_eq!(test.estimate, 0.8346662753181684); - assert_eq!(test.p_value, 0.0009134904817755807); - } - - #[test] - fn shapiro_wilk_1() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let test = super::ShapiroWilkTest::new(&x).unwrap(); - assert_eq!(test.estimate, 0.9923657326481632); - assert_eq!(test.p_value, 0.9999699312420669); - } - - #[test] - fn shapiro_wilk_2() { - let x = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let test = super::ShapiroWilkTest::new(&x).unwrap(); - assert_eq!(test.estimate, 0.9980061683004456); - assert_eq!(test.p_value, 0.9999411393249124); - } - - #[test] - fn large_range() { - let x = vec![ - 0.139E100, 0.157E100, 0.175E100, 0.256E100, 0.344E100, 0.413E100, 0.503E100, 0.577E100, - 0.614E100, 0.655E100, 0.954E100, 1.392E100, 1.557E100, 1.648E100, 1.690E100, 1.994E100, - 2.174E100, 2.206E100, 3.245E100, 3.510E100, 3.571E100, 4.354E100, 4.980E100, 6.084E100, - 8.351E100, - ]; - let test = super::ShapiroWilkTest::new(&x).unwrap(); - assert_eq!(test.estimate, 0.8346662753181684); - assert_eq!(test.p_value, 0.0009134904817755807); - } - - #[test] - fn nearly_normally_distributed() { - let x = vec![ - -0.44, -0.31, -0.25, -0.21, -0.18, -0.15, -0.12, -0.10, -0.08, -0.06, -0.04, -0.02, - 0.0, 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.15, 0.18, 0.21, 0.25, 0.31, 0.44, - ]; - let test = super::ShapiroWilkTest::new(&x).unwrap(); - assert_eq!(test.estimate, 0.9997987717271388); - assert_eq!(test.p_value, 1.0); - } - - #[test] - fn normally_distributed() { - let x = vec![ - -0.4417998157703872, - -0.310841224215176, - -0.2544758389229413, - -0.21509882047762266, - -0.18254731741828356, - -0.1541570107663562, - -0.12852819470327187, - -0.1048199468783084, - -0.08247628697708857, - -0.061100278634889656, - -0.040388574421146996, - -0.020093702456713224, - 0.0, - 0.020093702456713224, - 0.040388574421146996, - 0.061100278634889656, - 0.08247628697708857, - 0.1048199468783084, - 0.12852819470327187, - 0.1541570107663562, - 0.18254731741828356, - 0.21509882047762266, - 0.2544758389229413, - 0.310841224215176, - 0.4417998157703872, - ]; - let test = super::ShapiroWilkTest::new(&x).unwrap(); - assert_eq!(test.estimate, 0.9999999999999999); - assert_eq!(test.p_value, 1.0); - } -} diff --git a/extern/stattest/src/test/students_t.rs b/extern/stattest/src/test/students_t.rs deleted file mode 100644 index 9b5e245..0000000 --- a/extern/stattest/src/test/students_t.rs +++ /dev/null @@ -1,89 +0,0 @@ -use crate::statistics::StatisticsExt; -use statrs::distribution::{ContinuousCDF, StudentsT}; -use statrs::statistics::Statistics; - -/// Implements [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test). -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct StudentsTTest { - df: f64, - estimate: f64, - effect_size: f64, - p_value: f64, -} - -impl StudentsTTest { - /// Run Student's two-sample t-test on samples `x` and `y`. - pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result { - let n_x = x.n(); - let n_y = y.n(); - let df = n_x + n_y; - - let effect_size = (x.mean() - y.mean()) / x.pooled_variance(y).sqrt(); - let t = effect_size / (n_x.recip() + n_y.recip()).sqrt(); - - let t_distribution = StudentsT::new(0.0, 1.0, df)?; - let p_value = 2.0 * t_distribution.cdf(-t.abs()); - - Ok(StudentsTTest { - df, - effect_size: effect_size.abs(), - estimate: t, - p_value, - }) - } - - /// Run paired Student's t-test on samples `x` and `y`. - pub fn paired(x: &[f64], y: &[f64]) -> statrs::Result { - let d: Vec<_> = x.iter().zip(y).map(|(x, y)| x - y).collect(); - let df = x.df(); - let effect_size = (&d).mean() / (&d).std_dev(); - let t = effect_size * x.n().sqrt(); - - let t_distribution = StudentsT::new(0.0, 1.0, df)?; - let p_value = 2.0 * t_distribution.cdf(-t.abs()); - - Ok(StudentsTTest { - df, - effect_size: effect_size.abs(), - estimate: t, - p_value, - }) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn students_t() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let test = super::StudentsTTest::independent(&x, &y).unwrap(); - assert_eq!(test.estimate, 1.8914363974423305); - assert_eq!(test.effect_size, 0.8995574392432595); - assert_eq!(test.p_value, 0.073911127032672); - } - - #[test] - fn reverse() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let test = super::StudentsTTest::independent(&y, &x).unwrap(); - assert_eq!(test.estimate, -1.8914363974423305); - assert_eq!(test.effect_size, 0.8995574392432595); - assert_eq!(test.p_value, 0.073911127032672); - } - - #[test] - fn paired() { - let x = vec![8.0, 6.0, 5.5, 11.0, 8.5, 5.0, 6.0, 6.0]; - let y = vec![8.5, 9.0, 6.5, 10.5, 9.0, 7.0, 6.5, 7.0]; - let test = super::StudentsTTest::paired(&x, &y).unwrap(); - assert_eq!(test.estimate, -2.645751311064591); - assert_eq!(test.effect_size, 0.9354143466934856); - assert_eq!(test.p_value, 0.03314550026377362); - } -} diff --git a/extern/stattest/src/test/welchs_t.rs b/extern/stattest/src/test/welchs_t.rs deleted file mode 100644 index 7a76d95..0000000 --- a/extern/stattest/src/test/welchs_t.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::statistics::StatisticsExt; -use statrs::distribution::{ContinuousCDF, StudentsT}; -use statrs::statistics::Statistics; - -/// Implements [Welch's t-test](https://en.wikipedia.org/wiki/Welch's_t-test) (Welch, 1947). -/// -/// # References -/// -/// Welch, B. L. (1947). The generalisation of student’s problems when several different population -/// variances are involved. Biometrika, 34(1–2), 28–35. -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct WelchsTTest { - df: f64, - estimate: f64, - effect_size: f64, - p_value: f64, -} - -impl WelchsTTest { - /// Run Welch's two-sample t-test on samples `x` and `y`. - pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result { - let var_x = x.variance(); - let var_y = y.variance(); - let var_x_n = var_x / x.n(); - let var_y_n = var_y / y.n(); - let linear_combination = var_x_n + var_y_n; - - let df = linear_combination.powi(2) / (var_x_n.powi(2) / x.df() + var_y_n.powi(2) / y.df()); - - let mean_difference = x.mean() - y.mean(); - let effect_size = mean_difference.abs() / ((var_x + var_y) / 2.0).sqrt(); - let t = mean_difference / linear_combination.sqrt(); - - let t_distribution = StudentsT::new(0.0, 1.0, df)?; - let p_value = 2.0 * t_distribution.cdf(-t.abs()); - - Ok(WelchsTTest { - df, - effect_size, - estimate: t, - p_value, - }) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn students_t() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let test = super::WelchsTTest::independent(&x, &y).unwrap(); - assert_eq!(test.df, 13.081702113268564); - assert_eq!(test.estimate, 1.9107001042454415); - assert_eq!(test.effect_size, 0.904358069450997); - assert_eq!(test.p_value, 0.0782070409214568); - } - - #[test] - fn reverse() { - let x = vec![ - 134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0, - ]; - let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0]; - let test = super::WelchsTTest::independent(&y, &x).unwrap(); - assert_eq!(test.df, 13.081702113268564); - assert_eq!(test.estimate, -1.9107001042454415); - assert_eq!(test.effect_size, 0.904358069450997); - assert_eq!(test.p_value, 0.0782070409214568); - } -} diff --git a/extern/stattest/src/test/wilcoxon_w.rs b/extern/stattest/src/test/wilcoxon_w.rs deleted file mode 100644 index db1d89f..0000000 --- a/extern/stattest/src/test/wilcoxon_w.rs +++ /dev/null @@ -1,70 +0,0 @@ -use crate::distribution::SignedRank; -use crate::statistics::*; -use statrs::distribution::ContinuousCDF; - -/// Implements the [Wilcoxon signed rank test](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test). -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct WilcoxonWTest { - estimate: (f64, f64), - effect_size: f64, - p_value: f64, -} - -impl WilcoxonWTest { - /// Run Wilcoxon signed rank test on samples `x` and `y`. - pub fn paired(x: &[f64], y: &[f64]) -> statrs::Result { - let d: Vec<_> = x.iter().zip(y).map(|(x, y)| (x - y).abs()).collect(); - let (ranks, tie_correction) = (&d).ranks(); - let mut estimate = (0.0, 0.0); - let mut zeroes = 0; - - for ((x, y), rank) in x.iter().zip(y).zip(ranks) { - if x < y { - estimate.0 += rank; - } else if x > y { - estimate.1 += rank; - } else { - zeroes += 1; - } - } - - let estimate_small = if estimate.0 < estimate.1 { - estimate.0 - } else { - estimate.1 - }; - let distribution = SignedRank::new(d.len(), zeroes, tie_correction)?; - let p_value = distribution.cdf(estimate_small); - - let n = (&d).n(); - let rank_sum = n * (n + 1.0) / 2.0; - let effect_size = estimate_small / rank_sum; - - Ok(WilcoxonWTest { - effect_size, - estimate, - p_value, - }) - } -} - -#[cfg(test)] -mod tests { - #[test] - fn paired() { - let x = vec![8.0, 6.0, 5.5, 11.0, 8.5, 5.0, 6.0, 6.0]; - let y = vec![8.5, 9.0, 6.5, 10.5, 9.0, 7.0, 6.5, 7.0]; - let test = super::WilcoxonWTest::paired(&x, &y).unwrap(); - assert_eq!(test.estimate, (33.5, 2.5)); - assert_eq!(test.p_value, 0.027785782704095215); - } - - #[test] - fn paired_2() { - let x = vec![209.0, 200.0, 177.0, 169.0, 159.0, 169.0, 187.0, 198.0]; - let y = vec![151.0, 168.0, 147.0, 164.0, 166.0, 163.0, 176.0, 188.0]; - let test = super::WilcoxonWTest::paired(&x, &y).unwrap(); - assert_eq!(test.estimate, (3.0, 33.0)); - assert_eq!(test.p_value, 0.0390625); - } -}