no need for useless git repo in source
parent
10279d1291
commit
03c8cbbfd3
|
@ -1,22 +0,0 @@
|
|||
name: Rust
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --verbose
|
|
@ -1,3 +0,0 @@
|
|||
/dev
|
||||
/target
|
||||
Cargo.lock
|
|
@ -1,12 +0,0 @@
|
|||
[package]
|
||||
name = "stattest"
|
||||
version = "0.0.0"
|
||||
authors = ["Lars Willighagen <lars.willighagen@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
doctest = false
|
||||
|
||||
[dependencies]
|
||||
statrs = { git = "https://github.com/larsgw/statrs", branch = "patch-1" }
|
||||
rand = "0.7.3"
|
|
@ -1,21 +0,0 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2021 Lars Willighagen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -1,50 +0,0 @@
|
|||
# Stat-Test
|
||||
|
||||
This crate provides statistical tests and relevant utilities.
|
||||
|
||||
---
|
||||
|
||||
## Status
|
||||
|
||||
Currently a number of tests are implemented for comparing the means of two
|
||||
independent samples, and for checking assumptions regarding the former tests.
|
||||
However, these tests are all two-sample tests that do not yet support specifying
|
||||
alternative hypotheses.
|
||||
|
||||
### Comparison of independent means
|
||||
|
||||
- **Student's t-test**
|
||||
`stattest::test::StudentsTTest`
|
||||
*Assumptions:* normality, homogeneity of variances
|
||||
|
||||
- **Welch's t-test**
|
||||
`stattest::test::WelchsTTest`
|
||||
*Assumptions:* normality
|
||||
|
||||
- **Mann-Whitney U-test/Wilcoxon rank-sum test**
|
||||
`stattest::test::MannWhitneyUTest`
|
||||
*Assumptions:* –
|
||||
|
||||
### Comparison of paired observations
|
||||
|
||||
- **Student's t-test**
|
||||
`stattest::test::StudentsTTest`
|
||||
*Assumptions:* normality, homogeneity of variances
|
||||
|
||||
- **Wilcoxon signed rank test**
|
||||
`stattest::test::WilcoxonWTest`
|
||||
*Assumptions:* –
|
||||
|
||||
### Assumption tests
|
||||
|
||||
- **Levene's test**
|
||||
`stattest::test::LevenesTest`
|
||||
*Tests:* homogeneity of variances
|
||||
|
||||
- **F-test of equality of variances**
|
||||
`stattest::test::FTest`
|
||||
*Tests:* homogeneity of variances
|
||||
|
||||
- **Shapiro-Wilk test**
|
||||
`stattest::test::ShapiroWilkTest`
|
||||
*Tests:* normality
|
|
@ -1,7 +0,0 @@
|
|||
//! Defines interfaces for creating and approximating statistical distributions.
|
||||
|
||||
pub use self::shapiro_wilk::*;
|
||||
pub use self::signed_rank::*;
|
||||
|
||||
mod shapiro_wilk;
|
||||
mod signed_rank;
|
|
@ -1,139 +0,0 @@
|
|||
use rand::Rng;
|
||||
use statrs::distribution::{Continuous, ContinuousCDF, Normal};
|
||||
use statrs::function::evaluate::polynomial;
|
||||
use statrs::statistics::*;
|
||||
use statrs::Result;
|
||||
|
||||
// Polynomials used to calculate the mean and standard deviation
|
||||
// of a normal distribution that approximates the distribution of
|
||||
// the W statistic of the Shapiro-Wilk test (Royston, 1992).
|
||||
static C3: [f64; 4] = [0.5440, -0.39978, 0.025054, -0.0006714];
|
||||
static C4: [f64; 4] = [1.3822, -0.77857, 0.062767, -0.0020322];
|
||||
static C5: [f64; 4] = [-1.5861, -0.31082, -0.083751, 0.0038915];
|
||||
static C6: [f64; 3] = [-0.4803, -0.082676, 0.0030302];
|
||||
|
||||
/// Implements an approximation of the distribution of the W
|
||||
/// statistic of the [Shapiro-Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test).
|
||||
///
|
||||
/// # References
|
||||
///
|
||||
/// Royston, P. (1992). Approximating the Shapiro-Wilk W-test for non-normality.
|
||||
/// Statistics and Computing, 2(3), 117–119. <https://doi.org/10.1007/BF01891203>
|
||||
///
|
||||
/// Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality
|
||||
/// (complete samples)†. Biometrika, 52(3–4), 591–611. <https://doi.org/10.1093/biomet/52.3-4.591>
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub struct ShapiroWilk {
|
||||
normal: Normal,
|
||||
n: usize,
|
||||
mean: f64,
|
||||
std_dev: f64,
|
||||
}
|
||||
|
||||
impl ShapiroWilk {
|
||||
/// Create a new approximation for the distribution of W for a given sample size `n`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use stattest::distribution::ShapiroWilk;
|
||||
///
|
||||
/// let result = ShapiroWilk::new(25);
|
||||
/// assert!(result.is_ok());
|
||||
/// ```
|
||||
pub fn new(n: usize) -> Result<ShapiroWilk> {
|
||||
let float_n = n as f64;
|
||||
|
||||
let (mean, std_dev) = if n <= 11 {
|
||||
(polynomial(float_n, &C3), polynomial(float_n, &C4).exp())
|
||||
} else {
|
||||
let log_n = float_n.ln();
|
||||
(polynomial(log_n, &C5), polynomial(log_n, &C6).exp())
|
||||
};
|
||||
|
||||
let result = Normal::new(mean, std_dev);
|
||||
match result {
|
||||
Ok(normal) => Ok(ShapiroWilk {
|
||||
normal,
|
||||
n,
|
||||
mean,
|
||||
std_dev,
|
||||
}),
|
||||
Err(error) => Err(error),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the sample size associated with this distribution.
|
||||
pub fn n(&self) -> usize {
|
||||
self.n
|
||||
}
|
||||
}
|
||||
|
||||
impl ::rand::distributions::Distribution<f64> for ShapiroWilk {
|
||||
fn sample<R: Rng + ?Sized>(&self, r: &mut R) -> f64 {
|
||||
::rand::distributions::Distribution::sample(&self.normal, r)
|
||||
}
|
||||
}
|
||||
|
||||
impl ContinuousCDF<f64, f64> for ShapiroWilk {
|
||||
fn cdf(&self, x: f64) -> f64 {
|
||||
self.normal.cdf(x)
|
||||
}
|
||||
|
||||
fn inverse_cdf(&self, x: f64) -> f64 {
|
||||
self.normal.inverse_cdf(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl Min<f64> for ShapiroWilk {
|
||||
fn min(&self) -> f64 {
|
||||
self.normal.min()
|
||||
}
|
||||
}
|
||||
|
||||
impl Max<f64> for ShapiroWilk {
|
||||
fn max(&self) -> f64 {
|
||||
self.normal.max()
|
||||
}
|
||||
}
|
||||
|
||||
impl Distribution<f64> for ShapiroWilk {
|
||||
fn mean(&self) -> Option<f64> {
|
||||
self.normal.mean()
|
||||
}
|
||||
fn variance(&self) -> Option<f64> {
|
||||
self.normal.variance()
|
||||
}
|
||||
fn entropy(&self) -> Option<f64> {
|
||||
self.normal.entropy()
|
||||
}
|
||||
fn skewness(&self) -> Option<f64> {
|
||||
self.normal.skewness()
|
||||
}
|
||||
}
|
||||
|
||||
impl Mode<Option<f64>> for ShapiroWilk {
|
||||
fn mode(&self) -> Option<f64> {
|
||||
self.normal.mode()
|
||||
}
|
||||
}
|
||||
|
||||
impl Continuous<f64, f64> for ShapiroWilk {
|
||||
fn pdf(&self, x: f64) -> f64 {
|
||||
self.normal.pdf(x)
|
||||
}
|
||||
|
||||
fn ln_pdf(&self, x: f64) -> f64 {
|
||||
self.normal.ln_pdf(x)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn n_25() {
|
||||
let distribution = super::ShapiroWilk::new(25).unwrap();
|
||||
assert_eq!(distribution.mean, -3.3245620722111475);
|
||||
assert_eq!(distribution.std_dev, 0.4891787150186814);
|
||||
}
|
||||
}
|
|
@ -1,313 +0,0 @@
|
|||
use rand::Rng;
|
||||
use statrs::distribution::{Continuous, ContinuousCDF, Normal};
|
||||
use statrs::function::factorial::binomial;
|
||||
use statrs::statistics::*;
|
||||
use statrs::Result;
|
||||
|
||||
/// Implements an approximation of the distribution of the W
|
||||
/// statistic of the [Wilcoxon signed rank test](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test).
|
||||
///
|
||||
/// # References
|
||||
///
|
||||
/// Wilcoxon, F. (1945). Individual Comparisons by Ranking Methods.
|
||||
/// Biometrics Bulletin, 1(6), 80–83. <https://doi.org/10.2307/3001968>
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub struct SignedRank {
|
||||
approximation: Approximation,
|
||||
n: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
enum Approximation {
|
||||
Normal(Normal),
|
||||
Exact,
|
||||
}
|
||||
|
||||
impl SignedRank {
|
||||
/// Determine whether to use an approximation or an exact distribution of W for a given sample size `n`
|
||||
/// and given a number of paired observations with a non-zero difference `non_zero`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use stattest::distribution::SignedRank;
|
||||
///
|
||||
/// let result = SignedRank::new(25, 25);
|
||||
/// assert!(result.is_ok());
|
||||
/// ```
|
||||
pub fn new(n: usize, zeroes: usize, tie_correction: usize) -> Result<SignedRank> {
|
||||
if zeroes > 0 || tie_correction > 0 || (n - zeroes) >= 20 {
|
||||
SignedRank::approximate(n - zeroes, tie_correction)
|
||||
} else {
|
||||
SignedRank::exact(n)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new approximation for the distribution of W for a given sample size `n`
|
||||
/// and given a number of paired observations with a non-zero difference `non_zero`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use stattest::distribution::SignedRank;
|
||||
///
|
||||
/// let result = SignedRank::approximate(25, 25);
|
||||
/// assert!(result.is_ok());
|
||||
/// ```
|
||||
pub fn approximate(n: usize, tie_correction: usize) -> Result<SignedRank> {
|
||||
let mean = (n * (n + 1)) as f64 / 4.0;
|
||||
let var = mean * (2 * n + 1) as f64 / 6.0 - tie_correction as f64 / 48.0;
|
||||
let normal = Normal::new(mean, var.sqrt())?;
|
||||
|
||||
Ok(SignedRank {
|
||||
n,
|
||||
approximation: Approximation::Normal(normal),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a new exact distribution of W for a given sample size `n`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use stattest::distribution::SignedRank;
|
||||
///
|
||||
/// let result = SignedRank::exact(25);
|
||||
/// assert!(result.is_ok());
|
||||
/// ```
|
||||
pub fn exact(n: usize) -> Result<SignedRank> {
|
||||
Ok(SignedRank {
|
||||
n,
|
||||
approximation: Approximation::Exact,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn partitions(number: usize, parts: usize, size: usize) -> usize {
|
||||
if number == parts {
|
||||
1
|
||||
} else if number == 0 || parts == 0 || size == 0 {
|
||||
0
|
||||
} else {
|
||||
(1..=std::cmp::min(number, size))
|
||||
.map(|highest_part| partitions(number - highest_part, parts - 1, highest_part))
|
||||
.sum()
|
||||
}
|
||||
}
|
||||
|
||||
impl ::rand::distributions::Distribution<f64> for SignedRank {
|
||||
fn sample<R: Rng + ?Sized>(&self, r: &mut R) -> f64 {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => {
|
||||
::rand::distributions::Distribution::sample(&normal, r)
|
||||
}
|
||||
Approximation::Exact => {
|
||||
todo!();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ContinuousCDF<f64, f64> for SignedRank {
|
||||
fn cdf(&self, x: f64) -> f64 {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => 2.0 * normal.cdf(x),
|
||||
Approximation::Exact => {
|
||||
let r = x.round() as usize;
|
||||
let mut sum = 1;
|
||||
|
||||
for n in 1..=self.n {
|
||||
let n_choose_2 = binomial(n as u64, 2) as usize;
|
||||
if n_choose_2 > r {
|
||||
continue;
|
||||
}
|
||||
for i in n..=(r - n_choose_2) {
|
||||
sum += partitions(i, n, self.n - n + 1);
|
||||
}
|
||||
}
|
||||
|
||||
sum as f64 / 2_f64.powi(self.n as i32 - 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn inverse_cdf(&self, x: f64) -> f64 {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.inverse_cdf(x),
|
||||
Approximation::Exact => {
|
||||
todo!();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Min<f64> for SignedRank {
|
||||
fn min(&self) -> f64 {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Max<f64> for SignedRank {
|
||||
fn max(&self) -> f64 {
|
||||
(self.n * (self.n + 1) / 2) as f64
|
||||
}
|
||||
}
|
||||
|
||||
impl Distribution<f64> for SignedRank {
|
||||
fn mean(&self) -> Option<f64> {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.mean(),
|
||||
Approximation::Exact => Some((self.n * (self.n + 1)) as f64 / 4.0),
|
||||
}
|
||||
}
|
||||
|
||||
fn variance(&self) -> Option<f64> {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.variance(),
|
||||
Approximation::Exact => Some((self.n * (self.n + 1) * (2 * self.n + 1)) as f64 / 24.0),
|
||||
}
|
||||
}
|
||||
|
||||
fn entropy(&self) -> Option<f64> {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.entropy(),
|
||||
Approximation::Exact => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn skewness(&self) -> Option<f64> {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.skewness(),
|
||||
Approximation::Exact => Some(0.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Median<f64> for SignedRank {
|
||||
fn median(&self) -> f64 {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.median(),
|
||||
Approximation::Exact => self.mean().unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Mode<Option<f64>> for SignedRank {
|
||||
fn mode(&self) -> Option<f64> {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.mode(),
|
||||
Approximation::Exact => self.mean(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Continuous<f64, f64> for SignedRank {
|
||||
fn pdf(&self, x: f64) -> f64 {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.pdf(x),
|
||||
Approximation::Exact => {
|
||||
let r = x.round() as usize;
|
||||
let mut sum = 0;
|
||||
|
||||
for n in 1..=self.n {
|
||||
let n_choose_2 = binomial(n as u64, 2) as usize;
|
||||
if n_choose_2 > r {
|
||||
break;
|
||||
}
|
||||
sum += partitions(r - n_choose_2, n, self.n - n + 1);
|
||||
}
|
||||
|
||||
sum as f64 / 2_f64.powi(self.n as i32)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn ln_pdf(&self, x: f64) -> f64 {
|
||||
match self.approximation {
|
||||
Approximation::Normal(normal) => normal.ln_pdf(x),
|
||||
Approximation::Exact => self.pdf(x).ln(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use statrs::distribution::{Continuous, ContinuousCDF};
|
||||
use statrs::statistics::Distribution;
|
||||
|
||||
#[test]
|
||||
fn n_20() {
|
||||
let distribution = super::SignedRank::new(20, 0, 0).unwrap();
|
||||
assert_eq!(distribution.mean(), Some(105.0));
|
||||
assert_eq!(distribution.std_dev(), Some(26.78619047195775));
|
||||
assert_eq!(distribution.cdf(50.0), 0.04004379807046464);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_30() {
|
||||
let distribution = super::SignedRank::new(30, 0, 0).unwrap();
|
||||
assert_eq!(distribution.mean(), Some(232.5));
|
||||
assert_eq!(distribution.std_dev(), Some(48.61841215013094));
|
||||
assert_eq!(distribution.cdf(150.0), 0.08971783777126728);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_20_exact() {
|
||||
let distribution = super::SignedRank::exact(20).unwrap();
|
||||
assert_eq!(distribution.cdf(50.0), 0.039989471435546875);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_11() {
|
||||
let distribution = super::SignedRank::exact(11).unwrap();
|
||||
assert_eq!(distribution.cdf(11.0), 0.0537109375);
|
||||
assert_eq!(distribution.cdf(7.0), 0.0185546875);
|
||||
assert_eq!(distribution.cdf(5.0), 0.009765625);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_10() {
|
||||
let distribution = super::SignedRank::exact(10).unwrap();
|
||||
assert_eq!(distribution.cdf(8.0), 0.048828125);
|
||||
assert_eq!(distribution.cdf(5.0), 0.01953125);
|
||||
assert_eq!(distribution.cdf(3.0), 0.009765625);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_9() {
|
||||
let distribution = super::SignedRank::exact(9).unwrap();
|
||||
assert_eq!(distribution.cdf(6.0), 0.0546875);
|
||||
assert_eq!(distribution.cdf(3.0), 0.01953125);
|
||||
assert_eq!(distribution.cdf(2.0), 0.01171875);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_8() {
|
||||
let distribution = super::SignedRank::exact(8).unwrap();
|
||||
assert_eq!(distribution.cdf(4.0), 0.0546875);
|
||||
assert_eq!(distribution.cdf(3.0), 0.0390625);
|
||||
assert_eq!(distribution.cdf(2.5), 0.0390625);
|
||||
assert_eq!(distribution.cdf(2.0), 0.0234375);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_8_pdf() {
|
||||
let distribution = super::SignedRank::exact(8).unwrap();
|
||||
assert_eq!(distribution.pdf(4.0), 0.0078125);
|
||||
assert_eq!(distribution.pdf(3.0), 0.0078125);
|
||||
assert_eq!(distribution.pdf(2.0), 0.00390625);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn n_8_ties() {
|
||||
let distribution = super::SignedRank::new(8, 0, 7).unwrap();
|
||||
assert_eq!(distribution.cdf(3.0), 0.03542823032427003);
|
||||
assert_eq!(distribution.cdf(2.5), 0.029739401378297385);
|
||||
assert_eq!(distribution.cdf(2.0), 0.024854396634115632);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partition() {
|
||||
assert_eq!(super::partitions(7, 3, 5), 4);
|
||||
}
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
pub mod distribution;
|
||||
pub mod statistics;
|
||||
pub mod test;
|
|
@ -1,73 +0,0 @@
|
|||
use crate::statistics::StatisticsExt;
|
||||
use statrs::statistics::Statistics;
|
||||
use std::borrow::Borrow;
|
||||
use std::f64;
|
||||
|
||||
impl<T> StatisticsExt<f64> for T
|
||||
where
|
||||
T: IntoIterator + Clone,
|
||||
T::Item: Borrow<f64>,
|
||||
{
|
||||
fn n(self) -> f64 {
|
||||
self.into_iter().count() as f64
|
||||
}
|
||||
|
||||
fn df(self) -> f64 {
|
||||
self.n() - 1.0
|
||||
}
|
||||
|
||||
fn pooled_variance(self, other: Self) -> f64 {
|
||||
let df_x = self.clone().df();
|
||||
let df_y = other.clone().df();
|
||||
let var_x = self.variance();
|
||||
let var_y = other.variance();
|
||||
|
||||
(df_x * var_x + df_y * var_y) / (df_x + df_y)
|
||||
}
|
||||
|
||||
fn pooled_std_dev(self, other: Self) -> f64 {
|
||||
self.pooled_variance(other).sqrt()
|
||||
}
|
||||
|
||||
fn variance_ratio(self, other: Self) -> f64 {
|
||||
self.variance() / other.variance()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[allow(dead_code)]
|
||||
fn round(number: f64, digits: Option<i32>) -> f64 {
|
||||
match digits {
|
||||
Some(digits) => {
|
||||
let factor = 10_f64.powi(digits);
|
||||
(number * factor).round() / factor
|
||||
}
|
||||
None => number.round(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pooled_variance() {
|
||||
let x = vec![
|
||||
134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
|
||||
];
|
||||
let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
|
||||
assert_eq!(
|
||||
round(super::StatisticsExt::pooled_variance(&x, &y), Some(3)),
|
||||
446.118
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pooled_std_dev() {
|
||||
let x = vec![
|
||||
134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
|
||||
];
|
||||
let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
|
||||
assert_eq!(
|
||||
round(super::StatisticsExt::pooled_std_dev(&x, &y), Some(3)),
|
||||
21.121
|
||||
);
|
||||
}
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
//! Provides traits for statistical computation.
|
||||
|
||||
pub use self::iter_statistics_ext::*;
|
||||
pub use self::ranks::*;
|
||||
pub use self::statistics_ext::*;
|
||||
|
||||
mod iter_statistics_ext;
|
||||
mod ranks;
|
||||
mod statistics_ext;
|
|
@ -1,155 +0,0 @@
|
|||
use std::borrow::Borrow;
|
||||
use std::iter::FusedIterator;
|
||||
|
||||
/// For the ranking of groups of variables.
|
||||
pub trait Ranks<T> {
|
||||
/// Returns a vector of ranks.
|
||||
fn ranks(self) -> (Vec<T>, usize);
|
||||
}
|
||||
|
||||
impl<T> Ranks<f64> for T
|
||||
where
|
||||
T: IntoIterator,
|
||||
T::Item: Borrow<f64>,
|
||||
{
|
||||
fn ranks(self) -> (Vec<f64>, usize) {
|
||||
let mut observations: Vec<_> = self.into_iter().map(|x| *x.borrow()).enumerate().collect();
|
||||
observations
|
||||
.sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
let (sorted_indices, sorted_values): (Vec<_>, Vec<_>) = observations.into_iter().unzip();
|
||||
let groups = DedupWithCount::new(sorted_values.iter());
|
||||
|
||||
let mut resolved_ties = ResolveTies::new(groups);
|
||||
let mut ranks = vec![0.0; sorted_values.len()];
|
||||
|
||||
for (rank, old_index) in (&mut resolved_ties).zip(sorted_indices) {
|
||||
ranks[old_index] = rank;
|
||||
}
|
||||
|
||||
(ranks, resolved_ties.tie_correction())
|
||||
}
|
||||
}
|
||||
|
||||
struct ResolveTies<I, T>
|
||||
where
|
||||
I: Iterator<Item = (usize, T)>,
|
||||
{
|
||||
iter: I,
|
||||
index: usize,
|
||||
left: usize,
|
||||
resolved: Option<f64>,
|
||||
tie_correction: usize,
|
||||
}
|
||||
|
||||
impl<I, T> ResolveTies<I, T>
|
||||
where
|
||||
I: Iterator<Item = (usize, T)>,
|
||||
{
|
||||
fn new(iter: I) -> Self {
|
||||
ResolveTies {
|
||||
iter,
|
||||
index: 0,
|
||||
left: 0,
|
||||
resolved: None,
|
||||
tie_correction: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn tie_correction(&self) -> usize {
|
||||
self.tie_correction
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, T> Iterator for ResolveTies<I, T>
|
||||
where
|
||||
I: Iterator<Item = (usize, T)>,
|
||||
{
|
||||
type Item = f64;
|
||||
|
||||
fn next(&mut self) -> Option<f64> {
|
||||
if self.left > 0 {
|
||||
self.left -= 1;
|
||||
self.index += 1;
|
||||
self.resolved
|
||||
} else {
|
||||
match self.iter.next() {
|
||||
Some((count, _)) => {
|
||||
self.resolved = Some(((1.0 + count as f64) / 2.0) + self.index as f64);
|
||||
self.left = count - 1;
|
||||
self.index += 1;
|
||||
self.tie_correction += count.pow(3) - count;
|
||||
self.resolved
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let (low, _high) = self.iter.size_hint();
|
||||
(low, None)
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, T> FusedIterator for ResolveTies<I, T> where I: Iterator<Item = (usize, T)> {}
|
||||
|
||||
struct DedupWithCount<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: PartialEq,
|
||||
{
|
||||
iter: I,
|
||||
peek: Option<I::Item>,
|
||||
}
|
||||
|
||||
impl<I> DedupWithCount<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: PartialEq,
|
||||
{
|
||||
fn new(mut iter: I) -> Self {
|
||||
DedupWithCount {
|
||||
peek: iter.next(),
|
||||
iter,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> Iterator for DedupWithCount<I>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: PartialEq,
|
||||
{
|
||||
type Item = (usize, I::Item);
|
||||
|
||||
fn next(&mut self) -> Option<(usize, I::Item)> {
|
||||
let mut count = 1;
|
||||
loop {
|
||||
if self.peek.is_some() {
|
||||
let next = self.iter.next();
|
||||
|
||||
if self.peek == next {
|
||||
count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let value = match next {
|
||||
Some(value) => self.peek.replace(value).unwrap(),
|
||||
None => self.peek.take().unwrap(),
|
||||
};
|
||||
|
||||
break Some((count, value));
|
||||
} else {
|
||||
break None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let (low, high) = self.iter.size_hint();
|
||||
(if low == 0 { 0 } else { 1 }, high)
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator> FusedIterator for DedupWithCount<I> where I::Item: PartialEq {}
|
|
@ -1,15 +0,0 @@
|
|||
/// Provides additional statistical utilities, complementing [statrs::statistics::Statistics].
|
||||
pub trait StatisticsExt<T> {
|
||||
/// Returns the amount of observations.
|
||||
fn n(self) -> T;
|
||||
/// Returns the degrees of freedom of the data.
|
||||
fn df(self) -> T;
|
||||
|
||||
/// Returns the pooled variance.
|
||||
fn pooled_variance(self, other: Self) -> T;
|
||||
/// Returns the pooled standard deviation.
|
||||
fn pooled_std_dev(self, other: Self) -> T;
|
||||
|
||||
/// Returns the ratio between two variances.
|
||||
fn variance_ratio(self, other: Self) -> T;
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
use crate::statistics::StatisticsExt;
|
||||
use statrs::distribution::{ContinuousCDF, FisherSnedecor};
|
||||
|
||||
/// Implements the [F-test of equality of variances](https://en.wikipedia.org/wiki/F-test_of_equality_of_variances).
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub struct FTest {
|
||||
df: (f64, f64),
|
||||
estimate: f64,
|
||||
p_value: f64,
|
||||
}
|
||||
|
||||
impl FTest {
|
||||
/// Carry out the F-test of equality of variances on the samples `x` and `y`.
|
||||
pub fn new(x: &[f64], y: &[f64]) -> statrs::Result<FTest> {
|
||||
let f = x.variance_ratio(y);
|
||||
let df = (x.df(), y.df());
|
||||
|
||||
let distribution = FisherSnedecor::new(df.0, df.1)?;
|
||||
let probability = distribution.cdf(f);
|
||||
let p_value = if f.gt(&1.0) {
|
||||
1.0 - probability
|
||||
} else {
|
||||
probability
|
||||
};
|
||||
|
||||
Ok(FTest {
|
||||
df,
|
||||
estimate: f,
|
||||
p_value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn f_test() {
|
||||
let x = vec![
|
||||
134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
|
||||
];
|
||||
let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
|
||||
let result = super::FTest::new(&x, &y).unwrap();
|
||||
assert_eq!(result.df, (11.0, 6.0));
|
||||
assert_eq!(result.estimate, 1.0755200911940725);
|
||||
assert_eq!(result.p_value, 0.4893961256182331);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn f_test_reverse() {
|
||||
let x = vec![
|
||||
134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
|
||||
];
|
||||
let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
|
||||
let result = super::FTest::new(&y, &x).unwrap();
|
||||
assert_eq!(result.df, (6.0, 11.0));
|
||||
assert_eq!(result.estimate, 0.9297827239003709);
|
||||
assert_eq!(result.p_value, 0.48939612561823265);
|
||||
}
|
||||
}
|
|
@ -1,64 +0,0 @@
|
|||
use crate::statistics::StatisticsExt;
|
||||
use statrs::distribution::{ContinuousCDF, FisherSnedecor};
|
||||
use statrs::statistics::Statistics;
|
||||
|
||||
/// Implements [Levene's test](https://en.wikipedia.org/wiki/Levene%27s_test) (Brown & Forsythe, 1974).
|
||||
///
|
||||
/// # References
|
||||
///
|
||||
/// Brown, M. B., & Forsythe, A. B. (1974). Robust Tests for the Equality of Variances.
|
||||
/// Journal of the American Statistical Association, 69(346), 364–367. <https://doi.org/10.2307/2285659>
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub struct LevenesTest {
|
||||
df: f64,
|
||||
estimate: f64,
|
||||
p_value: f64,
|
||||
}
|
||||
|
||||
impl LevenesTest {
|
||||
/// Run Levene's test on the samples `x` and `y`.
|
||||
pub fn new(x: &[f64], y: &[f64]) -> statrs::Result<LevenesTest> {
|
||||
let n_x = x.n();
|
||||
let n_y = y.n();
|
||||
let diff_x = x.iter().map(|xi| (xi - x.mean()).abs());
|
||||
let diff_y = y.iter().map(|yi| (yi - y.mean()).abs());
|
||||
|
||||
let mean_diff_x = diff_x.clone().mean();
|
||||
let mean_diff_y = diff_y.clone().mean();
|
||||
let mean_diff = Iterator::chain(diff_x.clone(), diff_y.clone()).mean();
|
||||
|
||||
let a: f64 =
|
||||
n_x * (mean_diff_x - mean_diff).powi(2) + n_y * (mean_diff_y - mean_diff).powi(2);
|
||||
let b: f64 = Iterator::chain(
|
||||
diff_x.map(|diff| (diff - mean_diff_x).powi(2)),
|
||||
diff_y.map(|diff| (diff - mean_diff_y).powi(2)),
|
||||
)
|
||||
.sum();
|
||||
|
||||
let df = n_x + n_y - 2.0;
|
||||
let estimate = df * a / b;
|
||||
let distribution = FisherSnedecor::new(1.0, df)?;
|
||||
let p_value = 1.0 - distribution.cdf(estimate);
|
||||
|
||||
Ok(LevenesTest {
|
||||
df,
|
||||
estimate,
|
||||
p_value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn levenes_test() {
|
||||
let x = vec![
|
||||
134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
|
||||
];
|
||||
let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
|
||||
let result = super::LevenesTest::new(&x, &y).unwrap();
|
||||
assert_eq!(result.df, 17.0);
|
||||
assert_eq!(result.estimate, 0.014721055064513417);
|
||||
assert_eq!(result.p_value, 0.9048519802923365);
|
||||
}
|
||||
}
|
|
@ -1,69 +0,0 @@
|
|||
use crate::statistics::*;
|
||||
use statrs::distribution::{ContinuousCDF, Normal};
|
||||
|
||||
/// Implements the [Mann-Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test),
|
||||
/// also known as the Wilcoxon rank-sum test.
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub struct MannWhitneyUTest {
|
||||
estimate: (f64, f64),
|
||||
effect_size: f64,
|
||||
p_value: f64,
|
||||
}
|
||||
|
||||
impl MannWhitneyUTest {
|
||||
/// Run Mann-Whitney U test/Wilcoxon rank-sum test on samples `x` and `y`.
|
||||
pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result<MannWhitneyUTest> {
|
||||
let (ranks, tie_correction) = x.iter().chain(y).ranks();
|
||||
let n_x = x.n();
|
||||
let n_y = y.n();
|
||||
let n_xy = n_x * n_y;
|
||||
|
||||
let estimate = (n_x * (n_x + 1.0)) / 2.0 - ranks[0..x.len()].iter().sum::<f64>();
|
||||
let estimate_x = n_xy + estimate;
|
||||
let estimate_y = -estimate;
|
||||
let estimate_small = if estimate < 0.0 {
|
||||
estimate_x
|
||||
} else {
|
||||
estimate_y
|
||||
};
|
||||
|
||||
let n = n_x + n_y;
|
||||
let distribution_mean = n_xy / 2.0;
|
||||
let distribution_var = (n_xy * (n + 1.0 - tie_correction as f64 / (n * (n - 1.0)))) / 12.0;
|
||||
|
||||
let normal = Normal::new(distribution_mean, distribution_var.sqrt())?;
|
||||
let p_value = 2.0 * normal.cdf(estimate_small);
|
||||
let effect_size = 1.0 - (2.0 * estimate_small) / n_xy;
|
||||
|
||||
Ok(MannWhitneyUTest {
|
||||
effect_size,
|
||||
estimate: (estimate_x, estimate_y),
|
||||
p_value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn mann_whitney_u() {
|
||||
let x = vec![
|
||||
134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
|
||||
];
|
||||
let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
|
||||
let test = super::MannWhitneyUTest::independent(&x, &y).unwrap();
|
||||
assert_eq!(test.estimate, (21.5, 62.5));
|
||||
assert_eq!(test.effect_size, 0.48809523809523814);
|
||||
assert_eq!(test.p_value, 0.08303763193135497);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mann_whitney_u_2() {
|
||||
let x = vec![68.0, 68.0, 59.0, 72.0, 64.0, 67.0, 70.0, 74.0];
|
||||
let y = vec![60.0, 67.0, 61.0, 62.0, 67.0, 63.0, 56.0, 58.0];
|
||||
let test = super::MannWhitneyUTest::independent(&x, &y).unwrap();
|
||||
assert_eq!(test.estimate, (9.0, 55.0));
|
||||
assert_eq!(test.effect_size, 0.71875);
|
||||
assert_eq!(test.p_value, 0.01533316211294691);
|
||||
}
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
//! Defines frequentist statistical tests.
|
||||
|
||||
pub use self::f::*;
|
||||
pub use self::levenes::*;
|
||||
pub use self::mann_whitney_u::*;
|
||||
pub use self::shapiro_wilk::*;
|
||||
pub use self::students_t::*;
|
||||
pub use self::welchs_t::*;
|
||||
pub use self::wilcoxon_w::*;
|
||||
|
||||
mod f;
|
||||
mod levenes;
|
||||
mod mann_whitney_u;
|
||||
mod shapiro_wilk;
|
||||
mod students_t;
|
||||
mod welchs_t;
|
||||
mod wilcoxon_w;
|
||||
|
||||
/// Alternative hypothesis for comparing two means.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlternativeHypothesis {
    /// One-sided: greater.
    Greater,
    /// Two-sided: different.
    Different,
    /// One-sided: less.
    Less,
}
|
|
@ -1,263 +0,0 @@
|
|||
use crate::distribution::ShapiroWilk;
|
||||
use statrs::distribution::{ContinuousCDF, Normal};
|
||||
use statrs::function::evaluate::polynomial;
|
||||
use statrs::statistics::Statistics;
|
||||
use std::cmp;
|
||||
use std::f64::consts::{FRAC_1_SQRT_2, FRAC_PI_3};
|
||||
|
||||
/// Implements the [Shapiro-Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test)
/// (Shapiro & Wilk, 1965). A simplified port of the algorithm
/// described by Royston (1992, 1995).
///
/// # References
///
/// Royston, P. (1992). Approximating the Shapiro-Wilk W-test for non-normality.
/// Statistics and Computing, 2(3), 117–119. <https://doi.org/10.1007/BF01891203>
///
/// Royston, P. (1995). Remark AS R94: A Remark on Algorithm AS 181:
/// The W-test for Normality. Journal of the Royal Statistical Society.
/// Series C (Applied Statistics), 44(4), 547–551. <https://doi.org/10.2307/2986146>
///
/// Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality
/// (complete samples)†. Biometrika, 52(3–4), 591–611. <https://doi.org/10.1093/biomet/52.3-4.591>
#[derive(Debug, PartialEq, Clone)]
pub struct ShapiroWilkTest {
    // Probability of observing a W at least this extreme under normality.
    p_value: f64,
    // The W statistic itself (numerator² / denominator in `new`).
    estimate: f64,
    // Order-statistic weights a_i used to compute W (see `get_weights`).
    weights: Vec<f64>,
    // Non-fatal warning status from the computation (e.g. n > 5000).
    status: ShapiroWilkStatus,
}
|
||||
|
||||
/// Representation of non-fatal `IFAULT` codes (Royston, 1995).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ShapiroWilkStatus {
    /// `IFAULT = 0` (no fault)
    Ok,
    /// `IFAULT = 2` (n > 5000)
    TooMany,
}
|
||||
|
||||
/// Representation of fatal `IFAULT` codes (Royston, 1995).
///
/// As for the other codes not listed here or in [ShapiroWilkStatus]:
/// - `IFAULT = 3` (insufficient storage for A) --- A is now allocated within the method
/// - `IFAULT = 4` (censoring while n < 20) --- censoring is not implemented in this port
/// - `IFAULT = 5` (the proportion censored > 0.8) --- censoring is not implemented in this port
/// - `IFAULT = 7` (the data are not in ascending order) --- data are now sorted within the method
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ShapiroWilkError {
    /// `IFAULT = 1` (n < 3)
    TooFew,
    /// `IFAULT = 6` (the data have zero range)
    NoDifference,
    /// Should not happen
    CannotMakeDistribution,
}
|
||||
|
||||
// Plain immutable values: `const` is the idiomatic choice over `static`
// (no shared memory location is needed; references still promote fine).

/// Threshold below which the sample range is treated as zero.
const SMALL: f64 = 1E-19; // smaller for f64?
/// 6/pi, used in the exact p-value formula for n = 3.
const FRAC_6_PI: f64 = 1.90985931710274; // 6/pi

// Polynomials for estimating weights.
const C1: [f64; 6] = [0.0, 0.221157, -0.147981, -2.07119, 4.434685, -2.706056];
const C2: [f64; 6] = [0.0, 0.042981, -0.293762, -1.752461, 5.682633, -3.582633];
// Polynomial for estimating scaling factor.
const G: [f64; 2] = [-2.273, 0.459];
|
||||
|
||||
impl ShapiroWilkTest {
|
||||
/// Run the Shapiro-Wilk test on the sample `x`.
|
||||
pub fn new(x: &[f64]) -> Result<ShapiroWilkTest, ShapiroWilkError> {
|
||||
let n = x.len();
|
||||
let mut sorted = x.to_owned();
|
||||
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(cmp::Ordering::Equal));
|
||||
|
||||
let range = sorted.last().unwrap() - sorted[0];
|
||||
|
||||
if range.lt(&SMALL) {
|
||||
return Err(ShapiroWilkError::NoDifference);
|
||||
} else if n < 3 {
|
||||
return Err(ShapiroWilkError::TooFew);
|
||||
}
|
||||
|
||||
let weights = Self::get_weights(n);
|
||||
let mean = (&sorted).mean();
|
||||
|
||||
let (denominator, numerator): (f64, f64) = (0..n)
|
||||
.map(|i| {
|
||||
let distance = sorted[i] - mean;
|
||||
(distance * distance, distance * weights[i])
|
||||
})
|
||||
.fold((0.0, 0.0), |sum, value| (sum.0 + value.0, sum.1 + value.1));
|
||||
|
||||
// Calculate complement (denominator - numerator) / denominator
|
||||
// to keep precision when numerator / denominator is close to 1
|
||||
let complement = (denominator - numerator.powi(2)) / denominator;
|
||||
let estimate = 1.0 - complement;
|
||||
|
||||
let status = if n > 5000 {
|
||||
ShapiroWilkStatus::TooMany
|
||||
} else {
|
||||
ShapiroWilkStatus::Ok
|
||||
};
|
||||
let p_value = if n == 3 {
|
||||
FRAC_6_PI * (estimate.sqrt().asin() - FRAC_PI_3).max(0.0)
|
||||
} else {
|
||||
let distribution = match ShapiroWilk::new(n) {
|
||||
Ok(distribution) => distribution,
|
||||
Err(_) => return Err(ShapiroWilkError::CannotMakeDistribution),
|
||||
};
|
||||
1.0 - distribution.cdf(if n <= 11 {
|
||||
let gamma = polynomial(n as f64, &G);
|
||||
-(gamma - complement.ln()).ln()
|
||||
} else {
|
||||
complement.ln()
|
||||
})
|
||||
};
|
||||
|
||||
Ok(ShapiroWilkTest {
|
||||
weights,
|
||||
status,
|
||||
estimate,
|
||||
p_value,
|
||||
})
|
||||
}
|
||||
|
||||
fn get_weights(n: usize) -> Vec<f64> {
|
||||
if n == 3 {
|
||||
return vec![-FRAC_1_SQRT_2, 0.0, FRAC_1_SQRT_2];
|
||||
}
|
||||
|
||||
let normal = Normal::new(0.0, 1.0).unwrap();
|
||||
let mut weights = vec![0.0; n];
|
||||
|
||||
let half = n / 2;
|
||||
let float_n = n as f64;
|
||||
let an_25 = float_n + 0.25;
|
||||
|
||||
let mut sum_squared = 0_f64;
|
||||
for i in 0..half {
|
||||
weights[i] = normal.inverse_cdf((i as f64 + 0.625) / an_25);
|
||||
weights[n - i - 1] = -weights[i];
|
||||
sum_squared += weights[i].powi(2);
|
||||
}
|
||||
sum_squared *= 2.0;
|
||||
|
||||
let root_sum_squared = sum_squared.sqrt();
|
||||
let rsn = float_n.sqrt().recip();
|
||||
let weight0 = polynomial(rsn, &C1) - weights[0] / root_sum_squared;
|
||||
|
||||
let (weights_set, scale) = if n > 5 {
|
||||
let weight1 = polynomial(rsn, &C2) - weights[1] / root_sum_squared;
|
||||
let scale = ((sum_squared - 2.0 * (weights[0].powi(2) + weights[1].powi(2)))
|
||||
/ (1.0 - 2.0 * (weight0.powi(2) + weight1.powi(2))))
|
||||
.sqrt();
|
||||
weights[1] = -weight1;
|
||||
weights[n - 2] = weight1;
|
||||
(2, scale)
|
||||
} else {
|
||||
let scale =
|
||||
((sum_squared - 2.0 * weights[0].powi(2)) / (1.0 - 2.0 * weight0.powi(2))).sqrt();
|
||||
(1, scale)
|
||||
};
|
||||
|
||||
weights[0] = -weight0;
|
||||
weights[n - 1] = weight0;
|
||||
|
||||
for i in weights_set..half {
|
||||
weights[i] /= scale;
|
||||
weights[n - i - 1] = -weights[i];
|
||||
}
|
||||
|
||||
weights
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // All expected values are exact-float regression pins for the current
    // implementation.

    // Right-skewed sample: low W, normality rejected (small p-value).
    #[test]
    fn shapiro_wilk() {
        let x = vec![
            0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614, 0.655, 0.954, 1.392,
            1.557, 1.648, 1.690, 1.994, 2.174, 2.206, 3.245, 3.510, 3.571, 4.354, 4.980, 6.084,
            8.351,
        ];
        let test = super::ShapiroWilkTest::new(&x).unwrap();
        assert_eq!(test.estimate, 0.8346662753181684);
        assert_eq!(test.p_value, 0.0009134904817755807);
    }

    // Sample of 12: high W, normality not rejected.
    #[test]
    fn shapiro_wilk_1() {
        let x = vec![
            134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
        ];
        let test = super::ShapiroWilkTest::new(&x).unwrap();
        assert_eq!(test.estimate, 0.9923657326481632);
        assert_eq!(test.p_value, 0.9999699312420669);
    }

    // Small sample (n = 7), exercising the n <= 11 p-value branch.
    #[test]
    fn shapiro_wilk_2() {
        let x = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
        let test = super::ShapiroWilkTest::new(&x).unwrap();
        assert_eq!(test.estimate, 0.9980061683004456);
        assert_eq!(test.p_value, 0.9999411393249124);
    }

    // Same shape as `shapiro_wilk` but scaled by 1E100: the identical
    // expectations show the statistic is invariant under scaling.
    #[test]
    fn large_range() {
        let x = vec![
            0.139E100, 0.157E100, 0.175E100, 0.256E100, 0.344E100, 0.413E100, 0.503E100, 0.577E100,
            0.614E100, 0.655E100, 0.954E100, 1.392E100, 1.557E100, 1.648E100, 1.690E100, 1.994E100,
            2.174E100, 2.206E100, 3.245E100, 3.510E100, 3.571E100, 4.354E100, 4.980E100, 6.084E100,
            8.351E100,
        ];
        let test = super::ShapiroWilkTest::new(&x).unwrap();
        assert_eq!(test.estimate, 0.8346662753181684);
        assert_eq!(test.p_value, 0.0009134904817755807);
    }

    // Symmetric sample rounded to two decimals: W very close to 1, p = 1.
    #[test]
    fn nearly_normally_distributed() {
        let x = vec![
            -0.44, -0.31, -0.25, -0.21, -0.18, -0.15, -0.12, -0.10, -0.08, -0.06, -0.04, -0.02,
            0.0, 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.15, 0.18, 0.21, 0.25, 0.31, 0.44,
        ];
        let test = super::ShapiroWilkTest::new(&x).unwrap();
        assert_eq!(test.estimate, 0.9997987717271388);
        assert_eq!(test.p_value, 1.0);
    }

    // Perfectly antisymmetric sample (presumably exact normal quantiles):
    // W is 1 up to floating-point rounding.
    #[test]
    fn normally_distributed() {
        let x = vec![
            -0.4417998157703872,
            -0.310841224215176,
            -0.2544758389229413,
            -0.21509882047762266,
            -0.18254731741828356,
            -0.1541570107663562,
            -0.12852819470327187,
            -0.1048199468783084,
            -0.08247628697708857,
            -0.061100278634889656,
            -0.040388574421146996,
            -0.020093702456713224,
            0.0,
            0.020093702456713224,
            0.040388574421146996,
            0.061100278634889656,
            0.08247628697708857,
            0.1048199468783084,
            0.12852819470327187,
            0.1541570107663562,
            0.18254731741828356,
            0.21509882047762266,
            0.2544758389229413,
            0.310841224215176,
            0.4417998157703872,
        ];
        let test = super::ShapiroWilkTest::new(&x).unwrap();
        assert_eq!(test.estimate, 0.9999999999999999);
        assert_eq!(test.p_value, 1.0);
    }
}
|
|
@ -1,89 +0,0 @@
|
|||
use crate::statistics::StatisticsExt;
|
||||
use statrs::distribution::{ContinuousCDF, StudentsT};
|
||||
use statrs::statistics::Statistics;
|
||||
|
||||
/// Implements [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test).
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct StudentsTTest {
    // Degrees of freedom of the reference t-distribution.
    df: f64,
    // The t statistic (signed).
    estimate: f64,
    // Standardized mean difference, stored as an absolute value.
    effect_size: f64,
    // Two-sided p-value.
    p_value: f64,
}
|
||||
|
||||
impl StudentsTTest {
    /// Run Student's two-sample t-test on samples `x` and `y`.
    ///
    /// Uses the pooled variance, i.e. assumes equal variances in both
    /// populations.
    pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result<StudentsTTest> {
        let n_x = x.n();
        let n_y = y.n();
        // NOTE(review): the pooled two-sample t-test conventionally uses
        // n_x + n_y - 2 degrees of freedom (i.e. x.df() + y.df(); the Welch
        // implementation in this crate uses df() per sample in the
        // Welch–Satterthwaite formula, which implies df() = n - 1).
        // `n_x + n_y` therefore looks off by 2 — verify against a reference
        // implementation before changing, since the unit tests below lock in
        // the p-values produced by the current value.
        let df = n_x + n_y;

        // Standardized mean difference: (mean_x - mean_y) / sqrt(pooled variance).
        let effect_size = (x.mean() - y.mean()) / x.pooled_variance(y).sqrt();
        // t statistic: effect size scaled by sqrt(1/n_x + 1/n_y).
        let t = effect_size / (n_x.recip() + n_y.recip()).sqrt();

        let t_distribution = StudentsT::new(0.0, 1.0, df)?;
        // Two-sided p-value.
        let p_value = 2.0 * t_distribution.cdf(-t.abs());

        Ok(StudentsTTest {
            df,
            effect_size: effect_size.abs(),
            estimate: t,
            p_value,
        })
    }

    /// Run paired Student's t-test on samples `x` and `y`.
    ///
    /// Tests whether the mean of the pairwise differences x_i - y_i is zero.
    /// `zip` truncates to the shorter of the two samples.
    pub fn paired(x: &[f64], y: &[f64]) -> statrs::Result<StudentsTTest> {
        let d: Vec<_> = x.iter().zip(y).map(|(x, y)| x - y).collect();
        let df = x.df();
        // Standardized mean difference of the paired differences.
        let effect_size = (&d).mean() / (&d).std_dev();
        let t = effect_size * x.n().sqrt();

        let t_distribution = StudentsT::new(0.0, 1.0, df)?;
        // Two-sided p-value.
        let p_value = 2.0 * t_distribution.cdf(-t.abs());

        Ok(StudentsTTest {
            df,
            effect_size: effect_size.abs(),
            estimate: t,
            p_value,
        })
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Exact-float expectations pin the current implementation's output.

    #[test]
    fn students_t() {
        let x = vec![
            134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
        ];
        let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
        let test = super::StudentsTTest::independent(&x, &y).unwrap();
        assert_eq!(test.estimate, 1.8914363974423305);
        assert_eq!(test.effect_size, 0.8995574392432595);
        assert_eq!(test.p_value, 0.073911127032672);
    }

    // Swapping the samples negates t but leaves effect size and p unchanged.
    #[test]
    fn reverse() {
        let x = vec![
            134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
        ];
        let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
        let test = super::StudentsTTest::independent(&y, &x).unwrap();
        assert_eq!(test.estimate, -1.8914363974423305);
        assert_eq!(test.effect_size, 0.8995574392432595);
        assert_eq!(test.p_value, 0.073911127032672);
    }

    // Paired test on eight (x, y) pairs.
    #[test]
    fn paired() {
        let x = vec![8.0, 6.0, 5.5, 11.0, 8.5, 5.0, 6.0, 6.0];
        let y = vec![8.5, 9.0, 6.5, 10.5, 9.0, 7.0, 6.5, 7.0];
        let test = super::StudentsTTest::paired(&x, &y).unwrap();
        assert_eq!(test.estimate, -2.645751311064591);
        assert_eq!(test.effect_size, 0.9354143466934856);
        assert_eq!(test.p_value, 0.03314550026377362);
    }
}
|
|
@ -1,73 +0,0 @@
|
|||
use crate::statistics::StatisticsExt;
|
||||
use statrs::distribution::{ContinuousCDF, StudentsT};
|
||||
use statrs::statistics::Statistics;
|
||||
|
||||
/// Implements [Welch's t-test](https://en.wikipedia.org/wiki/Welch's_t-test) (Welch, 1947).
///
/// # References
///
/// Welch, B. L. (1947). The generalisation of student’s problems when several different population
/// variances are involved. Biometrika, 34(1–2), 28–35. <https://doi.org/10.1093/biomet/34.1-2.28>
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct WelchsTTest {
    // Welch–Satterthwaite degrees of freedom (generally non-integer).
    df: f64,
    // The t statistic (signed).
    estimate: f64,
    // Absolute standardized mean difference.
    effect_size: f64,
    // Two-sided p-value.
    p_value: f64,
}
|
||||
|
||||
impl WelchsTTest {
|
||||
/// Run Welch's two-sample t-test on samples `x` and `y`.
|
||||
pub fn independent(x: &[f64], y: &[f64]) -> statrs::Result<WelchsTTest> {
|
||||
let var_x = x.variance();
|
||||
let var_y = y.variance();
|
||||
let var_x_n = var_x / x.n();
|
||||
let var_y_n = var_y / y.n();
|
||||
let linear_combination = var_x_n + var_y_n;
|
||||
|
||||
let df = linear_combination.powi(2) / (var_x_n.powi(2) / x.df() + var_y_n.powi(2) / y.df());
|
||||
|
||||
let mean_difference = x.mean() - y.mean();
|
||||
let effect_size = mean_difference.abs() / ((var_x + var_y) / 2.0).sqrt();
|
||||
let t = mean_difference / linear_combination.sqrt();
|
||||
|
||||
let t_distribution = StudentsT::new(0.0, 1.0, df)?;
|
||||
let p_value = 2.0 * t_distribution.cdf(-t.abs());
|
||||
|
||||
Ok(WelchsTTest {
|
||||
df,
|
||||
effect_size,
|
||||
estimate: t,
|
||||
p_value,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Exact-float expectations pin the current implementation's output.

    // NOTE(review): the test name says `students_t` but it exercises
    // WelchsTTest — likely copy-pasted from students_t.rs.
    #[test]
    fn students_t() {
        let x = vec![
            134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
        ];
        let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
        let test = super::WelchsTTest::independent(&x, &y).unwrap();
        assert_eq!(test.df, 13.081702113268564);
        assert_eq!(test.estimate, 1.9107001042454415);
        assert_eq!(test.effect_size, 0.904358069450997);
        assert_eq!(test.p_value, 0.0782070409214568);
    }

    // Swapping the samples negates t but leaves df, effect size and p unchanged.
    #[test]
    fn reverse() {
        let x = vec![
            134.0, 146.0, 104.0, 119.0, 124.0, 161.0, 107.0, 83.0, 113.0, 129.0, 97.0, 123.0,
        ];
        let y = vec![70.0, 118.0, 101.0, 85.0, 107.0, 132.0, 94.0];
        let test = super::WelchsTTest::independent(&y, &x).unwrap();
        assert_eq!(test.df, 13.081702113268564);
        assert_eq!(test.estimate, -1.9107001042454415);
        assert_eq!(test.effect_size, 0.904358069450997);
        assert_eq!(test.p_value, 0.0782070409214568);
    }
}
|
|
@ -1,70 +0,0 @@
|
|||
use crate::distribution::SignedRank;
|
||||
use crate::statistics::*;
|
||||
use statrs::distribution::ContinuousCDF;
|
||||
|
||||
/// Implements the [Wilcoxon signed rank test](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test).
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct WilcoxonWTest {
    // Rank sums of the negative (x < y) and positive (x > y) differences.
    estimate: (f64, f64),
    // Smaller rank sum divided by the total rank sum n(n + 1)/2.
    effect_size: f64,
    // P-value from the signed-rank distribution.
    p_value: f64,
}
|
||||
|
||||
impl WilcoxonWTest {
    /// Run Wilcoxon signed rank test on samples `x` and `y`.
    ///
    /// `x` and `y` are paired observations; `zip` truncates to the shorter
    /// of the two samples.
    pub fn paired(x: &[f64], y: &[f64]) -> statrs::Result<WilcoxonWTest> {
        // Rank the absolute pairwise differences |x_i - y_i|.
        let d: Vec<_> = x.iter().zip(y).map(|(x, y)| (x - y).abs()).collect();
        let (ranks, tie_correction) = (&d).ranks();
        // (sum of ranks where x < y, sum of ranks where x > y)
        let mut estimate = (0.0, 0.0);
        // Zero differences contribute to neither rank sum; they are counted
        // and passed on to the distribution instead.
        let mut zeroes = 0;

        for ((x, y), rank) in x.iter().zip(y).zip(ranks) {
            if x < y {
                estimate.0 += rank;
            } else if x > y {
                estimate.1 += rank;
            } else {
                zeroes += 1;
            }
        }

        // The test statistic W is the smaller of the two rank sums.
        let estimate_small = if estimate.0 < estimate.1 {
            estimate.0
        } else {
            estimate.1
        };
        let distribution = SignedRank::new(d.len(), zeroes, tie_correction)?;
        let p_value = distribution.cdf(estimate_small);

        // Effect size: W relative to the total rank sum n(n + 1)/2.
        let n = (&d).n();
        let rank_sum = n * (n + 1.0) / 2.0;
        let effect_size = estimate_small / rank_sum;

        Ok(WilcoxonWTest {
            effect_size,
            estimate,
            p_value,
        })
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Exact-float expectations pin the current implementation's output.

    // Half-point data producing tied ranks (fractional rank sums).
    #[test]
    fn paired() {
        let x = vec![8.0, 6.0, 5.5, 11.0, 8.5, 5.0, 6.0, 6.0];
        let y = vec![8.5, 9.0, 6.5, 10.5, 9.0, 7.0, 6.5, 7.0];
        let test = super::WilcoxonWTest::paired(&x, &y).unwrap();
        assert_eq!(test.estimate, (33.5, 2.5));
        assert_eq!(test.p_value, 0.027785782704095215);
    }

    // Integer data, no fractional ranks.
    #[test]
    fn paired_2() {
        let x = vec![209.0, 200.0, 177.0, 169.0, 159.0, 169.0, 187.0, 198.0];
        let y = vec![151.0, 168.0, 147.0, 164.0, 166.0, 163.0, 176.0, 188.0];
        let test = super::WilcoxonWTest::paired(&x, &y).unwrap();
        assert_eq!(test.estimate, (3.0, 33.0));
        assert_eq!(test.p_value, 0.0390625);
    }
}
|
Loading…
Reference in New Issue