fslaborg · omaus · Jan 27, 2022 · Jan 27, 2022 · Jan 28, 2022 · bvenn
diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj
@@ -113,6 +113,7 @@
     <Compile Include="Testing\Correlation.fs" />
     <Compile Include="Testing\TTest.fs" />
     <Compile Include="Testing\FTest.fs" />
+    <Compile Include="Testing\UTest.fs" />
     <Compile Include="Testing\FriedmanTest.fs" />
     <Compile Include="Testing\HTest.fs" />
     <Compile Include="Testing\ChiSquareTest.fs" />

diff --git a/src/FSharp.Stats/Testing/TestStatistics.fs b/src/FSharp.Stats/Testing/TestStatistics.fs
@@ -96,4 +96,26 @@ module TestStatistics =
         let cdf  =  Distributions.Continuous.Normal.CDF 0. 1.  statistic         
         let pvalue = 1.-  cdf
         let pvalueTwoTailed = pvalue * 2.
-        {Statistic=statistic; PValueLeft=pvalue;PValueRight = cdf; PValueTwoTailed = pvalueTwoTailed}
+        {Statistic=statistic; PValueLeft=pvalue;PValueRight = cdf; PValueTwoTailed = pvalueTwoTailed}
+
+
+    /// <summary>Result of a Mann-Whitney U-test: the test statistic and the p-values derived from it.</summary>
+    /// <param name="Statistic">The test statistic.</param>
+    /// <param name="PValueLeft">One tailed/sided p-value; createUTest sets it to 1 - CDF(statistic).</param>
+    /// <param name="PValueRight">One tailed/sided p-value; createUTest sets it to CDF(statistic).</param>
+    /// <param name="PValueTwoTailed">Two tailed/sided p-value.</param>
+    /// <remarks>CDF is Distributions.Continuous.Normal.CDF 0. 1. (presumably the standard normal - confirm).</remarks>
+    type UTestTestStatistics = {
+        Statistic       : float
+        PValueLeft      : float
+        PValueRight     : float
+        PValueTwoTailed : float
+    }
+    /// Builds the U-test result for a z-statistic from Distributions.Continuous.Normal.CDF 0. 1. (called cdf below):
+    /// PValueLeft = 1 - cdf, PValueRight = cdf, PValueTwoTailed = twice the smaller of those two tails.
+    let createUTest statistic : UTestTestStatistics =
+        let cdf = Distributions.Continuous.Normal.CDF 0. 1. statistic
+        { Statistic       = statistic
+          PValueLeft      = 1. - cdf
+          PValueRight     = cdf
+          PValueTwoTailed = 2. * min cdf (1. - cdf) } // smaller tail doubled: stays <= 1 for either sign
diff --git a/src/FSharp.Stats/Testing/UTest.fs b/src/FSharp.Stats/Testing/UTest.fs
@@ -0,0 +1,51 @@
+namespace FSharp.Stats.Testing
+
+// taken/implemented from: https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#U_statistic
+module UTest =
+
+    open FSharp.Stats
+    open FSharp.Stats.Testing
+
+    // TO DO:   Bergmann et al. (2000) showed that there are different implementations of this test that lead to different results.
+    //          They implied that some of them are using a false algorithm. Check if the mathematical derivation from above is wrong too.
+    //          Read: https://www.jstor.org/stable/2685616
+    /// Returns the z-score of the normal approximation to the Mann-Whitney U statistic of two samples.
+    /// Values are converted to float, pooled and ranked with Rank.rankAverage; the smaller of the two
+    /// U values is standardised with mean n1*n2/2 and standard deviation sqrt(n1*n2*(n1+n2+1)/12).
+    /// Each input sequence is enumerated exactly once, so lazy or non-repeatable sequences yield
+    /// sample sizes that always agree with the ranked data.
+    /// NOTE(review): the variance carries no tie correction and empty samples are not guarded
+    /// (the denominator is then 0) - confirm whether either should be added.
+    let inline private compute (seq1 : seq<'T>) (seq2 : seq<'T>) =
+        // Materialise both samples up front; 0 tags the first group, 1 the second.
+        let group1 = seq1 |> Seq.map (fun v -> float v, 0) |> Array.ofSeq
+        let group2 = seq2 |> Seq.map (fun v -> float v, 1) |> Array.ofSeq
+        let sortedMerge =
+            Array.append group1 group2
+            |> Seq.sortByDescending fst
+            |> Array.ofSeq
+        // Mean ranks (not equal ranks) are assigned when identical values occur.
+        let ranks =
+            sortedMerge
+            |> Array.map fst
+            |> Rank.rankAverage
+        // Sum of the ranks held by the observations of one group.
+        let rankSum group =
+            (sortedMerge, ranks)
+            ||> Array.fold2 (fun acc (_, g) rank -> if g = group then acc + rank else acc) 0.
+        let n1 = Array.length group1 |> float
+        let n2 = Array.length group2 |> float
+        let u1 = n1 * n2 + (n1 * (n1 + 1.) / 2.) - rankSum 0
+        let u2 = n1 * n2 + (n2 * (n2 + 1.) / 2.) - rankSum 1
+        let uMin = min u1 u2
+        // Standardise the smaller U value with the mean and standard deviation stated above.
+        let meanU = n1 * n2 / 2.
+        let sigmaU = System.Math.Sqrt (n1 * n2 * (n1 + n2 + 1.) / 12.)
+        let z = (uMin - meanU) / sigmaU
+        z
+
+    /// Performs a Mann-Whitney U-test (also known as Wilcoxon rank-sum test) on two independent samples
+    /// and returns the z-statistic together with the p-values that TestStatistics.createUTest derives from it.
+    /// For dependent (paired) samples use the Wilcoxon signed-rank test instead.
+    let inline computeUtest (seq1 : seq<'T>) (seq2 : seq<'T>) =
+        compute seq1 seq2 |> TestStatistics.createUTest
diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs
@@ -235,6 +235,34 @@ let tTestTests =
             Expect.floatClose Accuracy.low tTest4.Statistic 0.514 "t statistic should be equal."
     ]
 
+
+[<Tests>]
+let uTestTests =
+    // Example data (one pay value per person, labelled "W" or "M") taken from
+    // https://de.wikipedia.org/wiki/Wilcoxon-Mann-Whitney-Test#Beispiel
+    let pays  = [0;400;500;550;600;650;750;800;900;950;1000;1100;1200;1500;1600;1800;1900;2000;2200;3500 ]
+    let sexes = ["M";"W";"M";"W";"M";"W";"M";"M";"W";"W";"M";"M";"W";"M";"W";"M";"M";"M";"M";"M"]
+    // Pays of all persons carrying the given label, in their original order.
+    let paysOf label =
+        List.zip sexes pays
+        |> List.filter (fun (sex, _) -> sex = label)
+        |> List.map snd
+    let observed = UTest.computeUtest (paysOf "W") (paysOf "M")
+    let expected : TestStatistics.UTestTestStatistics = {
+        Statistic       = -1.15
+        PValueTwoTailed = 0.2505
+        PValueLeft      = 0.875
+        PValueRight     = 0.1253
+    }
+    testList "Testing.UTest" [
+        testCase "TwoSample" <| fun () ->
+            Expect.floatClose Accuracy.low observed.PValueLeft expected.PValueLeft "left p-value should be equal"
+            Expect.floatClose Accuracy.low observed.PValueRight expected.PValueRight "right p-value should be equal"
+            Expect.floatClose Accuracy.low observed.PValueTwoTailed expected.PValueTwoTailed "p-value should be equal"
+            Expect.floatClose Accuracy.low observed.Statistic expected.Statistic "test statistic should be equal"
+    ]
+
+
 [<Tests>]
 let chiSquaredTests = 
     // ChiSquared https://www.graphpad.com/quickcalcs/chisquared2/