// required for auxiliary functions
#r "nuget: FSharpAux, 2.0.0"           
// required for all calculations     
#r "nuget: FSharp.Stats, 0.6.0"
// required to read the pvalue set  
#r "nuget: FSharp.Data, 4.2.7"            
// required for charting  
#r "nuget: Plotly.NET.Interactive, 5.0.0"   

open FSharpAux
open FSharp.Stats
open FSharp.Data
open Plotly.NET
open Plotly.NET.StyleParam
open Plotly.NET.LayoutObjects

Loading extensions from `C:\Users\bvenn\.nuget\packages\plotly.net.interactive\5.0.0\lib\netstandard2.1\Plotly.NET.Interactive.dll`

// Two normal distributions initialised with identical arguments (10.0, 1.0).
// Samples drawn from A and B therefore come from the same population, which is
// the "null hypothesis" scenario plotted and tested below.
// NOTE(review): the two arguments are presumably mean and standard deviation — confirm against the FSharp.Stats docs.
let distributionA = Distributions.Continuous.Normal.Init 10.0 1.0
let distributionB = Distributions.Continuous.Normal.Init 10.0 1.0

/// Overlaid PDF curves of distributionA and distributionB, evaluated on [5, 15] in steps of 0.01.
let distributionChartAB = 
    // x positions shared by both density curves
    let xGrid = [5. .. 0.01 .. 15.]
    // builds one area trace from the PDF of `dist`, labelled `label`
    let densityTrace (dist: Distributions.ContinuousDistribution<float,float>) (label: string) =
        let points = xGrid |> List.map (fun x -> x, dist.PDF x)
        Chart.Area(xy = points, Name = label)
    [
        densityTrace distributionA "distA"
        densityTrace distributionB "distB"
    ]
    |> Chart.combine
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "variable X"
    |> Chart.withYAxisStyle "relative count"
    |> Chart.withSize (900.,600.)
    |> Chart.withTitle "null hypothesis"

distributionChartAB

/// Draws `n` values from `dist` (one `dist.Sample()` call per element) and returns them as a vector.
let getSample n (dist: Distributions.ContinuousDistribution<float,float>) =
    // the index supplied by Vector.init is irrelevant; every element is a fresh draw
    let drawOne (_: int) = dist.Sample()
    Vector.init n drawOne
    
// Five random draws from each of the two distributions.
let sampleA = distributionA |> getSample 5
let sampleB = distributionB |> getSample 5

// p value of a two-sample t test comparing both samples.
// NOTE(review): the leading `true` presumably selects the equal-variance variant of the test — confirm in the FSharp.Stats docs.
let pValue =
    let tTest = Testing.TTest.twoSample true sampleA sampleB
    tTest.PValue

pValue

0.3659784597826854

/// Draws one sample of size `sampleSize` from each distribution (A first, then B)
/// and returns the p value of the two-sample t test between them.
let calcPValue (distributionA: Distributions.ContinuousDistribution<float,float>) distributionB (sampleSize: int) = 
    let drawSample = getSample sampleSize
    let fromA = drawSample distributionA
    let fromB = drawSample distributionB
    let tTest = Testing.TTest.twoSample true fromA fromB
    tTest.PValue

/// p values of 10,000 independent t tests, each on two samples of size 5
/// drawn from distributionA and distributionB (which are identical).
let pVals_identical =
    [| for _ in 1 .. 10_000 -> calcPValue distributionA distributionB 5 |]

// Creates a histogram of the p values from the 10,000 t tests on identical distributions
// and highlights the area of p values below 0.05 using a rectangle shape and an annotation.
// The bins are initialised with (0., 1., 0.025), so two bins fall inside the highlighted range [0, 0.05].
Chart.Histogram(pVals_identical, XBins = TraceObjects.Bins.init(0., 1., 0.025))
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.withXAxisStyle "p value"
|> Chart.withYAxisStyle "frequency"
|> Chart.withTitle "p value distribution of the null hypothesis<br>10,000 t-tests of samples from the same distribution"
// red rectangle around the bars with p < 0.05
|> Chart.withShape(
        Shape.init(
            ShapeType = StyleParam.ShapeType.Rectangle,
            X0 = 0.0,
            X1 = 0.05,
            Y0 = 0.,
            // NOTE(review): 280 is a fixed height, presumably chosen to clear the tallest bar of one particular random run — confirm after re-running
            Y1 = 280.,
            Line = Line.init(Color = Color.fromHex "c00000", Width = 2)
        )
    )
// label placed just above the rectangle; AX/AY offset the text from the annotated point
|> Chart.withAnnotation(
        Annotation.init(
            Text = "p < 0.05<br>= 5% false positives",
            X = 0.03,
            Y = 283.,
            ShowArrow = true,
            Font = Font.init(Size = 16),
            BGColor = Color.fromString "white",
            AX=90.,
            AY=(-80.)
        )
    )
|> Chart.withSize (900.,600.)

// Normal distribution initialised with (11.5, 1.0): same second argument as distributionA, first argument shifted by 1.5.
let distributionC = Distributions.Continuous.Normal.Init 11.5 1.0

/// Overlaid PDF curves of distributionA and distributionC, evaluated on [5, 15] in steps of 0.01.
let distributionChartAC = 
    // x positions shared by both density curves
    let xGrid = [5. .. 0.01 .. 15.]
    // builds one area trace from the PDF of `dist`, labelled `label`
    let densityTrace (dist: Distributions.ContinuousDistribution<float,float>) (label: string) =
        let points = xGrid |> List.map (fun x -> x, dist.PDF x)
        Chart.Area(xy = points, Name = label)
    [
        densityTrace distributionA "Distribution A"
        densityTrace distributionC "Distribution C"
    ]
    |> Chart.combine
    |> Chart.withLegendAnchor 1
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "variable X"
    |> Chart.withYAxisStyle "density"

/// Binned p values (bin width 0.025) of 7000 t tests on samples from the identical
/// distributions A and B; each bin value is multiplied by the number of tests.
let pvals_null = 
    let nTests = 7000
    let pVals = Array.init nTests (fun _ -> calcPValue distributionA distributionB 5)
    Distributions.EmpiricalDistribution.create 0.025 pVals
    |> Map.toArray
    |> Array.map (fun (bin, share) -> bin, share * float nTests)

/// Binned p values (bin width 0.025) of 3000 t tests on samples from the differing
/// distributions A and C; each bin value is multiplied by the number of tests.
let pvals_alternative = 
    let nTests = 3000
    let pVals = Array.init nTests (fun _ -> calcPValue distributionA distributionC 5)
    Distributions.EmpiricalDistribution.create 0.025 pVals
    |> Map.toArray
    |> Array.map (fun (bin, share) -> bin, share * float nTests)

/// Stacked column chart of the binned p values: null tests below, tests with a true effect on top.
let histo_alternative =
    let nullTrace = Chart.StackedColumn(pvals_null, Name = "70 % null tests")
    let effectTrace = Chart.StackedColumn(pvals_alternative, Name = "30 % with true effect")
    [ nullTrace; effectTrace ]
    |> Chart.combine
    |> Chart.withLegendAnchor 2 // legend id 2, so this legend can be moved separately
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "p value"
    |> Chart.withYAxisStyle "frequency"

// Stacks the density chart (A vs. C) above the stacked p value histogram in a 2x1 grid
// and decorates the combined figure with two annotations and a hand-drawn path shape.
[distributionChartAC;histo_alternative]
|> Chart.Grid(2,1)
|> Chart.withSize (1000.,900.)
|> Chart.withTitle "alternative hypothesis"
|> Chart.withLegend(Legend.init(Y = 0.30),Id = 2) // move legend of lower panel down
|> Chart.withAnnotations [
    // rotated label positioned in paper coordinates (x = 1.2, y = 0.5)
    Annotation.init(
        Text = "10,000<br>t tests",
        X = 1.2,
        Y = 0.5,
        XRef = "paper",
        YRef = "paper",
        ShowArrow = false,
        TextAngle = 270.
    )
    // arrow annotation positioned in the coordinates of axes x2/y2
    // NOTE(review): x2/y2 are presumably the axes of the lower (histogram) panel — confirm
    Annotation.init(
        Text = "effect",
        X = 0.115,
        Y = 400,
        AX = 30.,
        AY = -30.,
        ShowArrow = true,
        Font = Font.init(Family= FontFamily.Droid_Serif, Size = 16),
        XRef = "x2",
        YRef = "y2"

    )
]
// filled SVG path given in paper coordinates
// NOTE(review): presumably a curved arrow linking the two panels next to the "10,000 t tests" label — confirm visually
|> Chart.withShape(
        Shape.init(
            ShapeType = StyleParam.ShapeType.SvgPath,
            // path can be generated or edited at https://yqnn.github.io/svg-path-editor/
            Path = "M 1.035 0.6025 C 1.11 0.5525 1.11 0.4525 1.0379 0.4005 L 1.053 0.3834 L 1.0115 0.3846 L 1.0122 0.4286 L 1.0272 0.4118 C 1.0605 0.4381 1.1146 0.5173 1.0279 0.5908 L 1.035 0.6025",
            Xref = "paper",
            Yref = "paper",
            FillColor = Color.fromHex "515151"
        )
    )

/// Dashed horizontal line at y = 0.05 spanning x = 0 .. 35 (the x range of the FWER chart).
let bonferroniLine = 
    Shape.init(
        ShapeType = ShapeType.Line,
        X0 = 0.,
        X1 = 35.,
        Y0 = 0.05,
        Y1 = 0.05,
        Line = Line.init(Dash = DrawingStyle.Dash)
    )

/// Point chart of 1 - (1 - 0.05)^n for n = 1 .. 35 tests,
/// labelled as the probability of at least one false positive.
let fwer = 
    [ for nTests in 1 .. 35 -> nTests, 1. - (1. - 0.05) ** float nTests ]
    |> Chart.Point
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "#tests"
    |> Chart.withYAxisStyle("p(at least one FP)",MinMax=(0.,1.))
    |> Chart.withShape bonferroniLine
    |> Chart.withTitle "FWER"
    
fwer

// Overlays four labelled rectangles on the stacked p value histogram, splitting it at
// p = 0.05 (horizontally) and at a bar height of about 184/185 (vertically) into
// FP, TN, TP and FN regions.
// NOTE(review): 184/185, 600 and 1500 are fixed heights, presumably read off one particular random run — confirm after re-running.
histo_alternative
|> Chart.withShapes [
    // p <= 0.05, lower part: labelled FP (red outline)
    Shape.init(
        ShapeType = StyleParam.ShapeType.Rectangle,
        X0 = 0.0,
        X1 = 0.05,
        Y0 = 0.00,
        Y1 = 184,
        Line = Line.init(Color=Color.fromHex "c00000", Width = 2),
        Label = ShapeLabel.init(Text="FP",Font = Font.init(Color=Color.fromHex "c00000", Size = 16))
    )
    // p > 0.05, lower part: labelled true negatives (green outline)
    Shape.init(
        ShapeType = StyleParam.ShapeType.Rectangle,
        X0 = 0.051,
        X1 = 1.,
        Y0 = 0.00,
        Y1 = 184,
        Line = Line.init(Color=Color.fromHex "638e48", Width = 2),
        Label = ShapeLabel.init(Text="true negatives (TN)",Font = Font.init(Size = 16))
    )
    // p <= 0.05, upper part: labelled TP (green outline)
    Shape.init(
        ShapeType = StyleParam.ShapeType.Rectangle,
        X0 = 0.0,
        X1 = 0.05,
        Y0 = 185,
        Y1 = 1500,
        Line = Line.init(Color=Color.fromHex "638e48", Width = 2),
        Label = ShapeLabel.init(Text="TP",Font = Font.init(Size = 18))
    )
    // p > 0.05, upper part: labelled false negatives (red outline)
    Shape.init(
        ShapeType = StyleParam.ShapeType.Rectangle,
        X0 = 0.051,
        X1 = 1.0,
        Y0 = 185,
        Y1 = 600,
        Line = Line.init(Color=Color.fromHex "c00000", Width = 2),
        Label = ShapeLabel.init(Text="false negatives (FN)",Font = Font.init(Color=Color.fromHex "c00000", Size = 16))
    )
]
|> Chart.withSize(1000.,600.)

/// Example p values downloaded from the fslaborg datasets repository.
/// The first line of the file is skipped (treated as a header); every remaining
/// non-blank line is parsed as a float.
let examplePVals = 
    let rawData = Http.RequestString @"https://raw.githubusercontent.com/fslaborg/datasets/main/data/pvalExample.txt"
    rawData.Split '\n'
    |> Array.tail
    // a trailing newline (or any blank line) yields an empty entry that `float` cannot parse
    |> Array.filter (fun line -> not (System.String.IsNullOrWhiteSpace line))
    |> Array.map float

// number of tests (= number of example p values), as float for the ratio calculations below
let m = float examplePVals.Length

/// Dashed horizontal line at y = 1 spanning x = 0 .. 1.
let nullLine =
    Shape.init(
        ShapeType = ShapeType.Line,
        X0 = 0.,
        X1 = 1.,
        Y0 = 1.,
        Y1 = 1.,
        Line = Line.init(Dash = DrawingStyle.Dash)
    )

/// Red dash-dotted horizontal line at y = 0.4 spanning x = 0 .. 1.
let empLine =
    Shape.init(
        ShapeType = ShapeType.Line,
        X0 = 0.,
        X1 = 1.,
        Y0 = 0.4,
        Y1 = 0.4,
        Line = Line.init(Dash = DrawingStyle.DashDot, Color = Color.fromHex "#FC3E36")
    )

/// Two side-by-side column charts of the example p values binned with width 0.025:
/// left as density (count / (m * bin width)), right as raw count per bin.
let exampleDistribution = 
    let binWidth = 0.025
    // (bin, count) pairs shared by both panels
    let binCounts =
        examplePVals
        |> Distributions.Frequency.create binWidth
        |> Map.toArray
    let densityPanel =
        binCounts
        |> Array.map (fun (bin, count) -> bin, float count / (m * binWidth))
        |> Chart.Column
        |> Chart.withTraceInfo "density"
        |> Chart.withTemplate ChartTemplates.lightMirrored
        |> Chart.withXAxisStyle "p value"
        |> Chart.withYAxisStyle "density"
        |> Chart.withShapes [nullLine;empLine]
    let countPanel =
        binCounts
        |> Array.map (fun (bin, count) -> bin, float count)
        |> Chart.Column
        |> Chart.withTraceInfo "gene count"
        |> Chart.withTemplate ChartTemplates.lightMirrored
        |> Chart.withXAxisStyle "p value"
        |> Chart.withYAxisStyle "gene count"
    [
        [ densityPanel; countPanel ]
    ]
    |> Chart.Grid()
    |> Chart.withSize(1100.,550.)

exampleDistribution

// fixed pi0 used for the manual FDR calculation below (same value as the y position of empLine)
let pi0 = 0.4

/// Number of example p values that are <= p, as float.
let getD p = 
    examplePVals 
    |> Array.filter (fun x -> x <= p)
    |> Array.length
    |> float

/// p * pi0 * m for threshold p.
let getFP p = p * pi0 * m

/// Ratio getFP p / getD p for threshold p.
let getFDR p = getFP p / getD p

/// Line trace of (p, getFDR p) for every example p value in ascending order.
let qvaluesNotSmoothed = 
    let ascending = Array.sort examplePVals
    let points = ascending |> Array.map (fun p -> p, getFDR p)
    points
    |> Chart.Line 
    |> Chart.withTraceInfo "not smoothed"
    
/// Line trace of (p, q) where q is the running minimum of getFDR, taken while walking
/// the p values from largest to smallest and starting from an initial minimum of 1.
let qvaluesSmoothed = 
    let descending =
        examplePVals
        |> Array.sortDescending
        |> List.ofArray
    // running minimum; the scan emits the seed 1. first, which is dropped again
    let runningMin =
        descending
        |> List.map getFDR
        |> List.scan (fun lowest q -> if q > lowest then lowest else q) 1.
        |> List.tail
    List.zip descending runningMin
    |> Chart.Line
    |> Chart.withTraceInfo "smoothed"

// Number of example p values <= 0.046135.
// NOTE(review): 0.046135 is presumably the p value at which the q value reaches 0.05 (see the chart title that uses this count) — confirm.
let eXpos = examplePVals |> Array.sumBy (fun p -> if p <= 0.046135 then 1 else 0)

/// Combines the unsmoothed and smoothed p-to-q traces in one chart, adds the empLine
/// shape and reports eXpos in the title.
let p2qValeChart =
    [qvaluesNotSmoothed;qvaluesSmoothed]
    |> Chart.combine
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "p value"
    // title and range are set in a single call (same form as in `fwer`) instead of
    // first setting an empty title with the range and then the title separately
    |> Chart.withYAxisStyle("q value",MinMax=(0.,1.))
    |> Chart.withShape empLine
    |> Chart.withTitle (sprintf "#[genes with q value < 0.05] = %i" eXpos)

p2qValeChart

/// For each lambda in 0, 0.05, .., 0.95: (lambda, #{p > lambda} / (number of p values * (1 - lambda))).
let pi0Est = 
    let total = float examplePVals.Length
    [|0. .. 0.05 .. 0.95|]
    |> Array.map (fun lambda -> 
        let above = 
            examplePVals 
            |> Array.filter (fun p -> p > lambda)
            |> Array.length
            |> float
        lambda, above / (total * (1. - lambda))
        )

/// Point chart of the pi0 estimates in pi0Est, with both axes limited to [0, 1],
/// TeX axis titles and extra drawing buttons in the mode bar.
let pi0EstChart = 
    pi0Est 
    |> Chart.Point
    // first pair of axis calls fixes the ranges (with empty titles) ...
    |> Chart.withYAxisStyle("",MinMax=(0.,1.))
    |> Chart.withXAxisStyle("",MinMax=(0.,1.))
    |> Chart.withTemplate ChartTemplates.lightMirrored
    // ... second pair supplies the axis titles as TeX strings
    |> Chart.withXAxisStyle "$\lambda$"
    |> Chart.withYAxisStyle "$\hat \pi_0(\lambda)$"
    |> Chart.withMathTex(true)
    // adds line / open-path drawing and shape-erasing buttons to the mode bar
    |> Chart.withConfig(
        Config.init(
            Responsive=true, 
            ModeBarButtonsToAdd=[
                ModeBarButton.DrawLine
                ModeBarButton.DrawOpenPath
                ModeBarButton.EraseShape
                ]
            )
        )

pi0EstChart

/// For every threshold in `lambda`, draws 100 bootstrap resamples (with replacement, same
/// size as `pValues`) and computes the pi0 estimate of each resample.
/// Returns one (threshold, 100 pi0 estimates) pair per threshold.
/// Note: no MSE is calculated here; the raw bootstrap estimates are returned.
let getpi0Bootstrap (lambda:float[]) (pValues:float[]) =
    let rnd = System.Random()
    let m = float pValues.Length
    // pi0 estimate at one threshold: #{p > threshold} / (m * (1 - threshold))
    let getpi0hat threshold (pVals: float[]) =
        let hits = 
            pVals 
            |> Array.sumBy (fun p -> if p > threshold then 1. else 0.) 
        hits / (m * (1. - threshold))
    
    // pi0 estimates of 100 bootstrap resamples at the given threshold
    let bootstrapPi0Hats threshold =
        Array.init 100 (fun _ -> 
            Array.sampleWithReplacement rnd pValues pValues.Length  
            |> getpi0hat threshold
            )
    lambda
    |> Array.map (fun l -> l, bootstrapPi0Hats l)
    

// Hard-coded value; the commented-out expression below shows a library call that takes the minimal pi0 estimate over the lambda grid.
let minimalpihat = 
    //FSharp.Stats.Testing.MultipleTesting.Qvalues.pi0hats  [|0. .. 0.05 .. 0.96|] examplePVals |> Array.minBy snd |> snd
    0.3686417749

/// Dashed horizontal line at y = minimalpihat spanning x = 0 .. 1.
let minpiHatShape = 
    Shape.init(
        ShapeType = ShapeType.Line,
        X0 = 0.,
        X1 = 1.,
        Y0 = minimalpihat,
        Y1 = minimalpihat,
        Line = Line.init(Dash = DrawingStyle.Dash)
    )

/// One vertical box plot per lambda (0, 0.05, .., 0.95) showing the bootstrap pi0
/// estimates from getpi0Bootstrap, with the minpiHatShape line drawn on top.
let bootstrappedPi0 =
    let boxColor = Color.fromHex"#1F77B4"
    // box plot of the estimates for one lambda, named after lambda with two decimals
    let toBox (l: float, estimates: float[]) =
        Chart.BoxPlot(data=estimates,orientation=Orientation.Vertical,FillColor=boxColor,MarkerColor=boxColor,Name=sprintf "%.2f" l)
    getpi0Bootstrap [|0. .. 0.05 .. 0.95|] examplePVals
    |> Array.map toBox
    |> Chart.combine
    |> Chart.withShape minpiHatShape
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "lambda"
    |> Chart.withYAxisStyle "pi0 hat"

bootstrappedPi0

open Testing.MultipleTesting

// pi0 estimate returned by the library's bootstrap routine for the lambda grid 0, 0.05, .., 0.95
let pi0Stats =
    examplePVals
    |> Qvalues.pi0BootstrapWithLambda [|0.0 .. 0.05 .. 0.95|]

pi0Stats

0.3703327922077925

// q values computed by the library from the example p values and the pi0 estimate
let qValues =
    examplePVals
    |> Qvalues.ofPValues pi0Stats

qValues

[ 0.2690343536429767, 0.03451771894511998, 0.260005815044248, 0.2984261021835806, 0.08590835637088742, 0.3516354103053438, 0.14017753875059466, 0.3114779827361747, 0.25753341900121823, 0.028988378992537343, 0.3259266204071664, 0.031850238633144505, 0.2934858001322298, 0.13373258389042894, 0.010485759027169157, 0.16093162128197516, 0.02086019153225808, 0.3193118083488758, 0.0744394201633433, 0.06533030596475378 ... (9836 more) ]

// q values from the library's "robust" variant, using the same pi0 estimate and p values
let qvaluesRobust = 
    examplePVals
    |> Testing.MultipleTesting.Qvalues.ofPValuesRobust pi0Stats

/// Line chart comparing the standard and the robust q values as functions of the p value.
let qChart =    
    // pairs each p value with its q value and orders the pairs by p value
    let sortedCurve qs = Array.zip examplePVals qs |> Array.sortBy fst
    [
        Chart.Line(sortedCurve qValues,Name="qValue")
        Chart.Line(sortedCurve qvaluesRobust,Name="qValueRobust")
    ]
    |> Chart.combine
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "p value"
    |> Chart.withYAxisStyle "q value"

/// Dashed horizontal line at y = pi0Stats spanning x = 0 .. 1.
let pi0Line = 
    Shape.init(
        ShapeType = ShapeType.Line,
        X0 = 0.,
        X1 = 1.,
        Y0 = pi0Stats,
        Y1 = pi0Stats,
        Line = Line.init(Dash = DrawingStyle.Dash)
    )

// plots each p value against its q value (ordered by p value), with the pi0 line for reference
let p2q = 
    let pqPairs = Array.zip examplePVals qValues
    pqPairs
    |> Array.sortBy fst
    |> Chart.Line
    |> Chart.withShape pi0Line
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "p value"
    |> Chart.withYAxisStyle "q value"

// shows the p value distribution for a visual inspection of the pi0 estimation
let pValueDistribution =
    let binWidth = 0.025 
    let nTests = float examplePVals.Length
    // (bin, count / bin width / number of tests) pairs
    let densities =
        examplePVals 
        |> Distributions.Frequency.create binWidth 
        |> Map.toArray 
        |> Array.map (fun (bin, count) -> bin, float count / binWidth / nTests) 
    densities
    |> Chart.StackedColumn 
    |> Chart.withTraceInfo "p values"
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "p value"
    |> Chart.withYAxisStyle "frequency density"
    |> Chart.withShape pi0Line

// shows the pi0 estimate as a function of lambda (0, 0.05, .., 0.95)
let pi0Estimation = 
    //Testing.MultipleTesting.Qvalues.pi0hats [|0. .. 0.05 .. 0.96|] examplePVals
    let total = float examplePVals.Length
    // #{p > lambda} / (number of p values * (1 - lambda))
    let pi0HatAt lambda =
        let above = 
            examplePVals 
            |> Array.filter (fun p -> p > lambda)
            |> Array.length
            |> float
        above / (total * (1. - lambda))
    [|0. .. 0.05 .. 0.95|]
    |> Array.map (fun lambda -> lambda, pi0HatAt lambda)
    |> Chart.Point
    |> Chart.withTemplate ChartTemplates.lightMirrored
    |> Chart.withXAxisStyle "$\lambda$"
    |> Chart.withYAxisStyle "$\hat \pi_0(\lambda)$"
    |> Chart.withMathTex(true)

// Bare expressions: evaluate the three quality plots defined above so that each one is displayed.
p2q

pValueDistribution

pi0Estimation

Multiple testing correction: q values

Posted on 2022-3-20 by Benedikt Venn in Data Science

Multiple testing correction: q values

Getting started: Multiple testing problem

Referencing packages

The multiple testing problem

False discovery rate

q value

The automatic detection of $\pi_0$ is facilitated as follows

Variants

Quality plots

Definitions and Notes

FAQ

References