Covariance

Binder

Summary: This tutorial explains how to investigate the covariance of two samples with FSharp.Stats

Let's first define some sample data:

open FSharp.Stats

// Shared random source. It is created without a seed, so the noisy samples
// below (and every statistic derived from them) differ from run to run.
let rnd = new System.Random()

// Integer noise term: a fresh draw from rnd.Next(11) on every call.
let error() = rnd.Next(11)

// Reference sample: the indices 0..49 converted to float.
let sampleA = Vector.init 50 float
// Index plus one noise draw per element.
let sampleB = Vector.init 50 (fun i -> float (i + error()))
// sampleB with a constant offset of 200 added to every element.
let sampleBHigh = Vector.map ((+) 200.) sampleB
// 100 minus (index plus three times a noise draw).
let sampleC = Vector.init 50 (fun i -> 100. - float (i + 3 * error()))
// 100 plus ten times a noise draw; the index itself is not used.
let sampleD = Vector.init 50 (fun _ -> 100. + float (10 * error()))

open Plotly.NET

// Axis styling shared by the x and y axes of the tutorial chart.
// Creates a linear axis carrying the given title, with Mirror set to All,
// ticks set to Inside, the grid switched off, and the axis line and zero
// line switched on.
let myAxis title =
    Axis.LinearAxis.init(
        Title = title,
        Mirror = StyleParam.Mirror.All,
        Ticks = StyleParam.TickOptions.Inside,
        Showgrid = false,
        Showline = true,
        Zeroline = true)

// Applies myAxis to a chart: x is used as the x-axis title and y as the
// y-axis title. The x axis is attached first, then the y axis.
let styleChart x y chart =
    let withStyledXAxis = chart |> Chart.withX_Axis (myAxis x)
    withStyledXAxis |> Chart.withY_Axis (myAxis y)

// Scatter chart of the four test cases. Every trace plots sampleA on the
// x axis against one of the other samples, labelled with the trace name.
let sampleChart =
    [
        (sampleB,     "AB")
        (sampleC,     "AC")
        (sampleD,     "AD")
        (sampleBHigh, "AB+")
    ]
    |> List.map (fun (ys, label) -> Chart.Point(sampleA, ys, label))
    |> Chart.Combine
    |> styleChart "x" "y"
    |> Chart.withTitle "test cases for covariance calculation"

The covariance of two samples describes the relationship between both variables. If one variable tends to be high when its pair is also high, the covariance is positive. If one variable tends to be low while its pair is high, the covariance is negative. If there is no (monotone) relationship between both variables, the covariance is zero.

A positive covariance indicates a positive slope of a regression line, while a negative covariance indicates a negative slope. If the total population is given, the covariance can be calculated without Bessel's correction by using covPopulation.

\(\operatorname{cov}(X, Y) = \operatorname{E}{\big[(X - \operatorname{E}[X])(Y - \operatorname{E}[Y])\big]}\)

Note: The magnitude of the covariance does not correlate with the slope, nor does it correlate with the spread of the data points around the regression line.

A standardized measure of how well the data lie on the regression line is given by correlation analysis. The Pearson correlation coefficient is defined as

\(\rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y}\)

References:

  • Fahrmeir L et al., Statistik - Der Weg zur Datenanalyse, 8. Auflage, doi 10.1007/978-3-662-50372-0

cov and covPopulation are available as extensions for sequences (and other collections):

// Vector.cov of sampleA paired with each of the four test samples.
let covAB     = sampleB     |> Vector.cov sampleA
let covAC     = sampleC     |> Vector.cov sampleA
let covAD     = sampleD     |> Vector.cov sampleA
let covABHigh = sampleBHigh |> Vector.cov sampleA

// Vector.covPopulation for the same four pairs.
let covPopAB     = sampleB     |> Vector.covPopulation sampleA
let covPopAC     = sampleC     |> Vector.covPopulation sampleA
let covPopAD     = sampleD     |> Vector.covPopulation sampleA
let covPopABHigh = sampleBHigh |> Vector.covPopulation sampleA

// Seq.pearson comes from the Correlation module, which is opened here.
open Correlation
let pearsonAB     = sampleB     |> Seq.pearson sampleA
let pearsonAC     = sampleC     |> Seq.pearson sampleA
let pearsonAD     = sampleD     |> Seq.pearson sampleA
let pearsonABHigh = sampleBHigh |> Seq.pearson sampleA
"Covariance of the presented four test cases
AB (blue)   cov: 217.45    covPopulation: 213.10   pearson: 0.983
AC (orange) cov: -201.1    covPopulation: -197.0   pearson: -0.83
AD (green)  cov: -86.73    covPopulation: -85.00   pearson: -0.192
AB+(red)    cov: 217.45    covPopulation: 213.10   pearson: 0.983"
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
namespace FSharp.Stats
val rnd : System.Random
namespace System
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit + 1 overload
  member NextDouble : unit -> float

--------------------
System.Random() : System.Random
System.Random(Seed: int) : System.Random
val error : unit -> int
System.Random.Next() : int
System.Random.Next(maxValue: int) : int
System.Random.Next(minValue: int, maxValue: int) : int
val sampleA : Vector<float>
Multiple items
module Vector

from FSharp.Stats

--------------------
type Vector<'T> =
  interface IEnumerable
  interface IEnumerable<'T>
  interface IStructuralEquatable
  interface IStructuralComparable
  interface IComparable
  new : opsV:INumeric<'T> option * arrV:'T array -> Vector<'T>
  override Equals : yobj:obj -> bool
  override GetHashCode : unit -> int
  member GetSlice : start:int option * finish:int option -> Vector<'T>
  member Permute : p:permutation -> Vector<'T>
  ...

--------------------
new : opsV:INumeric<'T> option * arrV:'T array -> Vector<'T>
val init : count:int -> initializer:(int -> float) -> Vector<float>
val x : int
Multiple items
val float : value:'T -> float (requires member op_Explicit)

--------------------
type float = System.Double

--------------------
type float<'Measure> = float
val sampleB : Vector<float>
val sampleBHigh : Vector<float>
val map : mapping:(float -> float) -> vector:vector -> Vector<float>
val x : float
val sampleC : Vector<float>
val sampleD : Vector<float>
namespace Plotly
namespace Plotly.NET
val myAxis : title:string -> Axis.LinearAxis
val title : string
module Axis

from Plotly.NET
Multiple items
type LinearAxis =
  inherit DynamicObj
  new : unit -> LinearAxis
  static member init : ?AxisType:AxisType * ?Title:string * ?Titlefont:Font * ?Autorange:AutoRange * ?Rangemode:RangeMode * ?Range:Range * ?RangeSlider:RangeSlider * ?Fixedrange:'a * ?Tickmode:TickMode * ?nTicks:'b * ?Tick0:'c * ?dTick:'d * ?Tickvals:'e * ?Ticktext:'f * ?Ticks:TickOptions * ?Mirror:Mirror * ?Ticklen:'g * ?Tickwidth:'h * ?Tickcolor:'i * ?Showticklabels:'j * ?Tickfont:Font * ?Tickangle:'k * ?Tickprefix:'l * ?Showtickprefix:ShowTickOption * ?Ticksuffix:'m * ?Showticksuffix:ShowTickOption * ?Showexponent:ShowExponent * ?Exponentformat:ExponentFormat * ?Tickformat:'n * ?Hoverformat:'o * ?Showline:bool * ?Linecolor:'p * ?Linewidth:'q * ?Showgrid:bool * ?Gridcolor:'r * ?Gridwidth:'s * ?Zeroline:bool * ?Zerolinecolor:'t * ?Zerolinewidth:'a1 * ?Anchor:AxisAnchorId * ?Side:Side * ?Overlaying:AxisAnchorId * ?Domain:Range * ?Position:float * ?IsSubplotObj:'a2 * ?Tickvalssrc:'a3 * ?Ticktextsrc:'a4 * ?Showspikes:'a5 * ?Spikesides:'a6 * ?Spikethickness:'a7 * ?Spikecolor:'a8 * ?Showbackground:'a9 * ?Backgroundcolor:'a10 * ?Showaxeslabels:'a11 -> LinearAxis
  static member style : ?AxisType:AxisType * ?Title:string * ?Titlefont:Font * ?Autorange:AutoRange * ?Rangemode:RangeMode * ?Range:Range * ?RangeSlider:RangeSlider * ?Fixedrange:'a * ?Tickmode:TickMode * ?nTicks:'b * ?Tick0:'c * ?dTick:'d * ?Tickvals:'e * ?Ticktext:'f * ?Ticks:TickOptions * ?Mirror:Mirror * ?Ticklen:'g * ?Tickwidth:'h * ?Tickcolor:'i * ?Showticklabels:'j * ?Tickfont:Font * ?Tickangle:'k * ?Tickprefix:'l * ?Showtickprefix:ShowTickOption * ?Ticksuffix:'m * ?Showticksuffix:ShowTickOption * ?Showexponent:ShowExponent * ?Exponentformat:ExponentFormat * ?Tickformat:'n * ?Hoverformat:'o * ?Showline:bool * ?Linecolor:'p * ?Linewidth:'q * ?Showgrid:bool * ?Gridcolor:'r * ?Gridwidth:'s * ?Zeroline:bool * ?Zerolinecolor:'t * ?Zerolinewidth:'a1 * ?Anchor:AxisAnchorId * ?Side:Side * ?Overlaying:AxisAnchorId * ?Domain:Range * ?Position:float * ?IsSubplotObj:'a2 * ?Tickvalssrc:'a3 * ?Ticktextsrc:'a4 * ?Showspikes:'a5 * ?Spikesides:'a6 * ?Spikethickness:'a7 * ?Spikecolor:'a8 * ?Showbackground:'a9 * ?Backgroundcolor:'a10 * ?Showaxeslabels:'a11 -> (LinearAxis -> LinearAxis)

--------------------
new : unit -> Axis.LinearAxis
static member Axis.LinearAxis.init : ?AxisType:StyleParam.AxisType * ?Title:string * ?Titlefont:Font * ?Autorange:StyleParam.AutoRange * ?Rangemode:StyleParam.RangeMode * ?Range:StyleParam.Range * ?RangeSlider:RangeSlider * ?Fixedrange:'a * ?Tickmode:StyleParam.TickMode * ?nTicks:'b * ?Tick0:'c * ?dTick:'d * ?Tickvals:'e * ?Ticktext:'f * ?Ticks:StyleParam.TickOptions * ?Mirror:StyleParam.Mirror * ?Ticklen:'g * ?Tickwidth:'h * ?Tickcolor:'i * ?Showticklabels:'j * ?Tickfont:Font * ?Tickangle:'k * ?Tickprefix:'l * ?Showtickprefix:StyleParam.ShowTickOption * ?Ticksuffix:'m * ?Showticksuffix:StyleParam.ShowTickOption * ?Showexponent:StyleParam.ShowExponent * ?Exponentformat:StyleParam.ExponentFormat * ?Tickformat:'n * ?Hoverformat:'o * ?Showline:bool * ?Linecolor:'p * ?Linewidth:'q * ?Showgrid:bool * ?Gridcolor:'r * ?Gridwidth:'s * ?Zeroline:bool * ?Zerolinecolor:'t * ?Zerolinewidth:'a1 * ?Anchor:StyleParam.AxisAnchorId * ?Side:StyleParam.Side * ?Overlaying:StyleParam.AxisAnchorId * ?Domain:StyleParam.Range * ?Position:float * ?IsSubplotObj:'a2 * ?Tickvalssrc:'a3 * ?Ticktextsrc:'a4 * ?Showspikes:'a5 * ?Spikesides:'a6 * ?Spikethickness:'a7 * ?Spikecolor:'a8 * ?Showbackground:'a9 * ?Backgroundcolor:'a10 * ?Showaxeslabels:'a11 -> Axis.LinearAxis
module StyleParam

from Plotly.NET
type Mirror =
  | True
  | Ticks
  | False
  | All
  | AllTicks
    static member convert : (Mirror -> obj)
    static member toString : (Mirror -> string)
union case StyleParam.Mirror.All: StyleParam.Mirror
type TickOptions =
  | Outside
  | Inside
  | Empty
    static member convert : (TickOptions -> obj)
    static member toString : (TickOptions -> string)
union case StyleParam.TickOptions.Inside: StyleParam.TickOptions
val styleChart : x:string -> y:string -> chart:GenericChart.GenericChart -> GenericChart.GenericChart
val x : string
val y : string
val chart : GenericChart.GenericChart
type Chart =
  static member Area : xy:seq<#IConvertible * #IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:TextPosition * ?TextFont:Font * ?Dash:DrawingStyle * ?Width:'a2 -> GenericChart
  static member Area : x:seq<#IConvertible> * y:seq<#IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:TextPosition * ?TextFont:Font * ?Dash:DrawingStyle * ?Width:'a2 -> GenericChart
  static member Bar : keysvalues:seq<#IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  static member Bar : keys:seq<#IConvertible> * values:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  static member BoxPlot : xy:seq<'a0 * 'a1> * ?Name:string * ?Showlegend:bool * ?Color:string * ?Fillcolor:'a2 * ?Opacity:float * ?Whiskerwidth:'a3 * ?Boxpoints:Boxpoints * ?Boxmean:BoxMean * ?Jitter:'a4 * ?Pointpos:'a5 * ?Orientation:Orientation * ?Marker:Marker * ?Line:Line * ?Alignmentgroup:'a6 * ?Offsetgroup:'a7 * ?Notched:bool * ?NotchWidth:float * ?QuartileMethod:QuartileMethod -> GenericChart
  static member BoxPlot : ?x:'a0 * ?y:'a1 * ?Name:string * ?Showlegend:bool * ?Color:string * ?Fillcolor:'a2 * ?Opacity:float * ?Whiskerwidth:'a3 * ?Boxpoints:Boxpoints * ?Boxmean:BoxMean * ?Jitter:'a4 * ?Pointpos:'a5 * ?Orientation:Orientation * ?Marker:Marker * ?Line:Line * ?Alignmentgroup:'a6 * ?Offsetgroup:'a7 * ?Notched:bool * ?NotchWidth:float * ?QuartileMethod:QuartileMethod -> GenericChart
  static member Bubble : xysizes:seq<#IConvertible * #IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:TextPosition * ?TextFont:Font * ?StackGroup:string * ?Orientation:Orientation * ?GroupNorm:GroupNorm * ?UseWebGL:bool -> GenericChart
  static member Bubble : x:seq<#IConvertible> * y:seq<#IConvertible> * sizes:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:TextPosition * ?TextFont:Font * ?StackGroup:string * ?Orientation:Orientation * ?GroupNorm:GroupNorm * ?UseWebGL:bool -> GenericChart
  static member Candlestick : stockTimeSeries:seq<DateTime * StockData> * ?Increasing:Line * ?Decreasing:Line * ?WhiskerWidth:float * ?Line:Line * ?XCalendar:Calendar -> GenericChart
  static member Candlestick : open:seq<#IConvertible> * high:seq<#IConvertible> * low:seq<#IConvertible> * close:seq<#IConvertible> * x:seq<#IConvertible> * ?Increasing:Line * ?Decreasing:Line * ?WhiskerWidth:float * ?Line:Line * ?XCalendar:Calendar -> GenericChart
  ...
static member Chart.withX_Axis : xAxis:Axis.LinearAxis * ?Id:int -> (GenericChart.GenericChart -> GenericChart.GenericChart)
static member Chart.withY_Axis : yAxis:Axis.LinearAxis * ?Id:int -> (GenericChart.GenericChart -> GenericChart.GenericChart)
val sampleChart : GenericChart.GenericChart
static member Chart.Point : xy:seq<#System.IConvertible * #System.IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:StyleParam.Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:StyleParam.TextPosition * ?TextFont:Font * ?StackGroup:string * ?Orientation:StyleParam.Orientation * ?GroupNorm:StyleParam.GroupNorm * ?UseWebGL:bool -> GenericChart.GenericChart
static member Chart.Point : x:seq<#System.IConvertible> * y:seq<#System.IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:StyleParam.Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<string> * ?TextPosition:StyleParam.TextPosition * ?TextFont:Font * ?StackGroup:string * ?Orientation:StyleParam.Orientation * ?GroupNorm:StyleParam.GroupNorm * ?UseWebGL:bool -> GenericChart.GenericChart
static member Chart.Combine : gCharts:seq<GenericChart.GenericChart> -> GenericChart.GenericChart
static member Chart.withTitle : title:string * ?Titlefont:Font -> (GenericChart.GenericChart -> GenericChart.GenericChart)
val covAB : float
val cov : v1:vector -> v2:vector -> float
val covAC : float
val covAD : float
val covABHigh : float
val covPopAB : float
val covPopulation : v1:vector -> v2:vector -> float
val covPopAC : float
val covPopAD : float
val covPopABHigh : float
module Correlation

from FSharp.Stats
val pearsonAB : float
Multiple items
module Seq

from FSharp.Stats.Correlation

--------------------
module Seq

from Plotly.NET

--------------------
module Seq

from FSharp.Stats

--------------------
module Seq

from Microsoft.FSharp.Collections
val pearson : seq1:seq<'T> -> seq2:seq<'T> -> float (requires member op_Explicit and member get_Zero and member get_One)
val pearsonAC : float
val pearsonAD : float
val pearsonABHigh : float
module GenericChart

from Plotly.NET
val toChartHTML : gChart:GenericChart.GenericChart -> string
val covs : string
val sprintf : format:Printf.StringFormat<'T> -> 'T