Imputation

Binder

Short documentation on how to impute missing values.

open FSharp.Stats
open FSharp.Stats.ML


// Five example rows of five observations each; `nan` marks a missing value.
let a = [ 3.0; 2.0; 3.0; 4.0; 5.0 ]
let b = [ 1.0; 2.0; 3.0; nan; 5.0 ]
let c = [ nan; 2.0; 3.0; 4.0; nan ]
let d = [ 5.0; 2.0; 6.0; 4.0; 5.0 ]
let e = [ 0.5; 2.0; 3.0; 5.0; 5.0 ]

// Raw data set used throughout: one inner list per row, in the order a..e.
let data = [ a; b; c; d; e ]
"rawData

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000       NaN     5.000
2     |      NaN     2.000     3.000     4.000       NaN
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

k-Nearest imputation

Missing data imputation based on the k-nearest neighbour algorithm:

// Build the k-nearest-neighbour imputer (a matrix-based imputation) with k = 2.
let kn : Impute.MatrixBaseImputation<float[],float> =
    Impute.kNearestImpute 2

// Fill in every entry of `data` that `Ops.isNan` reports as missing, using `kn`.
let imputedData =
    data
    |> Impute.imputeBy kn Ops.isNan
"k nearest neighbours imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.500     5.000
2     |    1.750     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

random imputation

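Each missing value is replaced by a value picked at random from the same row (row-wise) or the same column (column-wise) of the data.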

// Build the random vector-based imputer on top of a fresh, unseeded System.Random.
let rnd = Impute.rnd (System.Random())

// Row-wise first, then column-wise. Both calls use the same `rnd`, so keep this
// order (presumably the generator state is shared between them - verify).
let rndRowWise =
    data
    |> Impute.imputeRowWiseBy rnd Ops.isNan
let rndColWise =
    data
    |> Impute.imputeColWiseBy rnd Ops.isNan
"rndRowDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     3.000     5.000
2     |    3.000     2.000     3.000     4.000     2.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
"rndColDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.000     5.000
2     |    5.000     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

normal imputation

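Each missing value is replaced by a value sampled from a normal distribution estimated from the same row (row-wise) or the same column (column-wise) of the data.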

// Impute missing entries (as detected by `Ops.isNan`) with the `Impute.normal`
// vector-based imputer, once per row and once per column of `data`.
let normalRowWise =
    data
    |> Impute.imputeRowWiseBy Impute.normal Ops.isNan
let normalColWise =
    data
    |> Impute.imputeColWiseBy Impute.normal Ops.isNan
"normalRowDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.363     5.000
2     |    3.595     2.000     3.000     4.000     3.837
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
"normalColDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.588     5.000
2     |    1.869     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
namespace Plotly
namespace Plotly.NET
module Axis

from Plotly.NET
module StyleParam

from Plotly.NET
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
namespace FSharp.Stats
namespace FSharp.Stats.ML
val a : float list
val b : float list
val nan : float
val c : float list
val d : float list
val e : float list
val data : float list list
val missingDataMatrix : string
module FSIPrinters

from FSharp.Stats
val matrix : mat:Matrix<float> -> string
Multiple items
val matrix : ll:seq<#seq<float>> -> Matrix<float>

--------------------
type matrix = Matrix<float>
val kn : Impute.MatrixBaseImputation<float [],float>
module Impute

from FSharp.Stats.ML
type MatrixBaseImputation<'a,'b> = seq<'a> -> 'a -> int -> 'b
Multiple items
val float : value:'T -> float (requires member op_Explicit)

--------------------
type float = System.Double

--------------------
type float<'Measure> = float
val kNearestImpute : k:int -> data:seq<float []> -> arr:float [] -> index:int -> float
val imputedData : float [] []
val imputeBy : impute:Impute.MatrixBaseImputation<'a [],'a> -> isMissing:('a -> bool) -> data:seq<#seq<'a>> -> 'a [] []
module Ops

from FSharp.Stats
val isNan : num:'b -> bool (requires equality)
val imputedDataMatrix : string
val rnd : (seq<float> -> int -> float)
val rnd : rnd:System.Random -> fdata:seq<'a> -> index:int -> 'a
namespace System
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit + 1 overload
  member NextDouble : unit -> float

--------------------
System.Random() : System.Random
System.Random(Seed: int) : System.Random
val rndRowWise : float [] []
val imputeRowWiseBy : impute:Impute.VectorBaseImputation<'a> -> isMissing:('a -> bool) -> data:seq<#seq<'a>> -> 'a [] []
val rndColWise : float [] []
val imputeColWiseBy : impute:Impute.VectorBaseImputation<'a> -> isMissing:('a -> bool) -> data:seq<#seq<'a>> -> 'a [] []
val rndRowDataMatrix : string
val rndColDataMatrix : string
val normalRowWise : float [] []
val normal : fdata:seq<float> -> index:int -> float
val normalColWise : float [] []
val normalRowDataMatrix : string
val normalColDataMatrix : string