Header menu logo FSharp.Stats

Imputation

Binder Notebook

Short documentation on how to impute missing values.

open FSharp.Stats
open FSharp.Stats.ML


// Five sample rows of equal length; `nan` marks the entries to be imputed.
let a = [ 3.0; 2.0; 3.0; 4.0; 5.0 ]
let b = [ 1.0; 2.0; 3.0; nan; 5.0 ]
let c = [ nan; 2.0; 3.0; 4.0; nan ]
let d = [ 5.0; 2.0; 6.0; 4.0; 5.0 ]
let e = [ 0.5; 2.0; 3.0; 5.0; 5.0 ]

// The data set as a list of rows (one inner list per row).
let data = [ a; b; c; d; e ]
"rawData

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000       NaN     5.000
2     |      NaN     2.000     3.000     4.000       NaN
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

k-Nearest imputation

Missing data imputation based on the k-nearest neighbour algorithm:

// Matrix-based imputer built from kNearestImpute with k = 2 neighbours.
let kn : Imputation.MatrixBaseImputation<float array, float> =
    Imputation.kNearestImpute 2

// Fill every entry that Ops.isNan flags as missing, using the imputer above.
let imputedData =
    data |> Imputation.imputeBy kn Ops.isNan
"k nearest neighbours imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.500     5.000
2     |    1.750     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

random imputation

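Missing values are replaced by values sampled at random from the same row (row-wise) or the same column (column-wise):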

// Vector-based imputer that samples replacements at random from the input vector.
// The generator is created without a seed, so the imputed values differ between runs.
let rnd = System.Random() |> Imputation.rnd

// Apply the random imputer once row-wise and once column-wise to the same data.
let rndRowWise = data |> Imputation.imputeRowWiseBy rnd Ops.isNan
let rndColWise = data |> Imputation.imputeColWiseBy rnd Ops.isNan
"rndRowDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     2.000     5.000
2     |    2.000     2.000     3.000     4.000     2.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
"rndColDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.000     5.000
2     |    5.000     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

normal imputation

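Missing values are replaced by samples from a Gaussian normal distribution based on the row (row-wise) or column (column-wise) that contains the gap: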

// Imputation.normal samples replacements from a normal distribution based on the input vector.
// It is applied here once row-wise and once column-wise to the same data.
let normalRowWise =
    data |> Imputation.imputeRowWiseBy Imputation.normal Ops.isNan

let normalColWise =
    data |> Imputation.imputeColWiseBy Imputation.normal Ops.isNan
"normalRowDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     1.579     5.000
2     |    3.499     2.000     3.000     4.000     3.112
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
"normalColDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     3.089     5.000
2     |    3.343     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
namespace FsMath
namespace Plotly
namespace Plotly.NET
module Defaults from Plotly.NET
<summary> Contains mutable global default values. Changing these values will apply the default values to all consecutive Chart generations. </summary>
val mutable DefaultDisplayOptions: Plotly.NET.DisplayOptions
Multiple items
type DisplayOptions = inherit DynamicObj new: unit -> DisplayOptions static member addAdditionalHeadTags: additionalHeadTags: XmlNode list -> (DisplayOptions -> DisplayOptions) static member addDescription: description: XmlNode list -> (DisplayOptions -> DisplayOptions) static member combine: first: DisplayOptions -> second: DisplayOptions -> DisplayOptions static member getAdditionalHeadTags: displayOpts: DisplayOptions -> XmlNode list static member getDescription: displayOpts: DisplayOptions -> XmlNode list static member getPlotlyReference: displayOpts: DisplayOptions -> PlotlyJSReference static member init: [<Optional; DefaultParameterValue ((null :> obj))>] ?AdditionalHeadTags: XmlNode list * [<Optional; DefaultParameterValue ((null :> obj))>] ?Description: XmlNode list * [<Optional; DefaultParameterValue ((null :> obj))>] ?PlotlyJSReference: PlotlyJSReference -> DisplayOptions static member initCDNOnly: unit -> DisplayOptions ...

--------------------
new: unit -> Plotly.NET.DisplayOptions
static member Plotly.NET.DisplayOptions.init: [<System.Runtime.InteropServices.Optional; System.Runtime.InteropServices.DefaultParameterValue ((null :> obj))>] ?AdditionalHeadTags: Giraffe.ViewEngine.HtmlElements.XmlNode list * [<System.Runtime.InteropServices.Optional; System.Runtime.InteropServices.DefaultParameterValue ((null :> obj))>] ?Description: Giraffe.ViewEngine.HtmlElements.XmlNode list * [<System.Runtime.InteropServices.Optional; System.Runtime.InteropServices.DefaultParameterValue ((null :> obj))>] ?PlotlyJSReference: Plotly.NET.PlotlyJSReference -> Plotly.NET.DisplayOptions
type PlotlyJSReference = | CDN of string | Full | Require of string | NoReference
<summary> Sets how plotly is referenced in the head of html docs. </summary>
union case Plotly.NET.PlotlyJSReference.NoReference: Plotly.NET.PlotlyJSReference
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
namespace FSharp.Stats
namespace FSharp.Stats.ML
val a: float list
val b: float list
val nan: float
val c: float list
val d: float list
val e: float list
val data: float list list
val missingDataMatrix: string
module FSIPrinters from FSharp.Stats
val matrix: mat: Matrix<float> -> string
val matrix: ll: #('b seq) seq -> Matrix<'b> (requires 'b :> System.Numerics.INumber<'b> and default constructor and value type and comparison and 'b :> System.ValueType)
val kn: Imputation.MatrixBaseImputation<float array,float>
module Imputation from FSharp.Stats.ML
<summary> Module for data imputation and missing value filtering </summary>
type MatrixBaseImputation<'a,'b> = 'a seq -> 'a -> int -> 'b
<summary> Type definition for a matrix-based imputation. The imputed values are based on the given whole dataset. </summary>
Multiple items
val float: value: 'T -> float (requires member op_Explicit)

--------------------
type float = System.Double

--------------------
type float<'Measure> = float
val kNearestImpute: k: int -> data: float array seq -> arr: float array -> index: int -> float
<summary>Imputation by k-nearest neighbour</summary>
<remarks></remarks>
<param name="k"></param>
<param name="data"></param>
<param name="arr"></param>
<param name="index"></param>
<returns></returns>
<example><code></code></example>
val imputedData: float array array
val imputeBy: impute: Imputation.MatrixBaseImputation<'a array,'a> -> isMissing: ('a -> bool) -> data: #('a seq) seq -> 'a array array
<summary>Imputes rows by matrix-based imputation</summary>
<remarks></remarks>
<param name="impute"></param>
<param name="isMissing"></param>
<param name="data"></param>
<returns></returns>
<example><code></code></example>
module Ops from FSharp.Stats
<summary> Operations module </summary>
val isNan: num: 'a -> bool (requires equality)
<summary> Returns true if x is nan (generic version; requires equality) </summary>
val imputedDataMatrix: string
val rnd: (float seq -> int -> float)
val rnd: rnd: System.Random -> fdata: 'a seq -> index: int -> 'a
<summary>Imputation by random sampling from the input vector</summary>
<remarks></remarks>
<param name="rnd"></param>
<param name="fdata"></param>
<param name="index"></param>
<returns></returns>
<example><code></code></example>
namespace System
Multiple items
type Random = new: unit -> unit + 1 overload member GetItems<'T> : choices: ReadOnlySpan<'T> * length: int -> 'T array + 2 overloads member Next: unit -> int + 2 overloads member NextBytes: buffer: byte array -> unit + 1 overload member NextDouble: unit -> float member NextInt64: unit -> int64 + 2 overloads member NextSingle: unit -> float32 member Shuffle<'T> : values: Span<'T> -> unit + 1 overload static member Shared: Random
<summary>Represents a pseudo-random number generator, which is an algorithm that produces a sequence of numbers that meet certain statistical requirements for randomness.</summary>

--------------------
System.Random() : System.Random
System.Random(Seed: int) : System.Random
val rndRowWise: float array array
val imputeRowWiseBy: impute: Imputation.VectorBaseImputation<'a> -> isMissing: ('a -> bool) -> data: #('a seq) seq -> 'a array array
<summary>Imputes row-wise by vector-based imputation</summary>
<remarks></remarks>
<param name="impute"></param>
<param name="isMissing"></param>
<param name="data"></param>
<returns></returns>
<example><code></code></example>
val rndColWise: float array array
val imputeColWiseBy: impute: Imputation.VectorBaseImputation<'a> -> isMissing: ('a -> bool) -> data: #('a seq) seq -> 'a array array
<summary>Imputes column-wise by vector-based imputation</summary>
<remarks></remarks>
<param name="impute"></param>
<param name="isMissing"></param>
<param name="data"></param>
<returns></returns>
<example><code></code></example>
val rndRowDataMatrix: string
val rndColDataMatrix: string
val normalRowWise: float array array
val normal: fdata: float seq -> index: int -> float
<summary> Imputation by sampling from a Gaussian normal distribution based on the input vector </summary>
val normalColWise: float array array
val normalRowDataMatrix: string
val normalColDataMatrix: string

Type something to start searching.