Imputation

Binder Notebook

Short documentation on how to impute missing values

open FSharp.Stats
open FSharp.Stats.ML


// Five sample rows of five observations each; `nan` marks a missing value.
let a = [ 3.0; 2.0; 3.0; 4.0; 5.0 ]
let b = [ 1.0; 2.0; 3.0; nan; 5.0 ]
let c = [ nan; 2.0; 3.0; 4.0; nan ]
let d = [ 5.0; 2.0; 6.0; 4.0; 5.0 ]
let e = [ 0.5; 2.0; 3.0; 5.0; 5.0 ]

// The raw data set: one inner list per row, in the order a..e.
let data = [ a; b; c; d; e ]
"rawData

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000       NaN     5.000
2     |      NaN     2.000     3.000     4.000       NaN
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

k-Nearest imputation

Missing data imputation based on the k-nearest neighbour algorithm:

// Matrix-based imputer built from the k-nearest-neighbour algorithm with k = 2.
let kn : Impute.MatrixBaseImputation<float[],float> = Impute.kNearestImpute 2
// Impute every value that Ops.isNan flags as missing, using `kn` on the data set.
let imputedData =
    data
    |> Impute.imputeBy kn Ops.isNan
"k nearest neighbours imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.500     5.000
2     |    1.750     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

random imputation

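Missing data imputation by random sampling from the input vector, applied either row-wise or column-wise: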

// Vector-based imputer that samples randomly from the input vector,
// driven by a freshly constructed System.Random.
let rnd = System.Random() |> Impute.rnd

// Apply the random imputer to each row, then (independently) to each column;
// Ops.isNan decides which entries count as missing.
let rndRowWise = data |> Impute.imputeRowWiseBy rnd Ops.isNan
let rndColWise = data |> Impute.imputeColWiseBy rnd Ops.isNan
"rndRowDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     3.000     5.000
2     |    3.000     2.000     3.000     4.000     3.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
"rndColDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     4.000     5.000
2     |    1.000     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"

normal imputation

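Missing data imputation by sampling from a Gaussian normal distribution based on the input vector, applied either row-wise or column-wise: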

// Impute missing entries (as flagged by Ops.isNan) with Impute.normal,
// once per row and once per column.
let normalRowWise = data |> Impute.imputeRowWiseBy Impute.normal Ops.isNan
let normalColWise = data |> Impute.imputeColWiseBy Impute.normal Ops.isNan
"normalRowDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     1.777     5.000
2     |    2.165     2.000     3.000     4.000     3.034
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
"normalColDataMatrix imputed data

               0         1         2         3         4 
_________________________________________________________
0     |    3.000     2.000     3.000     4.000     5.000
1     |    1.000     2.000     3.000     5.343     5.000
2     |    -0.64     2.000     3.000     4.000     5.000
3     |    5.000     2.000     6.000     4.000     5.000
4     |    0.500     2.000     3.000     5.000     5.000
"
namespace Plotly
namespace Plotly.NET
module Defaults from Plotly.NET
<summary> Contains mutable global default values. Changing these values will apply the default values to all consecutive Chart generations. </summary>
val mutable DefaultDisplayOptions: Plotly.NET.DisplayOptions
Multiple items
type DisplayOptions = inherit DynamicObj new: unit -> DisplayOptions static member addAdditionalHeadTags: additionalHeadTags: XmlNode list -> (DisplayOptions -> DisplayOptions) static member addDescription: description: XmlNode list -> (DisplayOptions -> DisplayOptions) static member combine: first: DisplayOptions -> second: DisplayOptions -> DisplayOptions static member getAdditionalHeadTags: displayOpts: DisplayOptions -> XmlNode list static member getDescription: displayOpts: DisplayOptions -> XmlNode list static member getPlotlyReference: displayOpts: DisplayOptions -> PlotlyJSReference static member init: ?AdditionalHeadTags: XmlNode list * ?Description: XmlNode list * ?PlotlyJSReference: PlotlyJSReference -> DisplayOptions static member initCDNOnly: unit -> DisplayOptions ...

--------------------
new: unit -> Plotly.NET.DisplayOptions
static member Plotly.NET.DisplayOptions.init: ?AdditionalHeadTags: Giraffe.ViewEngine.HtmlElements.XmlNode list * ?Description: Giraffe.ViewEngine.HtmlElements.XmlNode list * ?PlotlyJSReference: Plotly.NET.PlotlyJSReference -> Plotly.NET.DisplayOptions
type PlotlyJSReference = | CDN of string | Full | Require of string | NoReference
<summary> Sets how plotly is referenced in the head of html docs. </summary>
union case Plotly.NET.PlotlyJSReference.NoReference: Plotly.NET.PlotlyJSReference
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
namespace FSharp.Stats
namespace FSharp.Stats.ML
val a: float list
val b: float list
val nan: float
<summary>Equivalent to <see cref="P:System.Double.NaN" /></summary>
val c: float list
val d: float list
val e: float list
val data: float list list
val missingDataMatrix: string
module FSIPrinters from FSharp.Stats
val matrix: mat: Matrix<float> -> string
Multiple items
val matrix: ll: seq<#seq<float>> -> Matrix<float>

--------------------
type matrix = Matrix<float>
val kn: Impute.MatrixBaseImputation<float[],float>
module Impute from FSharp.Stats.ML
<summary> Module for data imputation and missing value filtering </summary>
type MatrixBaseImputation<'a,'b> = seq<'a> -> 'a -> int -> 'b
<summary> Type definition for a matrix-based imputation. The imputed values are based on the given whole dataset. </summary>
Multiple items
val float: value: 'T -> float (requires member op_Explicit)
<summary>Converts the argument to 64-bit float. This is a direct conversion for all primitive numeric types. For strings, the input is converted using <c>Double.Parse()</c> with InvariantCulture settings. Otherwise the operation requires an appropriate static conversion method on the input type.</summary>
<param name="value">The input value.</param>
<returns>The converted float</returns>
<example id="float-example"><code lang="fsharp"></code></example>


--------------------
[<Struct>] type float = System.Double
<summary>An abbreviation for the CLI type <see cref="T:System.Double" />.</summary>
<category>Basic Types</category>


--------------------
type float<'Measure> = float
<summary>The type of double-precision floating point numbers, annotated with a unit of measure. The unit of measure is erased in compiled code and when values of this type are analyzed using reflection. The type is representationally equivalent to <see cref="T:System.Double" />.</summary>
<category index="6">Basic Types with Units of Measure</category>
val kNearestImpute: k: int -> data: seq<float[]> -> arr: float[] -> index: int -> float
<summary>Imputation by k-nearest neighbour</summary>
<remarks></remarks>
<param name="k"></param>
<returns></returns>
<example><code></code></example>
val imputedData: float[][]
val imputeBy: impute: Impute.MatrixBaseImputation<'a[],'a> -> isMissing: ('a -> bool) -> data: seq<#seq<'a>> -> 'a[][]
<summary>Imputes rows by matrix-based imputation</summary>
<remarks></remarks>
<param name="impute"></param>
<param name="isMissing"></param>
<param name="data"></param>
<returns></returns>
<example><code></code></example>
module Ops from FSharp.Stats
<summary> Operations module (automatically opened) </summary>
val isNan: num: 'c -> bool (requires equality)
<summary> Returns true if num is nan (generic; based on equality) </summary>
val imputedDataMatrix: string
val rnd: (seq<float> -> int -> float)
val rnd: rnd: System.Random -> fdata: seq<'a> -> index: int -> 'a
<summary>Imputation by random sampling from the input vector</summary>
<remarks></remarks>
<param name="rnd"></param>
<returns></returns>
<example><code></code></example>
namespace System
Multiple items
type Random = new: unit -> unit + 1 overload member Next: unit -> int + 2 overloads member NextBytes: buffer: byte[] -> unit + 1 overload member NextDouble: unit -> float member NextInt64: unit -> int64 + 2 overloads member NextSingle: unit -> float32 static member Shared: Random
<summary>Represents a pseudo-random number generator, which is an algorithm that produces a sequence of numbers that meet certain statistical requirements for randomness.</summary>

--------------------
System.Random() : System.Random
System.Random(Seed: int) : System.Random
val rndRowWise: float[][]
val imputeRowWiseBy: impute: Impute.VectorBaseImputation<'a> -> isMissing: ('a -> bool) -> data: seq<#seq<'a>> -> 'a[][]
<summary>Imputes row-wise by vector-based imputation</summary>
<remarks></remarks>
<param name="impute"></param>
<param name="isMissing"></param>
<param name="data"></param>
<returns></returns>
<example><code></code></example>
val rndColWise: float[][]
val imputeColWiseBy: impute: Impute.VectorBaseImputation<'a> -> isMissing: ('a -> bool) -> data: seq<#seq<'a>> -> 'a[][]
<summary>Imputes column-wise by vector-based imputation</summary>
<remarks></remarks>
<param name="impute"></param>
<param name="isMissing"></param>
<param name="data"></param>
<returns></returns>
<example><code></code></example>
val rndRowDataMatrix: string
val rndColDataMatrix: string
val normalRowWise: float[][]
val normal: fdata: seq<float> -> index: int -> float
<summary> Imputation by sampling from a Gaussian normal distribution based on the input vector </summary>
val normalColWise: float[][]
val normalRowDataMatrix: string
val normalColDataMatrix: string