BinderScriptNotebook

The Housefly Wing Length dataset

Table of contents

Description

Measured wing lengths of 100 houseflies in mm * 10^1.
Taken from https://seattlecentral.edu/qelp/sets/057/057.html

Original literature: Sokal, R.R. and P.E. Hunter. 1955. "A morphometric analysis of DDT-resistant and non-resistant housefly strains" Ann. Entomol. Soc. Amer. 48: 499-507.

How to use

#r "nuget: FSharp.Data"
#r "nuget: Deedle"

open FSharp.Data
open Deedle

let rawData = Http.RequestString @"https://raw.githubusercontent.com/fslaborg/datasets/main/data/HouseflyWingLength.txt"

let df = Frame.ReadCsvString(rawData, hasHeaders = false, schema = "wing length (mm * 10^1)")

df.Print()
wing length (mm * 10^1) 
0  -> 36                      
1  -> 37                      
2  -> 38                      
3  -> 38                      
4  -> 39                      
5  -> 39                      
6  -> 40                      
7  -> 40                      
8  -> 40                      
9  -> 40                      
10 -> 41                      
11 -> 41                      
12 -> 41                      
13 -> 41                      
14 -> 41                      
:     ...                     
85 -> 50                      
86 -> 50                      
87 -> 50                      
88 -> 50                      
89 -> 50                      
90 -> 51                      
91 -> 51                      
92 -> 51                      
93 -> 51                      
94 -> 52                      
95 -> 52                      
96 -> 53                      
97 -> 53                      
98 -> 54                      
99 -> 55

Examples

This example is taken from the FsLab datascience tutorial t-test (WIP)

#r "nuget: FSharp.Stats, 0.4.2"
#r "nuget: Plotly.NET, 2.0.0-preview.6"

open FSharp.Stats
open FSharp.Stats.Testing
open Plotly.NET

let seqDataHousefly =
    df
    |> Frame.getCol "wing length (mm * 10^1)"
    |> Series.values
    // We convert the values to mm
    |> Seq.map (fun x -> x / 10.)

let boxPlot = 
    Chart.BoxPlot(y = seqDataHousefly, Name = "housefly", Boxpoints = StyleParam.Boxpoints.All, Jitter = 0.2)
    |> Chart.withY_AxisStyle "wing length [mm]"
// The testing module in FSharp.Stats require vectors as input types, thus we transform our array into a vector:
let vectorDataHousefly = vector seqDataHousefly

// The expected value of our population.
let expectedValue = 4.5

// Perform the one-sample t-test with our vectorized data and our exptected value as parameters.
let oneSampleResult = TTest.oneSample vectorDataHousefly expectedValue
{ Statistic = 1.275624919
  DegreesOfFreedom = 99.0
  PValueLeft = 0.8974634108
  PValueRight = 0.1025365892
  PValue = 0.2050731784 }
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
namespace Deedle
val rawData : string
type Http = private new : unit -> Http static member private AppendQueryToUrl : url:string * query:(string * string) list -> string static member AsyncRequest : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) * ?timeout:int -> Async<HttpResponse> static member AsyncRequestStream : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) * ?timeout:int -> Async<HttpResponseWithStream> static member AsyncRequestString : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) * ?timeout:int -> Async<string> static member private EncodeFormData : query:string -> string static member private InnerRequest : url:string * toHttpResponse:(string -> int -> string -> string -> 'a0 option -> Map<string,string> -> Map<string,string> -> Stream -> Async<'a1>) * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?responseEncodingOverride:'a0 * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) * ?timeout:int -> Async<'a1> static member Request : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) * ?timeout:int -> HttpResponse static member RequestStream : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) * ?timeout:int -> HttpResponseWithStream static member RequestString : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) * ?timeout:int -> string
<summary> Utilities for working with network via HTTP. Includes methods for downloading resources with specified headers, query parameters and HTTP body </summary>
Multiple items
static member Http.RequestString : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:System.Net.CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(System.Net.HttpWebRequest -> System.Net.HttpWebRequest) * ?timeout:int -> string

--------------------
static member Http.RequestString : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:System.Net.CookieContainer * ?silentHttpErrors:bool * ?silentCookieErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(System.Net.HttpWebRequest -> System.Net.HttpWebRequest) * ?timeout:int -> string
val df : Frame<int,string>
Multiple items
module Frame from Deedle

--------------------
type Frame = static member ReadCsv : location:string * hasHeaders:Nullable<bool> * inferTypes:Nullable<bool> * inferRows:Nullable<int> * schema:string * separators:string * culture:string * maxRows:Nullable<int> * missingValues:string [] * preferOptions:bool -> Frame<int,string> + 1 overload static member ReadReader : reader:IDataReader -> Frame<int,string> static member CustomExpanders : Dictionary<Type,Func<obj,seq<string * Type * obj>>> static member NonExpandableInterfaces : ResizeArray<Type> static member NonExpandableTypes : HashSet<Type>

--------------------
type Frame<'TRowKey,'TColumnKey (requires equality and equality)> = interface IDynamicMetaObjectProvider interface INotifyCollectionChanged interface IFsiFormattable interface IFrame new : rowIndex:IIndex<'TRowKey> * columnIndex:IIndex<'TColumnKey> * data:IVector<IVector> * indexBuilder:IIndexBuilder * vectorBuilder:IVectorBuilder -> Frame<'TRowKey,'TColumnKey> + 1 overload member AddColumn : column:'TColumnKey * series:seq<'V> -> unit + 3 overloads member AggregateRowsBy : groupBy:seq<'TColumnKey> * aggBy:seq<'TColumnKey> * aggFunc:Func<Series<'TRowKey,'a>,'b> -> Frame<int,'TColumnKey> member Clone : unit -> Frame<'TRowKey,'TColumnKey> member ColumnApply : f:Func<Series<'TRowKey,'T>,ISeries<'TRowKey>> -> Frame<'TRowKey,'TColumnKey> + 1 overload member DropColumn : column:'TColumnKey -> unit ...

--------------------
new : names:seq<'TColumnKey> * columns:seq<ISeries<'TRowKey>> -> Frame<'TRowKey,'TColumnKey>
new : rowIndex:Indices.IIndex<'TRowKey> * columnIndex:Indices.IIndex<'TColumnKey> * data:IVector<IVector> * indexBuilder:Indices.IIndexBuilder * vectorBuilder:Vectors.IVectorBuilder -> Frame<'TRowKey,'TColumnKey>
static member Frame.ReadCsvString : csvString:string * ?hasHeaders:bool * ?inferTypes:bool * ?inferRows:int * ?schema:string * ?separators:string * ?culture:string * ?maxRows:int * ?missingValues:string [] * ?preferOptions:bool -> Frame<int,string>
static member FrameExtensions.Print : frame:Frame<'K,'V> -> unit (requires equality and equality)
static member FrameExtensions.Print : frame:Frame<'K,'V> * printTypes:bool -> unit (requires equality and equality)
namespace FSharp.Stats
namespace FSharp.Stats.Testing
namespace Plotly
namespace Plotly.NET
val seqDataHousefly : seq<float>
Multiple items
module Frame from Deedle

--------------------
type Frame = inherit DynamicObj new : unit -> Frame

--------------------
type Frame<'TRowKey,'TColumnKey (requires equality and equality)> = interface IDynamicMetaObjectProvider interface INotifyCollectionChanged interface IFsiFormattable interface IFrame new : rowIndex:IIndex<'TRowKey> * columnIndex:IIndex<'TColumnKey> * data:IVector<IVector> * indexBuilder:IIndexBuilder * vectorBuilder:IVectorBuilder -> Frame<'TRowKey,'TColumnKey> + 1 overload member AddColumn : column:'TColumnKey * series:seq<'V> -> unit + 3 overloads member AggregateRowsBy : groupBy:seq<'TColumnKey> * aggBy:seq<'TColumnKey> * aggFunc:Func<Series<'TRowKey,'a>,'b> -> Frame<int,'TColumnKey> member Clone : unit -> Frame<'TRowKey,'TColumnKey> member ColumnApply : f:Func<Series<'TRowKey,'T>,ISeries<'TRowKey>> -> Frame<'TRowKey,'TColumnKey> + 1 overload member DropColumn : column:'TColumnKey -> unit ...

--------------------
new : unit -> Frame

--------------------
new : names:seq<'TColumnKey> * columns:seq<ISeries<'TRowKey>> -> Frame<'TRowKey,'TColumnKey>
new : rowIndex:Indices.IIndex<'TRowKey> * columnIndex:Indices.IIndex<'TColumnKey> * data:IVector<IVector> * indexBuilder:Indices.IIndexBuilder * vectorBuilder:Vectors.IVectorBuilder -> Frame<'TRowKey,'TColumnKey>
val getCol : column:'C -> frame:Frame<'R,'C> -> Series<'R,'V> (requires equality and equality)
Multiple items
module Series from Deedle

--------------------
type Series = static member ofNullables : values:seq<Nullable<'a0>> -> Series<int,'a0> (requires default constructor and value type and 'a0 :> ValueType) static member ofObservations : observations:seq<'c * 'd> -> Series<'c,'d> (requires equality) static member ofOptionalObservations : observations:seq<'K * 'a1 option> -> Series<'K,'a1> (requires equality) static member ofValues : values:seq<'a> -> Series<int,'a>

--------------------
type Series<'K,'V (requires equality)> = interface IFsiFormattable interface ISeries<'K> new : index:IIndex<'K> * vector:IVector<'V> * vectorBuilder:IVectorBuilder * indexBuilder:IIndexBuilder -> Series<'K,'V> + 3 overloads member After : lowerExclusive:'K -> Series<'K,'V> member Aggregate : aggregation:Aggregation<'K> * keySelector:Func<DataSegment<Series<'K,'V>>,'TNewKey> * valueSelector:Func<DataSegment<Series<'K,'V>>,OptionalValue<'R>> -> Series<'TNewKey,'R> (requires equality) + 1 overload member AsyncMaterialize : unit -> Async<Series<'K,'V>> member Before : upperExclusive:'K -> Series<'K,'V> member Between : lowerInclusive:'K * upperInclusive:'K -> Series<'K,'V> member Compare : another:Series<'K,'V> -> Series<'K,Diff<'V>> member Convert : forward:Func<'V,'R> * backward:Func<'R,'V> -> Series<'K,'R> ...

--------------------
new : pairs:seq<System.Collections.Generic.KeyValuePair<'K,'V>> -> Series<'K,'V>
new : keys:seq<'K> * values:seq<'V> -> Series<'K,'V>
new : keys:'K [] * values:'V [] -> Series<'K,'V>
new : index:Indices.IIndex<'K> * vector:IVector<'V> * vectorBuilder:Vectors.IVectorBuilder * indexBuilder:Indices.IIndexBuilder -> Series<'K,'V>
val values : series:Series<'K,'T> -> seq<'T> (requires equality)
Multiple items
module Seq from Plotly.NET

--------------------
module Seq from FSharp.Stats
<summary> Module to compute common statistical measure </summary>

--------------------
module Seq from Microsoft.FSharp.Collections
<summary>Contains operations for working with values of type <see cref="T:Microsoft.FSharp.Collections.seq`1" />.</summary>
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
<summary>Builds a new collection whose elements are the results of applying the given function to each of the elements of the collection. The given function will be applied as elements are demanded using the <c>MoveNext</c> method on enumerators retrieved from the object.</summary>
<remarks>The returned sequence may be passed between threads safely. However, individual IEnumerator values generated from the returned sequence should not be accessed concurrently.</remarks>
<param name="mapping">A function to transform items from the input sequence.</param>
<param name="source">The input sequence.</param>
<returns>The result sequence.</returns>
<exception cref="T:System.ArgumentNullException">Thrown when the input sequence is null.</exception>
val x : float
val boxPlot : GenericChart.GenericChart
type Chart = static member Area : x:seq<#IConvertible> * y:seq<#IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Dash:DrawingStyle * ?Width:float -> GenericChart + 1 overload static member Bar : keys:seq<#IConvertible> * values:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:string * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart + 1 overload static member BoxPlot : ?x:'a * ?y:'b * ?Name:string * ?Showlegend:bool * ?Color:string * ?Fillcolor:'c * ?Opacity:float * ?Whiskerwidth:'d * ?Boxpoints:Boxpoints * ?Boxmean:BoxMean * ?Jitter:'e * ?Pointpos:'f * ?Orientation:Orientation * ?Marker:Marker * ?Line:Line * ?Alignmentgroup:'g * ?Offsetgroup:'h * ?Notched:bool * ?NotchWidth:float * ?QuartileMethod:QuartileMethod -> GenericChart + 1 overload static member Bubble : x:seq<#IConvertible> * y:seq<#IConvertible> * sizes:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:string * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?StackGroup:string * ?Orientation:Orientation * ?GroupNorm:GroupNorm * ?UseWebGL:bool -> GenericChart + 1 overload static member Candlestick : open:seq<#IConvertible> * high:seq<#IConvertible> * low:seq<#IConvertible> * close:seq<#IConvertible> * x:seq<#IConvertible> * ?Increasing:Line * ?Decreasing:Line * ?WhiskerWidth:float * ?Line:Line * ?XCalendar:Calendar -> GenericChart + 1 overload static member ChoroplethMap : locations:seq<string> * z:seq<#IConvertible> * ?Text:seq<#IConvertible> * ?Locationmode:LocationFormat * ?Autocolorscale:bool * ?Colorscale:Colorscale * ?Colorbar:Colorbar * ?Marker:Marker * ?GeoJson:'a2 * ?FeatureIdKey:string * ?Zmin:float * ?Zmax:float -> GenericChart static member ChoroplethMapbox : locations:seq<#IConvertible> * z:seq<#IConvertible> * geoJson:'a2 * ?FeatureIdKey:string * ?Text:seq<#IConvertible> * ?Below:string * ?Colorscale:Colorscale * ?Colorbar:Colorbar * ?ZAuto:bool * ?ZMin:float * ?ZMid:float * ?ZMax:float -> GenericChart static member Column : keys:seq<#IConvertible> * values:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:string * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart + 1 overload static member Contour : data:seq<#seq<'a1>> * ?X:seq<#IConvertible> * ?Y:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?Opacity:float * ?Colorscale:Colorscale * ?Showscale:'a4 * ?zSmooth:SmoothAlg * ?Colorbar:'a5 -> GenericChart (requires 'a1 :> IConvertible) static member DensityMapbox : lon:seq<#IConvertible> * lat:seq<#IConvertible> * ?Z:seq<#IConvertible> * ?Radius:float * ?Opacity:float * ?Text:seq<#IConvertible> * ?Below:string * ?Colorscale:Colorscale * ?Colorbar:Colorbar * ?Showscale:bool * ?ZAuto:bool * ?ZMin:float * ?ZMid:float * ?ZMax:float -> GenericChart + 1 overload ...
<summary> Provides a set of static methods for creating charts. </summary>
static member Chart.BoxPlot : xy:seq<'a0 * 'a1> * ?Name:string * ?Showlegend:bool * ?Color:string * ?Fillcolor:'a2 * ?Opacity:float * ?Whiskerwidth:'a3 * ?Boxpoints:StyleParam.Boxpoints * ?Boxmean:StyleParam.BoxMean * ?Jitter:'a4 * ?Pointpos:'a5 * ?Orientation:StyleParam.Orientation * ?Marker:Marker * ?Line:Line * ?Alignmentgroup:'a6 * ?Offsetgroup:'a7 * ?Notched:bool * ?NotchWidth:float * ?QuartileMethod:StyleParam.QuartileMethod -> GenericChart.GenericChart
static member Chart.BoxPlot : ?x:'a * ?y:'b * ?Name:string * ?Showlegend:bool * ?Color:string * ?Fillcolor:'c * ?Opacity:float * ?Whiskerwidth:'d * ?Boxpoints:StyleParam.Boxpoints * ?Boxmean:StyleParam.BoxMean * ?Jitter:'e * ?Pointpos:'f * ?Orientation:StyleParam.Orientation * ?Marker:Marker * ?Line:Line * ?Alignmentgroup:'g * ?Offsetgroup:'h * ?Notched:bool * ?NotchWidth:float * ?QuartileMethod:StyleParam.QuartileMethod -> GenericChart.GenericChart
argument y: seq<float> option
<summary> Displays the distribution of data based on the five number summary: minimum, first quartile, median, third quartile, and maximum. </summary>
module StyleParam from Plotly.NET
type Boxpoints = | Outliers | All | Suspectedoutliers | False static member convert : (Boxpoints -> obj)
union case StyleParam.Boxpoints.All: StyleParam.Boxpoints
static member Chart.withY_AxisStyle : title:string * ?MinMax:(float * float) * ?Showgrid:bool * ?Showline:bool * ?Side:StyleParam.Side * ?Overlaying:StyleParam.AxisAnchorId * ?Id:int * ?Domain:(float * float) * ?Position:float * ?Zeroline:bool * ?Anchor:StyleParam.AxisAnchorId -> (GenericChart.GenericChart -> GenericChart.GenericChart)
module GenericChart from Plotly.NET
<summary> Module to represent a GenericChart </summary>
val toChartHTML : gChart:GenericChart.GenericChart -> string
<summary> Converts a GenericChart to it HTML representation. The div layer has a default size of 600 if not specified otherwise. </summary>
val vectorDataHousefly : Vector<float>
Multiple items
val vector : l:seq<float> -> Vector<float>

--------------------
type vector = Vector<float>
val expectedValue : float
val oneSampleResult : TestStatistics.TTestStatistics
module TTest from FSharp.Stats.Testing
val oneSample : sample1:Vector<float> -> mu:float -> TestStatistics.TTestStatistics