
The Concussions in Male and Female College Athletes dataset

Counts of concussions among collegiate athletes in five sports over three years (1997-1999), broken down by gender.
Taken from Lawrence H. Winner, University of Florida:
- Data
- Description

Original literature: T. Covassin, C.B. Swanik, M.L. Sachs (2003). "Sex Differences and the Incidence of Concussions Among Collegiate Athletes", Journal of Athletic Training, Vol. 38(3), pp. 238-244

How to use

#r "nuget: FSharp.Data"
#r "nuget: Deedle"

open FSharp.Data
open Deedle
open System.Text.RegularExpressions

// Download the raw dataset as one string.
// NOTE(review): the URL literal below is empty in this copy - presumably the link was lost; supply the dataset URL before running.
let rawData = Http.RequestString @""

// This data format features a char column-wise structure. To transform it into a separator-delimited format, we replace every run of two or more spaces with a single tab via Regex:
let regex = Regex("[ ]{2,}")
let rawDataAdapted = regex.Replace(rawData, "\t")

// Parse the tab-separated text into a Deedle frame. The data is read without a header row; the schema lists the five column names.
let df = Frame.ReadCsvString(rawDataAdapted, hasHeaders = false, separators = "\t", schema = "Gender, Sports, Year, Concussion, Count")

// Otherwise, the following already adapted dataset can be used:
// NOTE(review): this URL literal is empty as well - fill it in before running.
let rawData2 = Http.RequestString @""

// Same parsing call as for df, applied directly to the downloaded text (no Regex step).
let df2 = Frame.ReadCsvString(rawData2, hasHeaders = false, separators = "\t", schema = "Gender, Sports, Year, Concussion, Count")

      Gender Sports            Year Concussion Count 
0  -> Female Soccer            1997 False      24930 
1  -> Female Soccer            1997 True       51    
2  -> Female Soccer            1998 False      22887 
3  -> Female Soccer            1998 True       47    
4  -> Female Soccer            1999 False      27107 
5  -> Female Soccer            1999 True       60    
6  -> Female Lacrosse          1997 False      8750  
7  -> Female Lacrosse          1997 True       12    
8  -> Female Lacrosse          1998 False      7115  
9  -> Female Lacrosse          1998 True       7     
10 -> Female Lacrosse          1999 False      8524  
11 -> Female Lacrosse          1999 True       7     
12 -> Female Basketball        1997 False      29397 
13 -> Female Basketball        1997 True       16    
14 -> Female Basketball        1998 False      38144 
:     ...    ...               ...  ...        ...   
45 -> Male   Basketball        1998 True       21    
46 -> Male   Basketball        1999 False      32816 
47 -> Male   Basketball        1999 True       20    
48 -> Male   Softball/Baseball 1997 False      51329 
49 -> Male   Softball/Baseball 1997 True       22    
50 -> Male   Softball/Baseball 1998 False      49201 
51 -> Male   Softball/Baseball 1998 True       6     
52 -> Male   Softball/Baseball 1999 False      80190 
53 -> Male   Softball/Baseball 1999 True       25    
54 -> Male   Gymnastics        1997 False      227   
55 -> Male   Gymnastics        1997 True       0     
56 -> Male   Gymnastics        1998 False      221   
57 -> Male   Gymnastics        1998 True       0     
58 -> Male   Gymnastics        1999 False      1179  
59 -> Male   Gymnastics        1999 True       0


This example is taken from the FsLab data science tutorial on the t-test (WIP).

#r "nuget: FSharp.Stats, 0.4.2"
#r "nuget: Plotly.NET, 2.0.0-preview.6"

open FSharp.Stats
open FSharp.Stats.Testing
open Plotly.NET

// Extract the concussion counts per gender as vectors.
// Rows that report athletes without a concussion (Concussion = false) are dropped,
// and of the remaining columns only "Count" is kept.
let dataAthletesFemale, dataAthletesMale =
    // Returns the "Count" values of all concussion rows for the given gender.
    let getAthleteGenderData gender =
        // Keep only the rows whose "Concussion" flag is true.
        let dataAthletesOnlyConcussion =
            df
            |> Frame.filterRows (fun _ objS -> objS.GetAs "Concussion")
        // Of those, keep only the rows belonging to the requested gender.
        let dataAthletesGenderFrame =
            dataAthletesOnlyConcussion
            |> Frame.filterRows (fun _ objS -> objS.GetAs "Gender" = gender)
        // Project onto the "Count" column and convert its values into a vector.
        dataAthletesGenderFrame
        |> Frame.getCol "Count"
        |> Series.values
        |> vector
    getAthleteGenderData "Female", getAthleteGenderData "Male"

// Box plot comparing the per-sport, per-year concussion counts of female and male athletes.
// The two individual box plots are collected in a list and merged into one chart.
let boxPlot =
    [
        Chart.BoxPlot(y = dataAthletesFemale, Name = "female college athletes", Boxpoints = StyleParam.Boxpoints.All, Jitter = 0.2)
        Chart.BoxPlot(y = dataAthletesMale, Name = "male college athletes", Boxpoints = StyleParam.Boxpoints.All, Jitter = 0.2)
    ]
    |> Chart.Combine
    |> Chart.withY_AxisStyle "number of concussions over 3 years"
open FSharp.Stats.Testing

// Two-sample t-test of the female sample against the male sample.
// The leading `true` argument requests the equal-variances assumption.
let twoSampleResult =
    (dataAthletesFemale, dataAthletesMale)
    ||> TTest.twoSample true
{ Statistic = 0.5616104016
  DegreesOfFreedom = 28.0
  PValueLeft = 0.7105752703
  PValueRight = 0.2894247297
  PValue = 0.5788494593 }
