// can't yet format YamlFrontmatter (["title: Concussions in Male and Female College Athletes"; "category: Datasets"; "categoryindex: 1"; "index: 2"], Some { StartLine = 2 StartColumn = 0 EndLine = 6 EndColumn = 8 }) to pynb markdown

[![Binder](/datasets/img/badge-binder.svg)](https://mybinder.org/v2/gh/plotly/Plotly.NET/gh-pages?filepath=02_ConcussionsInMaleAndFemaleCollegeAthletes.ipynb)&emsp;
[![Script](/datasets/img/badge-script.svg)](/datasets/02_ConcussionsInMaleAndFemaleCollegeAthletes.fsx)&emsp;
[![Notebook](/datasets/img/badge-notebook.svg)](/datasets/02_ConcussionsInMaleAndFemaleCollegeAthletes.ipynb)

# The _Concussions in Male and Female College Athletes_ dataset

**Table of contents**

- [Description](#Description)
- [How to use](#How-to-use)
- [Examples](#Examples)

## Description

Counts of concussions among collegiate athletes in five sports over three years, broken down by gender.  
Taken from [Lawrence H. Winner, University of Florida](http://archived.stat.ufl.edu/personnel/usrpages/winner.shtml):  
- [Data](http://users.stat.ufl.edu/~winner/data/concussion.dat)  
- [Description](http://users.stat.ufl.edu/~winner/data/concussion.txt)

Original literature: T. Covassin, C.B. Swanik, M.L. Sachs (2003). "Sex Differences and the Incidence of Concussions Among Collegiate Athletes", Journal of Athletic Training, Vol. 38(3), pp. 238-244


## How to use




In [1]:
#r "nuget: FSharp.Data"
#r "nuget: Deedle"

open FSharp.Data
open Deedle
open System.Text.RegularExpressions

// Column names applied to both variants of the dataset; the files are read without a header row.
let athleteSchema = "Gender, Sports, Year, Concussion, Count"

// Parses tab-separated text (no header row) into a Deedle frame with the column names above.
let readAthleteFrame (tsvText: string) =
    Frame.ReadCsvString(tsvText, hasHeaders = false, separators = "\t", schema = athleteSchema)

// Option 1: download the original file and convert it ourselves.
let rawData =
    Http.RequestString @"https://raw.githubusercontent.com/fslaborg/datasets/main/data/ConcussionsInMaleAndFemaleCollegeAthletes.dat"

// The original file lays its columns out by character position. Replacing every run of two or
// more spaces with a single tab turns it into a separator-delimited format:
let regex = Regex("[ ]{2,}")
let rawDataAdapted = regex.Replace(rawData, "\t")

let df = readAthleteFrame rawDataAdapted

// Option 2: download the already adapted, tab-separated version of the dataset instead.
let rawData2 =
    Http.RequestString @"https://raw.githubusercontent.com/fslaborg/datasets/main/data/ConcussionsInMaleAndFemaleCollegeAthletes_adapted.tsv"

let df2 = readAthleteFrame rawData2

// Print the frame read from the adapted file.
df2.Print()


      Gender Sports            Year Concussion Count
0  -> Female Soccer            1997 False      24930
1  -> Female Soccer            1997 True       51
2  -> Female Soccer            1998 False      22887
3  -> Female Soccer            1998 True       47
4  -> Female Soccer            1999 False      27107
5  -> Female Soccer            1999 True       60
6  -> Female Lacrosse          1997 False      8750
7  -> Female Lacrosse          1997 True       12
8  -> Female Lacrosse          1998 False      7115
9  -> Female Lacrosse          1998 True       7
10 -> Female Lacrosse          1999 False      8524
11 -> Female Lacrosse          1999 True       7
12 -> Female Basketball        1997 False      29397
13 -> Female Basketball        1997 True       16
14 -> Female Basketball        1998 False      38144
:     ...    ...               ...  ...        ...
45 -> Male   Basketball        1998 True       21
46 -> Male   Basketball        1999 False      32816

## Examples

This example is taken from the FsLab data science tutorial [t-test]()
(WIP)




In [2]:
#r "nuget: FSharp.Stats, 0.4.2"
#r "nuget: Plotly.NET, 2.0.0-preview.6"

open FSharp.Stats
open FSharp.Stats.Testing
open Plotly.NET

// Reduce the frame to what the comparison needs: keep only the rows that record concussions
// (Concussion = true), split them by gender, and keep just the "Count" column of each group
// as a vector of floats.
let dataAthletesFemale, dataAthletesMale =
    // The concussion-only rows do not depend on the gender, so they are computed once
    // and shared by both lookups below.
    let concussionRows =
        df2
        |> Frame.filterRows (fun _ row -> row.GetAs<bool>("Concussion"))
    // Returns the "Count" values of the concussion rows whose "Gender" equals `gender`.
    let countsFor (gender: string) =
        concussionRows
        |> Frame.filterRows (fun _ row -> row.GetAs<string>("Gender") = gender)
        |> Frame.getCol "Count"
        |> Series.values
        |> vector
    countsFor "Female", countsFor "Male"

// Combined box plot of the concussion counts, one trace per gender.
// Both traces use the same settings: Boxpoints.All and a jitter of 0.2.
let boxPlot =
    let femaleBox =
        Chart.BoxPlot(y = dataAthletesFemale, Name = "female college athletes", Boxpoints = StyleParam.Boxpoints.All, Jitter = 0.2)
    let maleBox =
        Chart.BoxPlot(y = dataAthletesMale, Name = "male college athletes", Boxpoints = StyleParam.Boxpoints.All, Jitter = 0.2)
    [ femaleBox; maleBox ]
    |> Chart.Combine
    |> Chart.withY_AxisStyle "number of concussions over 3 years"


In [None]:
// Evaluates the combined chart as the cell's final expression (presumably so the notebook renders it; confirm).
boxPlot


In [4]:
open FSharp.Stats.Testing

// Two-sample t-test of the female counts against the male counts.
let twoSampleResult =
    // First argument of TTest.twoSample: the variances of both samples are assumed to be equal.
    let assumeEqualVariances = true
    TTest.twoSample assumeEqualVariances dataAthletesFemale dataAthletesMale


{ Statistic = 0.5616104016  DegreesOfFreedom = 28.0  PValueLeft = 0.7105752703  PValueRight = 0.2894247297  PValue = 0.5788494593 }