{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "using CSV, DataFramesMeta, Statistics\n", "\n", "# --- Plotting Functions - Makie ----\n", "# using CairoMakie;\n", "# set_theme!(theme_ggplot2())\n", "\n", "# --- Plotting Functions - Gadfly ----\n", "using Gadfly\n", "Gadfly.push_theme(:dark)\n", "set_default_plot_size(15cm, 15cm)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Tidy Tuesday Example in Julia\n", "\n", "Today we'll walk through the kind of quick exploration and modeling you might do with TidyTuesday data to practice Julia. " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Getting Data\n", "\n", "### Option 1: Download the CSV from the `tidytuesday` repo\n", "\n", "You can use `Base.download` (since Julia 1.6, a thin wrapper around `Downloads.download`) to fetch a CSV from the subfolders of [the official `tidytuesday` repo](https://github.com/rfordatascience/tidytuesday), and then `CSV.read` to read it into memory as a `DataFrame`." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
28×7 DataFrame
3 rows omitted
Rowvariablemeanminmedianmaxnmissingeltype
SymbolUnion…AnyUnion…AnyInt64Type
1year2017.6820152018.020190Int64
2unitid1.8436e5100654181738.08000010Int64
3institution_nameASA CollegeYuba College0String
4city_txtAberdeenYuma45Union{Missing, String31}
5state_cdAKWY45Union{Missing, String3}
6zip_text4.20049e760449104.099775750045Union{Missing, Int64}
7classification_code7.7793316.0200Int64
8classification_nameCCCAAUSCAA0String
9classification_otherACCAuscaa130642Union{Missing, String}
10ef_male_count2126.250986.0359540Int64
11ef_female_count2496.2101248.0303250Int64
12ef_total_count4622.4602259.0662790Int64
13sector_cd2.210312.0990Int64
17partic_women20.7083116.032763442Union{Missing, Int64}
18partic_coed_men11.046918.0130131560Union{Missing, Int64}
19partic_coed_women14.1669110.091131560Union{Missing, Int64}
20sum_partic_men14.492400.03310Int64
21sum_partic_women10.862206.03270Int64
22rev_men809011.065158126.015614720870462Union{Missing, Int64}
23rev_women2.79346e50138318.02144036563444Union{Missing, Int64}
24total_rev_menwomen7.95231e51302.28776e515614720845193Union{Missing, Int64}
25exp_men6.62386e565159666.06971805970462Union{Missing, Int64}
26exp_women3.31594e565141800.0948516263442Union{Missing, Int64}
27total_exp_menwomen7.32422e5130234559.06971805945191Union{Missing, Int64}
28sportsAll Track CombinedWrestling0String31
" ], "text/latex": [ "\\begin{tabular}{r|cccccc}\n", "\t& variable & mean & min & median & max & \\\\\n", "\t\\hline\n", "\t& Symbol & Union… & Any & Union… & Any & \\\\\n", "\t\\hline\n", "\t1 & year & 2017.68 & 2015 & 2018.0 & 2019 & $\\dots$ \\\\\n", "\t2 & unitid & 1.8436e5 & 100654 & 181738.0 & 800001 & $\\dots$ \\\\\n", "\t3 & institution\\_name & & ASA College & & Yuba College & $\\dots$ \\\\\n", "\t4 & city\\_txt & & Aberdeen & & Yuma & $\\dots$ \\\\\n", "\t5 & state\\_cd & & AK & & WY & $\\dots$ \\\\\n", "\t6 & zip\\_text & 4.20049e7 & 604 & 49104.0 & 997757500 & $\\dots$ \\\\\n", "\t7 & classification\\_code & 7.77933 & 1 & 6.0 & 20 & $\\dots$ \\\\\n", "\t8 & classification\\_name & & CCCAA & & USCAA & $\\dots$ \\\\\n", "\t9 & classification\\_other & & ACCA & & uscaa & $\\dots$ \\\\\n", "\t10 & ef\\_male\\_count & 2126.25 & 0 & 986.0 & 35954 & $\\dots$ \\\\\n", "\t11 & ef\\_female\\_count & 2496.21 & 0 & 1248.0 & 30325 & $\\dots$ \\\\\n", "\t12 & ef\\_total\\_count & 4622.46 & 0 & 2259.0 & 66279 & $\\dots$ \\\\\n", "\t13 & sector\\_cd & 2.2103 & 1 & 2.0 & 99 & $\\dots$ \\\\\n", "\t14 & sector\\_name & & Private for-profit, 2-year & & Public, 4-year or above & $\\dots$ \\\\\n", "\t15 & sportscode & 16.3427 & 1 & 16.0 & 38 & $\\dots$ \\\\\n", "\t16 & partic\\_men & 30.8617 & 1 & 22.0 & 331 & $\\dots$ \\\\\n", "\t17 & partic\\_women & 20.7083 & 1 & 16.0 & 327 & $\\dots$ \\\\\n", "\t18 & partic\\_coed\\_men & 11.0469 & 1 & 8.0 & 130 & $\\dots$ \\\\\n", "\t19 & partic\\_coed\\_women & 14.1669 & 1 & 10.0 & 91 & $\\dots$ \\\\\n", "\t20 & sum\\_partic\\_men & 14.4924 & 0 & 0.0 & 331 & $\\dots$ \\\\\n", "\t21 & sum\\_partic\\_women & 10.8622 & 0 & 6.0 & 327 & $\\dots$ \\\\\n", "\t22 & rev\\_men & 809011.0 & 65 & 158126.0 & 156147208 & $\\dots$ \\\\\n", "\t23 & rev\\_women & 2.79346e5 & 0 & 138318.0 & 21440365 & $\\dots$ \\\\\n", "\t24 & total\\_rev\\_menwomen & 7.95231e5 & 130 & 2.28776e5 & 156147208 & $\\dots$ \\\\\n", "\t25 & exp\\_men & 6.62386e5 & 65 & 
159666.0 & 69718059 & $\\dots$ \\\\\n", "\t26 & exp\\_women & 3.31594e5 & 65 & 141800.0 & 9485162 & $\\dots$ \\\\\n", "\t27 & total\\_exp\\_menwomen & 7.32422e5 & 130 & 234559.0 & 69718059 & $\\dots$ \\\\\n", "\t28 & sports & & All Track Combined & & Wrestling & $\\dots$ \\\\\n", "\\end{tabular}\n" ], "text/plain": [ "\u001b[1m28×7 DataFrame\u001b[0m\n", "\u001b[1m Row \u001b[0m│\u001b[1m variable \u001b[0m\u001b[1m mean \u001b[0m\u001b[1m min \u001b[0m\u001b[1m median \u001b[0m\u001b[1m\u001b[0m ⋯\n", " │\u001b[90m Symbol \u001b[0m\u001b[90m Union… \u001b[0m\u001b[90m Any \u001b[0m\u001b[90m Union… \u001b[0m\u001b[90m\u001b[0m ⋯\n", "─────┼──────────────────────────────────────────────────────────────────────────\n", " 1 │ year 2017.68 2015 2018.0 ⋯\n", " 2 │ unitid 1.8436e5 100654 181738.0\n", " 3 │ institution_name \u001b[90m \u001b[0m ASA College \u001b[90m \u001b[0m\n", " 4 │ city_txt \u001b[90m \u001b[0m Aberdeen \u001b[90m \u001b[0m\n", " 5 │ state_cd \u001b[90m \u001b[0m AK \u001b[90m \u001b[0m ⋯\n", " 6 │ zip_text 4.20049e7 604 49104.0\n", " 7 │ classification_code 7.77933 1 6.0\n", " 8 │ classification_name \u001b[90m \u001b[0m CCCAA \u001b[90m \u001b[0m\n", " 9 │ classification_other \u001b[90m \u001b[0m ACCA \u001b[90m \u001b[0m ⋯\n", " 10 │ ef_male_count 2126.25 0 986.0\n", " 11 │ ef_female_count 2496.21 0 1248.0\n", " ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋱\n", " 19 │ partic_coed_women 14.1669 1 10.0\n", " 20 │ sum_partic_men 14.4924 0 0.0 ⋯\n", " 21 │ sum_partic_women 10.8622 0 6.0\n", " 22 │ rev_men 809011.0 65 158126.0\n", " 23 │ rev_women 2.79346e5 0 138318.0\n", " 24 │ total_rev_menwomen 7.95231e5 130 2.28776e5 ⋯\n", " 25 │ exp_men 6.62386e5 65 159666.0\n", " 26 │ exp_women 3.31594e5 65 141800.0\n", " 27 │ total_exp_menwomen 7.32422e5 130 234559.0\n", " 28 │ sports \u001b[90m \u001b[0m All Track Combined \u001b[90m \u001b[0m ⋯\n", "\u001b[36m 3 columns and 7 rows omitted\u001b[0m" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"filepath = \"https://github.com/rfordatascience/tidytuesday/blob/master/data/2022/2022-03-29/sports.csv?raw=true\"\n", "\n", "# Download the CSV and parse it, mapping the literal strings \"NA\" and \"NAN\" to `missing`\n", "df = CSV.read(download(filepath), DataFrame; missingstring=[\"NA\", \"NAN\"])\n", "\n", "# Per-column summary: mean, min, median, max, nmissing, eltype\n", "describe(df)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
5×28 DataFrame
Rowyearunitidinstitution_namecity_txtstate_cdzip_textclassification_codeclassification_nameclassification_otheref_male_countef_female_countef_total_countsector_cdsector_namesportscodepartic_menpartic_womenpartic_coed_menpartic_coed_womensum_partic_mensum_partic_womenrev_menrev_womentotal_rev_menwomenexp_menexp_womentotal_exp_menwomensports
Int64Int64StringString31?String3?Int64?Int64StringString?Int64Int64Int64Int64String?Int64Int64?Int64?Int64?Int64?Int64Int64Int64?Int64?Int64?Int64?Int64?Int64?String31
12015100654Alabama A & M UniversityNormalAL357622NCAA Division I-FCSmissing1923230042231Public, 4-year or above131missingmissingmissing310345592missing345592397818missing397818Baseball
22015100654Alabama A & M UniversityNormalAL357622NCAA Division I-FCSmissing1923230042231Public, 4-year or above21916missingmissing1916121109574883319599288178687424601560328Basketball
32015100654Alabama A & M UniversityNormalAL357622NCAA Division I-FCSmissing1923230042231Public, 4-year or above36146missingmissing6146183333315574498907246949251184498133All Track Combined
42015100654Alabama A & M UniversityNormalAL357622NCAA Division I-FCSmissing1923230042231Public, 4-year or above799missingmissingmissing9902808949missing28089493059353missing3059353Football
52015100654Alabama A & M UniversityNormalAL357622NCAA Division I-FCSmissing1923230042231Public, 4-year or above89missingmissingmissing9078270missing7827083913missing83913Golf
" ], "text/latex": [ "\\begin{tabular}{r|cccccccc}\n", "\t& year & unitid & institution\\_name & city\\_txt & state\\_cd & zip\\_text & classification\\_code & \\\\\n", "\t\\hline\n", "\t& Int64 & Int64 & String & String31? & String3? & Int64? & Int64 & \\\\\n", "\t\\hline\n", "\t1 & 2015 & 100654 & Alabama A \\& M University & Normal & AL & 35762 & 2 & $\\dots$ \\\\\n", "\t2 & 2015 & 100654 & Alabama A \\& M University & Normal & AL & 35762 & 2 & $\\dots$ \\\\\n", "\t3 & 2015 & 100654 & Alabama A \\& M University & Normal & AL & 35762 & 2 & $\\dots$ \\\\\n", "\t4 & 2015 & 100654 & Alabama A \\& M University & Normal & AL & 35762 & 2 & $\\dots$ \\\\\n", "\t5 & 2015 & 100654 & Alabama A \\& M University & Normal & AL & 35762 & 2 & $\\dots$ \\\\\n", "\\end{tabular}\n" ], "text/plain": [ "\u001b[1m5×28 DataFrame\u001b[0m\n", "\u001b[1m Row \u001b[0m│\u001b[1m year \u001b[0m\u001b[1m unitid \u001b[0m\u001b[1m institution_name \u001b[0m\u001b[1m city_txt \u001b[0m\u001b[1m state_cd \u001b[0m\u001b[1m zip_text \u001b[0m\u001b[1m\u001b[0m ⋯\n", " │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m String \u001b[0m\u001b[90m String31? \u001b[0m\u001b[90m String3? \u001b[0m\u001b[90m Int64? 
\u001b[0m\u001b[90m\u001b[0m ⋯\n", "─────┼──────────────────────────────────────────────────────────────────────────\n", " 1 │ 2015 100654 Alabama A & M University Normal AL 35762 ⋯\n", " 2 │ 2015 100654 Alabama A & M University Normal AL 35762\n", " 3 │ 2015 100654 Alabama A & M University Normal AL 35762\n", " 4 │ 2015 100654 Alabama A & M University Normal AL 35762\n", " 5 │ 2015 100654 Alabama A & M University Normal AL 35762 ⋯\n", "\u001b[36m 22 columns omitted\u001b[0m" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first(df, 5)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Option 2: Use `RCall` and the `tidytuesdayR` library\n", "\n", "If you have a local R installation, you can use the `RCall` package to call R from Julia and load the data through the `tidytuesdayR` R library. \n", "\n", "Example: \n", "\n", "```julia\n", "using RCall\n", "\n", "YEAR = 2022;\n", "WEEK = 13;\n", "\n", "# TidytuesdayR\n", "tt_data = R\"tt_data <- tidytuesdayR::tt_load($YEAR, week=$WEEK)\";\n", "\n", "# R --> Julia\n", "df = rcopy(tt_data[\"sports\"])\n", "```" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## EDA\n", "\n", "For plotting there are lots of options. I initially used Gadfly but switched to Makie recently. \n", "\n", "Here's an example with Gadfly using the data from 2022 Week 13.\n", "\n", "This dataset comes from the 'Equity in Athletics Data Analysis', from Data is Plural.\n", "So we'll want to make comparisons of sports, colleges, and genders. 
\n", "\n", "Let's look at the kinds of sports there are:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": "\n\n\n \n \n \n\n\n \n \n \n\n\n \n \n \n sports\n \n \n \n \n \n \n Baseball\n \n \n \n \n Basketball\n \n \n \n \n All Track Combined\n \n \n \n \n Football\n \n \n \n \n Golf\n \n \n \n \n Soccer\n \n \n \n \n Softball\n \n \n \n \n Tennis\n \n \n \n \n Volleyball\n \n \n \n \n Bowling\n \n \n \n \n Rifle\n \n \n \n \n Beach Volleyball\n \n \n \n \n Ice Hockey\n \n \n \n \n Lacrosse\n \n \n \n \n Gymnastics\n \n \n \n \n Rowing\n \n \n \n \n Swimming and Diving\n \n \n \n \n Track and Field, X-Country\n \n \n \n \n Equestrian\n \n \n \n \n Track and Field, Indoor\n \n \n \n \n Track and Field, Outdoor\n \n \n \n \n Wrestling\n \n \n \n \n Other Sports\n \n \n \n \n Rodeo\n \n \n \n \n Skiing\n \n \n \n \n Swimming\n \n \n \n \n Water Polo\n \n \n \n \n Archery\n \n \n \n \n Field Hockey\n \n \n \n \n Fencing\n \n \n \n \n Sailing\n \n \n \n \n Badminton\n \n \n \n \n Squash\n \n \n \n \n Diving\n \n \n \n \n Synchronized Swimming\n \n \n \n \n Table Tennis\n \n \n \n \n Weight Lifting\n \n \n \n \n Team Handball\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 0\n \n \n 
\n \n 5.00×10³\n \n \n \n \n 1.00×10⁴\n \n \n \n \n \n \n Types of Sports\n \n \n \n\n\n \n \n \n\n\n", "text/html": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " sports\n", " \n", " \n", " \n", " \n", " \n", " \n", " Baseball\n", " \n", " \n", " \n", " \n", " Basketball\n", " \n", " \n", " \n", " \n", " All Track Combined\n", " \n", " \n", " \n", " \n", " Football\n", " \n", " \n", " \n", " \n", " Golf\n", " \n", " \n", " \n", " \n", " Soccer\n", " \n", " \n", " \n", " \n", " Softball\n", " \n", " \n", " \n", " \n", " Tennis\n", " \n", " \n", " \n", " \n", " Volleyball\n", " \n", " \n", " \n", " \n", " Bowling\n", " \n", " \n", " \n", " \n", " Rifle\n", " \n", " \n", " \n", " \n", " Beach Volleyball\n", " \n", " \n", " \n", " \n", " Ice Hockey\n", " \n", " \n", " \n", " \n", " Lacrosse\n", " \n", " \n", " \n", " \n", " Gymnastics\n", " \n", " \n", " \n", " \n", " Rowing\n", " \n", " \n", " \n", " \n", " Swimming and Diving\n", " \n", " \n", " \n", " \n", " Track and Field, X-Country\n", " \n", " \n", " \n", " \n", " Equestrian\n", " \n", " \n", " \n", " \n", " Track and Field, Indoor\n", " \n", " \n", " \n", " \n", " Track and Field, Outdoor\n", " \n", " \n", " \n", " \n", " Wrestling\n", " \n", " \n", " \n", " \n", " Other Sports\n", " \n", " \n", " \n", " \n", " Rodeo\n", " \n", " \n", " \n", " \n", " Skiing\n", " \n", " \n", " \n", " \n", " Swimming\n", " \n", " \n", " \n", " \n", " Water Polo\n", " \n", " \n", " \n", " \n", " Archery\n", " \n", " \n", " \n", " \n", " Field Hockey\n", " \n", " \n", " \n", " \n", " Fencing\n", " \n", " \n", " \n", " \n", " Sailing\n", " \n", " \n", " \n", " \n", " Badminton\n", " \n", " \n", " \n", " \n", " Squash\n", " \n", " \n", " \n", " \n", " Diving\n", " \n", " \n", " \n", " \n", " Synchronized Swimming\n", " \n", " \n", " \n", " \n", " Table Tennis\n", " \n", " \n", " \n", " \n", " Weight Lifting\n", " \n", " \n", " \n", " \n", " Team Handball\n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " h,j,k,l,arrows,drag to pan\n", " \n", " \n", " \n", " \n", " i,o,+,-,scroll,shift-drag to zoom\n", " \n", " \n", " \n", " \n", " r,dbl-click to reset\n", " \n", " \n", " \n", " \n", " c for coordinates\n", " \n", " \n", " \n", " \n", " ? for help\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " ?\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " -1.50×10⁴\n", " \n", " \n", " \n", " \n", " -1.00×10⁴\n", " \n", " \n", " \n", " \n", " -5.00×10³\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 5.00×10³\n", " \n", " \n", " \n", " \n", " 1.00×10⁴\n", " \n", " \n", " \n", " \n", " 1.50×10⁴\n", " \n", " \n", " \n", " \n", " 2.00×10⁴\n", " \n", " \n", " \n", " \n", " 2.50×10⁴\n", " \n", " \n", " \n", " \n", " -1.00×10⁴\n", " \n", " \n", " \n", " \n", " -9.00×10³\n", " \n", " \n", " \n", " \n", " -8.00×10³\n", " \n", " \n", " \n", " \n", " -7.00×10³\n", " \n", " \n", " \n", " \n", " -6.00×10³\n", " \n", " \n", " \n", " \n", " -5.00×10³\n", " \n", " \n", " \n", " \n", " -4.00×10³\n", " \n", " \n", " \n", " \n", " -3.00×10³\n", " \n", " \n", " \n", " \n", " -2.00×10³\n", " \n", " \n", " \n", " \n", " -1.00×10³\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 1.00×10³\n", " \n", " \n", " \n", " \n", " 2.00×10³\n", " \n", " \n", " \n", " \n", " 3.00×10³\n", " \n", " \n", " \n", " \n", " 4.00×10³\n", " \n", " \n", " \n", " \n", " 5.00×10³\n", " \n", " \n", " \n", " \n", " 6.00×10³\n", " \n", " \n", " \n", " \n", " 7.00×10³\n", " \n", " \n", " \n", " \n", " 8.00×10³\n", " \n", " \n", " \n", " \n", " 9.00×10³\n", " \n", " \n", " \n", " \n", " 1.00×10⁴\n", " \n", " \n", " \n", " \n", " 1.10×10⁴\n", " \n", " \n", " \n", " \n", " 1.20×10⁴\n", " \n", " \n", " \n", " \n", " 1.30×10⁴\n", " \n", " \n", " \n", " \n", " 1.40×10⁴\n", " \n", " \n", " \n", " \n", " 1.50×10⁴\n", " \n", " \n", " \n", " \n", " 1.60×10⁴\n", " \n", " \n", " 
\n", " \n", " 1.70×10⁴\n", " \n", " \n", " \n", " \n", " 1.80×10⁴\n", " \n", " \n", " \n", " \n", " 1.90×10⁴\n", " \n", " \n", " \n", " \n", " 2.00×10⁴\n", " \n", " \n", " \n", " \n", " -1.0×10⁴\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 1.0×10⁴\n", " \n", " \n", " \n", " \n", " 2.0×10⁴\n", " \n", " \n", " \n", " \n", " -1.000×10⁴\n", " \n", " \n", " \n", " \n", " -9.500×10³\n", " \n", " \n", " \n", " \n", " -9.000×10³\n", " \n", " \n", " \n", " \n", " -8.500×10³\n", " \n", " \n", " \n", " \n", " -8.000×10³\n", " \n", " \n", " \n", " \n", " -7.500×10³\n", " \n", " \n", " \n", " \n", " -7.000×10³\n", " \n", " \n", " \n", " \n", " -6.500×10³\n", " \n", " \n", " \n", " \n", " -6.000×10³\n", " \n", " \n", " \n", " \n", " -5.500×10³\n", " \n", " \n", " \n", " \n", " -5.000×10³\n", " \n", " \n", " \n", " \n", " -4.500×10³\n", " \n", " \n", " \n", " \n", " -4.000×10³\n", " \n", " \n", " \n", " \n", " -3.500×10³\n", " \n", " \n", " \n", " \n", " -3.000×10³\n", " \n", " \n", " \n", " \n", " -2.500×10³\n", " \n", " \n", " \n", " \n", " -2.000×10³\n", " \n", " \n", " \n", " \n", " -1.500×10³\n", " \n", " \n", " \n", " \n", " -1.000×10³\n", " \n", " \n", " \n", " \n", " -5.000×10²\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 5.000×10²\n", " \n", " \n", " \n", " \n", " 1.000×10³\n", " \n", " \n", " \n", " \n", " 1.500×10³\n", " \n", " \n", " \n", " \n", " 2.000×10³\n", " \n", " \n", " \n", " \n", " 2.500×10³\n", " \n", " \n", " \n", " \n", " 3.000×10³\n", " \n", " \n", " \n", " \n", " 3.500×10³\n", " \n", " \n", " \n", " \n", " 4.000×10³\n", " \n", " \n", " \n", " \n", " 4.500×10³\n", " \n", " \n", " \n", " \n", " 5.000×10³\n", " \n", " \n", " \n", " \n", " 5.500×10³\n", " \n", " \n", " \n", " \n", " 6.000×10³\n", " \n", " \n", " \n", " \n", " 6.500×10³\n", " \n", " \n", " \n", " \n", " 7.000×10³\n", " \n", " \n", " \n", " \n", " 7.500×10³\n", " \n", " \n", " \n", " \n", " 8.000×10³\n", " \n", " \n", " \n", " \n", " 
8.500×10³\n", " \n", " \n", " \n", " \n", " 9.000×10³\n", " \n", " \n", " \n", " \n", " 9.500×10³\n", " \n", " \n", " \n", " \n", " 1.000×10⁴\n", " \n", " \n", " \n", " \n", " 1.050×10⁴\n", " \n", " \n", " \n", " \n", " 1.100×10⁴\n", " \n", " \n", " \n", " \n", " 1.150×10⁴\n", " \n", " \n", " \n", " \n", " 1.200×10⁴\n", " \n", " \n", " \n", " \n", " 1.250×10⁴\n", " \n", " \n", " \n", " \n", " 1.300×10⁴\n", " \n", " \n", " \n", " \n", " 1.350×10⁴\n", " \n", " \n", " \n", " \n", " 1.400×10⁴\n", " \n", " \n", " \n", " \n", " 1.450×10⁴\n", " \n", " \n", " \n", " \n", " 1.500×10⁴\n", " \n", " \n", " \n", " \n", " 1.550×10⁴\n", " \n", " \n", " \n", " \n", " 1.600×10⁴\n", " \n", " \n", " \n", " \n", " 1.650×10⁴\n", " \n", " \n", " \n", " \n", " 1.700×10⁴\n", " \n", " \n", " \n", " \n", " 1.750×10⁴\n", " \n", " \n", " \n", " \n", " 1.800×10⁴\n", " \n", " \n", " \n", " \n", " 1.850×10⁴\n", " \n", " \n", " \n", " \n", " 1.900×10⁴\n", " \n", " \n", " \n", " \n", " 1.950×10⁴\n", " \n", " \n", " \n", " \n", " 2.000×10⁴\n", " \n", " \n", " \n", " \n", " \n", " \n", " Types of Sports\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "Plot(...)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count of rows per sport: histogram geometry over the `sports` column\n", "plot(df, x=:sports, Geom.histogram,\n", " Guide.title(\"Types of Sports\"))\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "What about gender differences in different sports, by total participation?" 
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": "\n\n\n \n \n \n\n\n \n \n \n\n\n \n \n \n partic_difference\n \n \n \n \n \n \n -3.0×10⁴\n \n \n \n \n -2.0×10⁴\n \n \n \n \n -1.0×10⁴\n \n \n \n \n 0\n \n \n \n \n 1.0×10⁴\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Equestrian\n \n \n \n \n Volleyball\n \n \n \n \n Rowing\n \n \n \n \n Swimming and Diving\n \n \n \n \n Other Sports\n \n \n \n \n Sailing\n \n \n \n \n Swimming\n \n \n \n \n Beach Volleyball\n \n \n \n \n Archery\n \n \n \n \n Diving\n \n \n \n \n Table Tennis\n \n \n \n \n Weight Lifting\n \n \n \n \n Gymnastics\n \n \n \n \n Rifle\n \n \n \n \n Skiing\n \n \n \n \n Squash\n \n \n \n \n Fencing\n \n \n \n \n Bowling\n \n \n \n \n Water Polo\n \n \n \n \n Rodeo\n \n \n \n \n Wrestling\n \n \n \n \n Track and Field, X-Country\n \n \n \n \n Ice Hockey\n \n \n \n \n Tennis\n \n \n \n \n Track and Field, Indoor\n \n \n \n \n All Track Combined\n \n \n \n \n Track and Field, Outdoor\n \n \n \n \n Golf\n \n \n \n \n Basketball\n \n \n \n \n Lacrosse\n \n \n \n \n Soccer\n \n \n \n \n \n \n sports\n \n \n \n \n \n \n Gender Participation Differences by Sport(Left: Skews Male, Right: Skews Female)\n \n \n \n\n\n \n \n \n\n\n", "text/html": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", 
"\n", " \n", " \n", " \n", " partic_difference\n", " \n", " \n", " \n", " \n", " \n", " \n", " -8.0×10⁴\n", " \n", " \n", " \n", " \n", " -7.0×10⁴\n", " \n", " \n", " \n", " \n", " -6.0×10⁴\n", " \n", " \n", " \n", " \n", " -5.0×10⁴\n", " \n", " \n", " \n", " \n", " -4.0×10⁴\n", " \n", " \n", " \n", " \n", " -3.0×10⁴\n", " \n", " \n", " \n", " \n", " -2.0×10⁴\n", " \n", " \n", " \n", " \n", " -1.0×10⁴\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 1.0×10⁴\n", " \n", " \n", " \n", " \n", " 2.0×10⁴\n", " \n", " \n", " \n", " \n", " 3.0×10⁴\n", " \n", " \n", " \n", " \n", " 4.0×10⁴\n", " \n", " \n", " \n", " \n", " 5.0×10⁴\n", " \n", " \n", " \n", " \n", " 6.0×10⁴\n", " \n", " \n", " \n", " \n", " -7.00×10⁴\n", " \n", " \n", " \n", " \n", " -6.50×10⁴\n", " \n", " \n", " \n", " \n", " -6.00×10⁴\n", " \n", " \n", " \n", " \n", " -5.50×10⁴\n", " \n", " \n", " \n", " \n", " -5.00×10⁴\n", " \n", " \n", " \n", " \n", " -4.50×10⁴\n", " \n", " \n", " \n", " \n", " -4.00×10⁴\n", " \n", " \n", " \n", " \n", " -3.50×10⁴\n", " \n", " \n", " \n", " \n", " -3.00×10⁴\n", " \n", " \n", " \n", " \n", " -2.50×10⁴\n", " \n", " \n", " \n", " \n", " -2.00×10⁴\n", " \n", " \n", " \n", " \n", " -1.50×10⁴\n", " \n", " \n", " \n", " \n", " -1.00×10⁴\n", " \n", " \n", " \n", " \n", " -5.00×10³\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 5.00×10³\n", " \n", " \n", " \n", " \n", " 1.00×10⁴\n", " \n", " \n", " \n", " \n", " 1.50×10⁴\n", " \n", " \n", " \n", " \n", " 2.00×10⁴\n", " \n", " \n", " \n", " \n", " 2.50×10⁴\n", " \n", " \n", " \n", " \n", " 3.00×10⁴\n", " \n", " \n", " \n", " \n", " 3.50×10⁴\n", " \n", " \n", " \n", " \n", " 4.00×10⁴\n", " \n", " \n", " \n", " \n", " 4.50×10⁴\n", " \n", " \n", " \n", " \n", " 5.00×10⁴\n", " \n", " \n", " \n", " \n", " -1.0×10⁵\n", " \n", " \n", " \n", " \n", " -5.0×10⁴\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 5.0×10⁴\n", " \n", " \n", " \n", " \n", " -7.00×10⁴\n", " \n", " 
\n", " \n", " \n", " -6.80×10⁴\n", " \n", " \n", " \n", " \n", " -6.60×10⁴\n", " \n", " \n", " \n", " \n", " -6.40×10⁴\n", " \n", " \n", " \n", " \n", " -6.20×10⁴\n", " \n", " \n", " \n", " \n", " -6.00×10⁴\n", " \n", " \n", " \n", " \n", " -5.80×10⁴\n", " \n", " \n", " \n", " \n", " -5.60×10⁴\n", " \n", " \n", " \n", " \n", " -5.40×10⁴\n", " \n", " \n", " \n", " \n", " -5.20×10⁴\n", " \n", " \n", " \n", " \n", " -5.00×10⁴\n", " \n", " \n", " \n", " \n", " -4.80×10⁴\n", " \n", " \n", " \n", " \n", " -4.60×10⁴\n", " \n", " \n", " \n", " \n", " -4.40×10⁴\n", " \n", " \n", " \n", " \n", " -4.20×10⁴\n", " \n", " \n", " \n", " \n", " -4.00×10⁴\n", " \n", " \n", " \n", " \n", " -3.80×10⁴\n", " \n", " \n", " \n", " \n", " -3.60×10⁴\n", " \n", " \n", " \n", " \n", " -3.40×10⁴\n", " \n", " \n", " \n", " \n", " -3.20×10⁴\n", " \n", " \n", " \n", " \n", " -3.00×10⁴\n", " \n", " \n", " \n", " \n", " -2.80×10⁴\n", " \n", " \n", " \n", " \n", " -2.60×10⁴\n", " \n", " \n", " \n", " \n", " -2.40×10⁴\n", " \n", " \n", " \n", " \n", " -2.20×10⁴\n", " \n", " \n", " \n", " \n", " -2.00×10⁴\n", " \n", " \n", " \n", " \n", " -1.80×10⁴\n", " \n", " \n", " \n", " \n", " -1.60×10⁴\n", " \n", " \n", " \n", " \n", " -1.40×10⁴\n", " \n", " \n", " \n", " \n", " -1.20×10⁴\n", " \n", " \n", " \n", " \n", " -1.00×10⁴\n", " \n", " \n", " \n", " \n", " -8.00×10³\n", " \n", " \n", " \n", " \n", " -6.00×10³\n", " \n", " \n", " \n", " \n", " -4.00×10³\n", " \n", " \n", " \n", " \n", " -2.00×10³\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 2.00×10³\n", " \n", " \n", " \n", " \n", " 4.00×10³\n", " \n", " \n", " \n", " \n", " 6.00×10³\n", " \n", " \n", " \n", " \n", " 8.00×10³\n", " \n", " \n", " \n", " \n", " 1.00×10⁴\n", " \n", " \n", " \n", " \n", " 1.20×10⁴\n", " \n", " \n", " \n", " \n", " 1.40×10⁴\n", " \n", " \n", " \n", " \n", " 1.60×10⁴\n", " \n", " \n", " \n", " \n", " 1.80×10⁴\n", " \n", " \n", " \n", " \n", " 2.00×10⁴\n", " \n", " \n", " \n", " \n", " 2.20×10⁴\n", " 
\n", " \n", " \n", " \n", " 2.40×10⁴\n", " \n", " \n", " \n", " \n", " 2.60×10⁴\n", " \n", " \n", " \n", " \n", " 2.80×10⁴\n", " \n", " \n", " \n", " \n", " 3.00×10⁴\n", " \n", " \n", " \n", " \n", " 3.20×10⁴\n", " \n", " \n", " \n", " \n", " 3.40×10⁴\n", " \n", " \n", " \n", " \n", " 3.60×10⁴\n", " \n", " \n", " \n", " \n", " 3.80×10⁴\n", " \n", " \n", " \n", " \n", " 4.00×10⁴\n", " \n", " \n", " \n", " \n", " 4.20×10⁴\n", " \n", " \n", " \n", " \n", " 4.40×10⁴\n", " \n", " \n", " \n", " \n", " 4.60×10⁴\n", " \n", " \n", " \n", " \n", " 4.80×10⁴\n", " \n", " \n", " \n", " \n", " 5.00×10⁴\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " h,j,k,l,arrows,drag to pan\n", " \n", " \n", " \n", " \n", " i,o,+,-,scroll,shift-drag to zoom\n", " \n", " \n", " \n", " \n", " r,dbl-click to reset\n", " \n", " \n", " \n", " \n", " c for coordinates\n", " \n", " \n", " \n", " \n", " ? for help\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " ?\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Equestrian\n", " \n", " \n", " \n", " \n", " Volleyball\n", " \n", " \n", " \n", " \n", " Rowing\n", " \n", " \n", " \n", " \n", " Swimming and Diving\n", " \n", " \n", " \n", " \n", " Other Sports\n", " \n", " \n", " \n", " \n", " Sailing\n", " \n", " \n", " \n", " \n", " Swimming\n", " \n", " \n", " \n", " \n", " Beach Volleyball\n", " \n", " \n", " \n", " \n", " Archery\n", " \n", " \n", " \n", " \n", " Diving\n", " \n", " \n", " \n", " \n", " Table Tennis\n", " \n", " \n", " \n", " \n", " Weight Lifting\n", " \n", " \n", " \n", " \n", " Gymnastics\n", " \n", " \n", " \n", " \n", " Rifle\n", " \n", " \n", " \n", " \n", " Skiing\n", " \n", " \n", " \n", " \n", " Squash\n", " \n", " \n", " \n", " \n", " Fencing\n", " \n", " \n", " \n", " \n", " Bowling\n", " \n", " \n", " \n", " \n", " Water Polo\n", " \n", " \n", " \n", " \n", " Rodeo\n", " \n", " \n", " \n", " \n", " Wrestling\n", " \n", " \n", " \n", " \n", " Track and Field, X-Country\n", " \n", " \n", " \n", " \n", " Ice Hockey\n", " \n", " \n", " \n", " \n", " Tennis\n", " \n", " \n", " \n", " \n", " Track and Field, Indoor\n", " \n", " \n", " \n", " \n", " All Track Combined\n", " \n", " \n", " \n", " \n", " Track and Field, Outdoor\n", " \n", 
" \n", " \n", " \n", " Golf\n", " \n", " \n", " \n", " \n", " Basketball\n", " \n", " \n", " \n", " \n", " Lacrosse\n", " \n", " \n", " \n", " \n", " Soccer\n", " \n", " \n", " \n", " \n", " \n", " \n", " sports\n", " \n", " \n", " \n", " \n", " \n", " \n", " Gender Participation Differences by Sport(Left: Skews Male, Right: Skews Female)\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "Plot(...)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "@chain df begin\n", " @rsubset(:sum_partic_women > 0, :sum_partic_men > 0)\n", " @by(:sports, \n", " :partic_difference=(sum(:sum_partic_women) - sum(:sum_partic_men)))\n", " @orderby(-:partic_difference)\n", " plot(x=:partic_difference, y=:sports, \n", " Geom.bar(orientation=:horizontal), \n", " Guide.title(\"Gender Participation Differences by Sport\\n(Left: Skews Male, Right: Skews Female)\"))\n", "end" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "What about revenues?" 
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": "\n\n\n \n \n \n\n\n \n \n \n\n\n \n \n \n difference\n \n \n \n \n \n \n -6.00×10⁹\n \n \n \n \n -4.00×10⁹\n \n \n \n \n -2.00×10⁹\n \n \n \n \n 0\n \n \n \n \n 2.00×10⁹\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Tennis\n \n \n \n \n All Track Combined\n \n \n \n \n Lacrosse\n \n \n \n \n Ice Hockey\n \n \n \n \n Basketball\n \n \n \n \n \n \n sports\n \n \n \n \n \n \n Which sports have the highest absolute revenue imbalance?\n \n \n \n\n\n \n \n \n\n\n", "text/html": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " difference\n", " \n", " \n", " \n", " \n", " \n", " \n", " -1.60×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.40×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.20×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.00×10¹⁰\n", " \n", " \n", " \n", " \n", " -8.00×10⁹\n", " \n", " \n", " \n", " \n", " -6.00×10⁹\n", " \n", " \n", " \n", " \n", " -4.00×10⁹\n", " \n", " \n", " \n", " \n", " -2.00×10⁹\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 2.00×10⁹\n", " \n", " \n", " \n", " \n", " 4.00×10⁹\n", " \n", " \n", " \n", " \n", " 6.00×10⁹\n", " \n", " \n", " \n", " \n", " 8.00×10⁹\n", " \n", " \n", " \n", " \n", " 1.00×10¹⁰\n", " \n", " \n", " \n", " \n", " 1.20×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.400×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.350×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.300×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.250×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.200×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.150×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.100×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.050×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.000×10¹⁰\n", " \n", " \n", " \n", " \n", " 
-9.500×10⁹\n", " \n", " \n", " \n", " \n", " -9.000×10⁹\n", " \n", " \n", " \n", " \n", " -8.500×10⁹\n", " \n", " \n", " \n", " \n", " -8.000×10⁹\n", " \n", " \n", " \n", " \n", " -7.500×10⁹\n", " \n", " \n", " \n", " \n", " -7.000×10⁹\n", " \n", " \n", " \n", " \n", " -6.500×10⁹\n", " \n", " \n", " \n", " \n", " -6.000×10⁹\n", " \n", " \n", " \n", " \n", " -5.500×10⁹\n", " \n", " \n", " \n", " \n", " -5.000×10⁹\n", " \n", " \n", " \n", " \n", " -4.500×10⁹\n", " \n", " \n", " \n", " \n", " -4.000×10⁹\n", " \n", " \n", " \n", " \n", " -3.500×10⁹\n", " \n", " \n", " \n", " \n", " -3.000×10⁹\n", " \n", " \n", " \n", " \n", " -2.500×10⁹\n", " \n", " \n", " \n", " \n", " -2.000×10⁹\n", " \n", " \n", " \n", " \n", " -1.500×10⁹\n", " \n", " \n", " \n", " \n", " -1.000×10⁹\n", " \n", " \n", " \n", " \n", " -5.000×10⁸\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 5.000×10⁸\n", " \n", " \n", " \n", " \n", " 1.000×10⁹\n", " \n", " \n", " \n", " \n", " 1.500×10⁹\n", " \n", " \n", " \n", " \n", " 2.000×10⁹\n", " \n", " \n", " \n", " \n", " 2.500×10⁹\n", " \n", " \n", " \n", " \n", " 3.000×10⁹\n", " \n", " \n", " \n", " \n", " 3.500×10⁹\n", " \n", " \n", " \n", " \n", " 4.000×10⁹\n", " \n", " \n", " \n", " \n", " 4.500×10⁹\n", " \n", " \n", " \n", " \n", " 5.000×10⁹\n", " \n", " \n", " \n", " \n", " 5.500×10⁹\n", " \n", " \n", " \n", " \n", " 6.000×10⁹\n", " \n", " \n", " \n", " \n", " 6.500×10⁹\n", " \n", " \n", " \n", " \n", " 7.000×10⁹\n", " \n", " \n", " \n", " \n", " 7.500×10⁹\n", " \n", " \n", " \n", " \n", " 8.000×10⁹\n", " \n", " \n", " \n", " \n", " 8.500×10⁹\n", " \n", " \n", " \n", " \n", " 9.000×10⁹\n", " \n", " \n", " \n", " \n", " 9.500×10⁹\n", " \n", " \n", " \n", " \n", " 1.000×10¹⁰\n", " \n", " \n", " \n", " \n", " -2.0×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.0×10¹⁰\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 1.0×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.400×10¹⁰\n", " \n", " \n", " \n", " \n", " 
-1.350×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.300×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.250×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.200×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.150×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.100×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.050×10¹⁰\n", " \n", " \n", " \n", " \n", " -1.000×10¹⁰\n", " \n", " \n", " \n", " \n", " -9.500×10⁹\n", " \n", " \n", " \n", " \n", " -9.000×10⁹\n", " \n", " \n", " \n", " \n", " -8.500×10⁹\n", " \n", " \n", " \n", " \n", " -8.000×10⁹\n", " \n", " \n", " \n", " \n", " -7.500×10⁹\n", " \n", " \n", " \n", " \n", " -7.000×10⁹\n", " \n", " \n", " \n", " \n", " -6.500×10⁹\n", " \n", " \n", " \n", " \n", " -6.000×10⁹\n", " \n", " \n", " \n", " \n", " -5.500×10⁹\n", " \n", " \n", " \n", " \n", " -5.000×10⁹\n", " \n", " \n", " \n", " \n", " -4.500×10⁹\n", " \n", " \n", " \n", " \n", " -4.000×10⁹\n", " \n", " \n", " \n", " \n", " -3.500×10⁹\n", " \n", " \n", " \n", " \n", " -3.000×10⁹\n", " \n", " \n", " \n", " \n", " -2.500×10⁹\n", " \n", " \n", " \n", " \n", " -2.000×10⁹\n", " \n", " \n", " \n", " \n", " -1.500×10⁹\n", " \n", " \n", " \n", " \n", " -1.000×10⁹\n", " \n", " \n", " \n", " \n", " -5.000×10⁸\n", " \n", " \n", " \n", " \n", " 0\n", " \n", " \n", " \n", " \n", " 5.000×10⁸\n", " \n", " \n", " \n", " \n", " 1.000×10⁹\n", " \n", " \n", " \n", " \n", " 1.500×10⁹\n", " \n", " \n", " \n", " \n", " 2.000×10⁹\n", " \n", " \n", " \n", " \n", " 2.500×10⁹\n", " \n", " \n", " \n", " \n", " 3.000×10⁹\n", " \n", " \n", " \n", " \n", " 3.500×10⁹\n", " \n", " \n", " \n", " \n", " 4.000×10⁹\n", " \n", " \n", " \n", " \n", " 4.500×10⁹\n", " \n", " \n", " \n", " \n", " 5.000×10⁹\n", " \n", " \n", " \n", " \n", " 5.500×10⁹\n", " \n", " \n", " \n", " \n", " 6.000×10⁹\n", " \n", " \n", " \n", " \n", " 6.500×10⁹\n", " \n", " \n", " \n", " \n", " 7.000×10⁹\n", " \n", " \n", " \n", " \n", " 7.500×10⁹\n", " \n", " \n", " \n", " \n", " 8.000×10⁹\n", " \n", " \n", " \n", " \n", " 8.500×10⁹\n", " \n", " \n", " \n", " 
\n", " 9.000×10⁹\n", " \n", " \n", " \n", " \n", " 9.500×10⁹\n", " \n", " \n", " \n", " \n", " 1.000×10¹⁰\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " h,j,k,l,arrows,drag to pan\n", " \n", " \n", " \n", " \n", " i,o,+,-,scroll,shift-drag to zoom\n", " \n", " \n", " \n", " \n", " r,dbl-click to reset\n", " \n", " \n", " \n", " \n", " c for coordinates\n", " \n", " \n", " \n", " \n", " ? 
for help\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " ?\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Tennis\n", " \n", " \n", " \n", " \n", " All Track Combined\n", " \n", " \n", " \n", " \n", " Lacrosse\n", " \n", " \n", " \n", " \n", " Ice Hockey\n", " \n", " \n", " \n", " \n", " Basketball\n", " \n", " \n", " \n", " \n", " \n", " \n", " sports\n", " \n", " \n", " \n", " \n", " \n", " \n", " Which sports have the highest absolute revenue imbalance?\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "Plot(...)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "@chain df begin\n", " @rsubset(:sum_partic_women > 0, :sum_partic_men > 0)\n", " @by([:sports],\n", " :difference=(sum(skipmissing(:rev_women)) - sum(skipmissing(:rev_men))),\n", " :abs_difference=abs(sum(skipmissing(:rev_women)) - sum(skipmissing(:rev_men))))\n", " @orderby(:abs_difference)\n", " @rsubset(abs(:abs_difference)>1e6)\n", " last(5)\n", " plot(x=:difference, y=:sports,\n", " Geom.bar(orientation=:horizontal),\n", " Guide.title(\"Which sports have the highest absolute revenue imbalance?\"))\n", "end" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Option: Use `DuckDB`" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "using DuckDB\n", "\n", "# create a new in-memory database\n", "con = DBInterface.connect(DuckDB.DB)\n", "\n", "# register our dataframe `df` as a view in the database\n", "DuckDB.register_data_frame(con, df, \"my_df\")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Now we can run queries against this dataframe with SQL:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
5×2 DataFrame
Rowsportsabs_difference
String?Int128?
1Soccer28342
2Lacrosse25637
3Basketball18583
4Golf11932
5Track and Field, Outdoor9489
" ], "text/latex": [ "\\begin{tabular}{r|cc}\n", "\t& sports & abs\\_difference\\\\\n", "\t\\hline\n", "\t& String? & Int128?\\\\\n", "\t\\hline\n", "\t1 & Soccer & 28342 \\\\\n", "\t2 & Lacrosse & 25637 \\\\\n", "\t3 & Basketball & 18583 \\\\\n", "\t4 & Golf & 11932 \\\\\n", "\t5 & Track and Field, Outdoor & 9489 \\\\\n", "\\end{tabular}\n" ], "text/plain": [ "\u001b[1m5×2 DataFrame\u001b[0m\n", "\u001b[1m Row \u001b[0m│\u001b[1m sports \u001b[0m\u001b[1m abs_difference \u001b[0m\n", " │\u001b[90m String? \u001b[0m\u001b[90m Int128? \u001b[0m\n", "─────┼──────────────────────────────────────────\n", " 1 │ Soccer 28342\n", " 2 │ Lacrosse 25637\n", " 3 │ Basketball 18583\n", " 4 │ Golf 11932\n", " 5 │ Track and Field, Outdoor 9489" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "queryStr = \"\"\"\n", "SELECT \n", "sports\n", ", abs(sum(sum_partic_men) - sum(sum_partic_women)) as abs_difference\n", "FROM my_df\n", "WHERE\n", "sum_partic_men > 0\n", "and sum_partic_women > 0\n", "GROUP BY\n", " sports\n", "ORDER BY\n", " abs_difference DESC\n", "\"\"\"\n", "\n", "# run a SQL query over the DataFrame and save it as a dataframe\n", "results = DBInterface.execute(con, queryStr) |> DataFrame\n", "first(results, 5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Julia 1.8.3", "language": "julia", "name": "julia-1.8" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }