{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"anime_id 14055.982035\n",
"episodes 13.939156\n",
"rating 6.507956\n",
"members 18924.950769\n",
"dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Directory URL of the sample dataset; it must end with '/' because the file name is appended directly.\n",
"base_url = 'https://raw.githubusercontent.com/practical-jupyter/sample-data/master/anime/'\n",
"# Build the URL by plain concatenation. os.path.join is meant for filesystem paths and\n",
"# uses an OS-dependent separator; it only yielded a valid URL because base_url ends with '/'.\n",
"anime_master_csv = base_url + 'anime_master.csv'\n",
"df = pd.read_csv(anime_master_csv)\n",
"\n",
"# Mean of every numeric column. numeric_only=True is needed on pandas >= 2.0, where\n",
"# non-numeric columns (e.g. genre, type) are no longer dropped silently and raise TypeError.\n",
"df.mean(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"190668879"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum of the 'members' column\n",
"df.loc[:, 'members'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Smallest value in the 'members' column\n",
"df.loc[:, 'members'].min()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1013917"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Largest value in the 'members' column\n",
"df.loc[:, 'members'].max()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" anime_id | \n",
" episodes | \n",
" rating | \n",
" members | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 10075.0 | \n",
" 10075.0 | \n",
" 10075.0 | \n",
" 10075.0 | \n",
"
\n",
" \n",
" mean | \n",
" 14056.0 | \n",
" 13.9 | \n",
" 6.5 | \n",
" 18925.0 | \n",
"
\n",
" \n",
" std | \n",
" 11294.9 | \n",
" 50.8 | \n",
" 1.1 | \n",
" 57117.5 | \n",
"
\n",
" \n",
" min | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.7 | \n",
" 12.0 | \n",
"
\n",
" \n",
" 25% | \n",
" 3431.0 | \n",
" 1.0 | \n",
" 5.9 | \n",
" 177.0 | \n",
"
\n",
" \n",
" 50% | \n",
" 10526.0 | \n",
" 1.0 | \n",
" 6.6 | \n",
" 1227.0 | \n",
"
\n",
" \n",
" 75% | \n",
" 24438.0 | \n",
" 13.0 | \n",
" 7.3 | \n",
" 10254.0 | \n",
"
\n",
" \n",
" max | \n",
" 34519.0 | \n",
" 1818.0 | \n",
" 10.0 | \n",
" 1013917.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" anime_id episodes rating members\n",
"count 10075.0 10075.0 10075.0 10075.0\n",
"mean 14056.0 13.9 6.5 18925.0\n",
"std 11294.9 50.8 1.1 57117.5\n",
"min 1.0 1.0 1.7 12.0\n",
"25% 3431.0 1.0 5.9 177.0\n",
"50% 10526.0 1.0 6.6 1227.0\n",
"75% 24438.0 13.0 7.3 10254.0\n",
"max 34519.0 1818.0 10.0 1013917.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Basic summary statistics, rounded to one decimal place\n",
"(\n",
"    df\n",
"    .describe()\n",
"    .round(decimals=1)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" anime_id | \n",
" episodes | \n",
" rating | \n",
" members | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 10075.0 | \n",
" 10075.0 | \n",
" 10075.0 | \n",
" 10075.0 | \n",
"
\n",
" \n",
" mean | \n",
" 14056.0 | \n",
" 13.9 | \n",
" 6.5 | \n",
" 18925.0 | \n",
"
\n",
" \n",
" std | \n",
" 11294.9 | \n",
" 50.8 | \n",
" 1.1 | \n",
" 57117.5 | \n",
"
\n",
" \n",
" min | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.7 | \n",
" 12.0 | \n",
"
\n",
" \n",
" 10% | \n",
" 1259.4 | \n",
" 1.0 | \n",
" 5.1 | \n",
" 74.0 | \n",
"
\n",
" \n",
" 50% | \n",
" 10526.0 | \n",
" 1.0 | \n",
" 6.6 | \n",
" 1227.0 | \n",
"
\n",
" \n",
" 90% | \n",
" 31190.0 | \n",
" 37.0 | \n",
" 7.8 | \n",
" 47587.6 | \n",
"
\n",
" \n",
" max | \n",
" 34519.0 | \n",
" 1818.0 | \n",
" 10.0 | \n",
" 1013917.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" anime_id episodes rating members\n",
"count 10075.0 10075.0 10075.0 10075.0\n",
"mean 14056.0 13.9 6.5 18925.0\n",
"std 11294.9 50.8 1.1 57117.5\n",
"min 1.0 1.0 1.7 12.0\n",
"10% 1259.4 1.0 5.1 74.0\n",
"50% 10526.0 1.0 6.6 1227.0\n",
"90% 31190.0 37.0 7.8 47587.6\n",
"max 34519.0 1818.0 10.0 1013917.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics with the 10th and 90th percentiles requested explicitly,\n",
"# rounded to one decimal place\n",
"(\n",
"    df\n",
"    .describe(percentiles=[0.1, 0.9])\n",
"    .round(decimals=1)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" genre | \n",
" type | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 10075 | \n",
" 10075 | \n",
"
\n",
" \n",
" unique | \n",
" 2735 | \n",
" 6 | \n",
"
\n",
" \n",
" top | \n",
" Comedy | \n",
" TV | \n",
"
\n",
" \n",
" freq | \n",
" 500 | \n",
" 3330 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" genre type\n",
"count 10075 10075\n",
"unique 2735 6\n",
"top Comedy TV\n",
"freq 500 3330"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary of the two text columns (count / unique / top / freq)\n",
"df.loc[:, ['genre', 'type']].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}