{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "anime_id 14055.982035\n", "episodes 13.939156\n", "rating 6.507956\n", "members 18924.950769\n", "dtype: float64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the anime master table from the sample-data repository on GitHub.\n", "base_url = 'https://raw.githubusercontent.com/practical-jupyter/sample-data/master/anime/'\n", "anime_master_csv = os.path.join(base_url, 'anime_master.csv')\n", "df = pd.read_csv(anime_master_csv)\n", "\n", "# Mean of each numeric column. numeric_only=True leaves out the text columns\n", "# (e.g. genre, type), which pandas >= 2.0 would otherwise reject with a TypeError.\n", "df.mean(numeric_only=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "190668879" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sum of the 'members' column.\n", "df['members'].sum()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Smallest value in the 'members' column.\n", "df['members'].min()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1013917" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Largest value in the 'members' column.\n", "df['members'].max()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
anime_idepisodesratingmembers
count10075.010075.010075.010075.0
mean14056.013.96.518925.0
std11294.950.81.157117.5
min1.01.01.712.0
25%3431.01.05.9177.0
50%10526.01.06.61227.0
75%24438.013.07.310254.0
max34519.01818.010.01013917.0
\n", "
" ], "text/plain": [ " anime_id episodes rating members\n", "count 10075.0 10075.0 10075.0 10075.0\n", "mean 14056.0 13.9 6.5 18925.0\n", "std 11294.9 50.8 1.1 57117.5\n", "min 1.0 1.0 1.7 12.0\n", "25% 3431.0 1.0 5.9 177.0\n", "50% 10526.0 1.0 6.6 1227.0\n", "75% 24438.0 13.0 7.3 10254.0\n", "max 34519.0 1818.0 10.0 1013917.0" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Basic summary statistics, rounded to one decimal place.\n", "df.describe().round(decimals=1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
anime_idepisodesratingmembers
count10075.010075.010075.010075.0
mean14056.013.96.518925.0
std11294.950.81.157117.5
min1.01.01.712.0
10%1259.41.05.174.0
50%10526.01.06.61227.0
90%31190.037.07.847587.6
max34519.01818.010.01013917.0
\n", "
" ], "text/plain": [ " anime_id episodes rating members\n", "count 10075.0 10075.0 10075.0 10075.0\n", "mean 14056.0 13.9 6.5 18925.0\n", "std 11294.9 50.8 1.1 57117.5\n", "min 1.0 1.0 1.7 12.0\n", "10% 1259.4 1.0 5.1 74.0\n", "50% 10526.0 1.0 6.6 1227.0\n", "90% 31190.0 37.0 7.8 47587.6\n", "max 34519.0 1818.0 10.0 1013917.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Summary statistics at the 10th and 90th percentiles, rounded to one decimal place.\n", "(\n", "    df\n", "    .describe(percentiles=[0.1, 0.9])\n", "    .round(decimals=1)\n", ")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genretype
count1007510075
unique27356
topComedyTV
freq5003330
\n", "
" ], "text/plain": [ " genre type\n", "count 10075 10075\n", "unique 2735 6\n", "top Comedy TV\n", "freq 500 3330" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Summary of the two text columns: count, unique values, most frequent value and its frequency.\n", "df.loc[:, ['genre', 'type']].describe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }