{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install the notebook's dependencies in a single call.\n",
"# %pip (unlike !pip) installs into the environment of the running kernel,\n",
"# not into whichever pip happens to be first on the shell PATH.\n",
"%pip install matplotlib pandas\n",
"# Render matplotlib figures inline, below the cell that produces them.\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Création d'un jeu de données simple (DataFrame)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Colonne 1 | \n",
" Colonne 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Colonne 1 Colonne 2\n",
"0 1 2"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"pd.DataFrame({'Colonne 1': [1], 'Colonne 2': [2]})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Modification des libellés"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Colonne 1 | \n",
" Colonne 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" Ligne 1 | \n",
" 35 | \n",
" 1 | \n",
"
\n",
" \n",
" Ligne 2 | \n",
" 41 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Colonne 1 Colonne 2\n",
"Ligne 1 35 1\n",
"Ligne 2 41 2"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame({'Colonne 1': [35, 41], 'Colonne 2': [1, 2]}, index=['Ligne 1', 'Ligne 2'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Création d'une série"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Index1 Valeur1\n",
"Index2 Valeur2\n",
"Index3 Valeur3\n",
"Index4 Valeur4\n",
"Name: Ma série, dtype: object"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.Series([\"Valeur1\", \"Valeur2\", \"Valeur3\", \"Valeur4\"], index=[\"Index1\", \"Index2\", \"Index3\", \"Index4\"], name='Ma série')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Lecture d'un fichier CSV"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Premières lignes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classe | \n",
" survie | \n",
" nom | \n",
" sexe | \n",
" age | \n",
" tarif | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" Allen, Miss. Elisabeth Walton | \n",
" 2 | \n",
" 29.0 | \n",
" 211.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" Allison, Master. Hudson Trevor | \n",
" 1 | \n",
" 1.0 | \n",
" 152.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Miss. Helen Loraine | \n",
" 2 | \n",
" 2.0 | \n",
" 152.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Mr. Hudson Joshua Creighton | \n",
" 1 | \n",
" 30.0 | \n",
" 152.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Mrs. Hudson Bessie Waldo Daniels | \n",
" 2 | \n",
" 25.0 | \n",
" 152.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classe survie nom sexe age \\\n",
"0 1 1 Allen, Miss. Elisabeth Walton 2 29.0 \n",
"1 1 1 Allison, Master. Hudson Trevor 1 1.0 \n",
"2 1 0 Allison, Miss. Helen Loraine 2 2.0 \n",
"3 1 0 Allison, Mr. Hudson Joshua Creighton 1 30.0 \n",
"4 1 0 Allison, Mrs. Hudson Bessie Waldo Daniels 2 25.0 \n",
"\n",
" tarif \n",
"0 211.0 \n",
"1 152.0 \n",
"2 152.0 \n",
"3 152.0 \n",
"4 152.0 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv = pd.read_csv('titanic.csv',sep=';')\n",
"csv.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Métadonnées (statistiques descriptives)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classe | \n",
" survie | \n",
" sexe | \n",
" age | \n",
" tarif | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1309.000000 | \n",
" 1309.000000 | \n",
" 1309.000000 | \n",
" 1046.000000 | \n",
" 1308.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 2.294882 | \n",
" 0.381971 | \n",
" 1.355997 | \n",
" 29.897706 | \n",
" 33.363150 | \n",
"
\n",
" \n",
" std | \n",
" 0.837836 | \n",
" 0.486055 | \n",
" 0.478997 | \n",
" 14.414973 | \n",
" 51.751529 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
" 21.000000 | \n",
" 8.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
" 28.000000 | \n",
" 14.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 3.000000 | \n",
" 1.000000 | \n",
" 2.000000 | \n",
" 39.000000 | \n",
" 31.000000 | \n",
"
\n",
" \n",
" max | \n",
" 3.000000 | \n",
" 1.000000 | \n",
" 2.000000 | \n",
" 80.000000 | \n",
" 512.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classe survie sexe age tarif\n",
"count 1309.000000 1309.000000 1309.000000 1046.000000 1308.000000\n",
"mean 2.294882 0.381971 1.355997 29.897706 33.363150\n",
"std 0.837836 0.486055 0.478997 14.414973 51.751529\n",
"min 1.000000 0.000000 1.000000 0.000000 0.000000\n",
"25% 2.000000 0.000000 1.000000 21.000000 8.000000\n",
"50% 3.000000 0.000000 1.000000 28.000000 14.000000\n",
"75% 3.000000 1.000000 2.000000 39.000000 31.000000\n",
"max 3.000000 1.000000 2.000000 80.000000 512.000000"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dernières lignes"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classe | \n",
" survie | \n",
" nom | \n",
" sexe | \n",
" age | \n",
" tarif | \n",
"
\n",
" \n",
" \n",
" \n",
" 1304 | \n",
" 3 | \n",
" 0 | \n",
" Zabour, Miss. Hileni | \n",
" 2 | \n",
" 15.0 | \n",
" 14.0 | \n",
"
\n",
" \n",
" 1305 | \n",
" 3 | \n",
" 0 | \n",
" Zabour, Miss. Thamine | \n",
" 2 | \n",
" NaN | \n",
" 14.0 | \n",
"
\n",
" \n",
" 1306 | \n",
" 3 | \n",
" 0 | \n",
" Zakarian, Mr. Mapriededer | \n",
" 1 | \n",
" 27.0 | \n",
" 7.0 | \n",
"
\n",
" \n",
" 1307 | \n",
" 3 | \n",
" 0 | \n",
" Zakarian, Mr. Ortin | \n",
" 1 | \n",
" 27.0 | \n",
" 7.0 | \n",
"
\n",
" \n",
" 1308 | \n",
" 3 | \n",
" 0 | \n",
" Zimmerman, Mr. Leo | \n",
" 1 | \n",
" 29.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classe survie nom sexe age tarif\n",
"1304 3 0 Zabour, Miss. Hileni 2 15.0 14.0\n",
"1305 3 0 Zabour, Miss. Thamine 2 NaN 14.0\n",
"1306 3 0 Zakarian, Mr. Mapriededer 1 27.0 7.0\n",
"1307 3 0 Zakarian, Mr. Ortin 1 27.0 7.0\n",
"1308 3 0 Zimmerman, Mr. Leo 1 29.0 8.0"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dimensions du DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(1309, 6)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Affichage du fichier CSV"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classe | \n",
" survie | \n",
" nom | \n",
" sexe | \n",
" age | \n",
" tarif | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" Allen, Miss. Elisabeth Walton | \n",
" 2 | \n",
" 29.0 | \n",
" 211.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" Allison, Master. Hudson Trevor | \n",
" 1 | \n",
" 1.0 | \n",
" 152.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1307 | \n",
" 3 | \n",
" 0 | \n",
" Zakarian, Mr. Ortin | \n",
" 1 | \n",
" 27.0 | \n",
" 7.0 | \n",
"
\n",
" \n",
" 1308 | \n",
" 3 | \n",
" 0 | \n",
" Zimmerman, Mr. Leo | \n",
" 1 | \n",
" 29.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
"
\n",
"
1309 rows × 6 columns
\n",
"
"
],
"text/plain": [
" classe survie nom sexe age tarif\n",
"0 1 1 Allen, Miss. Elisabeth Walton 2 29.0 211.0\n",
"1 1 1 Allison, Master. Hudson Trevor 1 1.0 152.0\n",
"... ... ... ... ... ... ...\n",
"1307 3 0 Zakarian, Mr. Ortin 1 27.0 7.0\n",
"1308 3 0 Zimmerman, Mr. Leo 1 29.0 8.0\n",
"\n",
"[1309 rows x 6 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.set_option(\"display.max_rows\", 5)\n",
"csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Accès aux données (index)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accès à un vecteur (colonne)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"0 Allen, Miss. Elisabeth Walton\n",
"1 Allison, Master. Hudson Trevor\n",
" ... \n",
"1307 Zakarian, Mr. Ortin\n",
"1308 Zimmerman, Mr. Leo\n",
"Name: nom, Length: 1309, dtype: object"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv.nom"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Allen, Miss. Elisabeth Walton\n",
"1 Allison, Master. Hudson Trevor\n",
" ... \n",
"1307 Zakarian, Mr. Ortin\n",
"1308 Zimmerman, Mr. Leo\n",
"Name: nom, Length: 1309, dtype: object"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv[\"nom\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accès à une cellule"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Allen, Miss. Elisabeth Walton'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv.nom[0]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Allen, Miss. Elisabeth Walton'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv[\"nom\"][0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Récupération des 4 premières colonnes & 3 premières lignes"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classe | \n",
" survie | \n",
" nom | \n",
" sexe | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" Allen, Miss. Elisabeth Walton | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" Allison, Master. Hudson Trevor | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" Allison, Miss. Helen Loraine | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classe survie nom sexe\n",
"0 1 1 Allen, Miss. Elisabeth Walton 2\n",
"1 1 1 Allison, Master. Hudson Trevor 1\n",
"2 1 0 Allison, Miss. Helen Loraine 2"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv.iloc[:3, :4]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Filtrage sur les colonnes (via les labels)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" nom | \n",
" sexe | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Allen, Miss. Elisabeth Walton | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" Allison, Master. Hudson Trevor | \n",
" 1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1307 | \n",
" Zakarian, Mr. Ortin | \n",
" 1 | \n",
"
\n",
" \n",
" 1308 | \n",
" Zimmerman, Mr. Leo | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
1309 rows × 2 columns
\n",
"
"
],
"text/plain": [
" nom sexe\n",
"0 Allen, Miss. Elisabeth Walton 2\n",
"1 Allison, Master. Hudson Trevor 1\n",
"... ... ...\n",
"1307 Zakarian, Mr. Ortin 1\n",
"1308 Zimmerman, Mr. Leo 1\n",
"\n",
"[1309 rows x 2 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep every row (':') and only the 'nom' and 'sexe' columns, selected by label.\n",
"# Several labels are passed to .loc as a list; a tuple is the spelling of a single MultiIndex key.\n",
"csv.loc[:, ['nom', 'sexe']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}