{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ " Τμήμα Πληροφορικής και Τηλεπικοινωνιών - Άρτα \n", " Πανεπιστήμιο Ιωαννίνων \n", "\n", " Γκόγκος Χρήστος \n", " http://chgogos.github.io/\n", " \n", " Τελευταία ενημέρωση: 2/4/2022" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Excel\n", "\n", "Επεξεργασία δεδομένων που βρίσκονται σε αρχεία Excel. \n", "\n", "Η ανάγνωση και εγγραφή αρχείων Excel γίνεται με τη χρήση επιλέον modules που πρέπει να εγκατασταθούν. Δείτε το [www.python-excel.org](https://www.python-excel.org/).\n", "\n", "Στα ακόλουθα παραδείγματα θα χρησιμοποιηθεί το openpyxl και το pandas.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Φόρτωση αρχείων excel με το openpyxl\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['cal_housing']\n", "****************************************\n", "longitude latitude housingMedianAge totalRooms totalBedrooms population households medianIncome medianHouseValue \n", "-122.23 37.88 41 880 129 322 126 8.3252 452600 \n", "-122.22 37.86 21 7099 1106 2401 1138 8.3014 358500 \n", "-122.24 37.85 52 1467 190 496 177 7.2574 352100 \n", "-122.25 37.85 52 1274 235 558 219 5.6431 341300 \n", "****************************************\n", "longitude -122.23 -122.22 -122.24 -122.25 \n", "latitude 37.88 37.86 37.85 37.85 \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\chgogos\\AppData\\Local\\Temp\\ipykernel_39616\\1282899620.py:5: DeprecationWarning: Call to deprecated function get_sheet_names (Use wb.sheetnames).\n", " all_sheets = book.get_sheet_names()\n" ] } ], "source": [ "import openpyxl\n", "\n", "path = \"../../../datasets/cal_housing.xlsx\"\n", "book = openpyxl.load_workbook(path)\n", "all_sheets = book.get_sheet_names()\n", "print(all_sheets)\n", "sheet = book.active\n", "\n", "print(\"*\" * 40)\n", "\n", "# διάσχιση κατά γραμμές (για τις 5 πρώτες γραμμές του φύλλου)\n", "for row in sheet.iter_rows(min_row=1, max_row=5):\n", " for cell in row:\n", " print(cell.value, end=\" \")\n", " print()\n", "\n", "print(\"*\" * 40)\n", "\n", "# διάσχιση κατά στήλες (στις 2 πρώτες στήλες, για τις 5 πρώτες γραμμές του φύλλου)\n", "for row in sheet.iter_cols(min_row=1, max_row=5, min_col=1, max_col=2):\n", " for cell in row:\n", " print(cell.value, end=\" \")\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Φόρτωση αρχείων excel με το pandas\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['california_housing']\n", "\n", "RangeIndex: 16383 entries, 0 to 16382\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 longitude 16383 non-null float64\n", " 1 latitude 16383 non-null float64\n", " 2 housingMedianAge 16383 non-null int64 \n", " 3 totalRooms 16383 non-null int64 \n", " 4 totalBedrooms 16383 non-null int64 \n", " 5 population 16383 non-null int64 \n", " 6 households 16383 non-null int64 \n", " 7 medianIncome 16383 non-null float64\n", " 8 medianHouseValue 16383 non-null int64 \n", "dtypes: float64(3), int64(6)\n", "memory usage: 1.1 MB\n", "None\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
longitudelatitudehousingMedianAgetotalRoomstotalBedroomspopulationhouseholdsmedianIncomemedianHouseValue
0-122.2337.88418801293221268.3252452600
1-122.2237.862170991106240111388.3014358500
2-122.2437.855214671904961777.2574352100
3-122.2537.855212742355582195.6431341300
4-122.2537.855216272805652593.8462342200
..............................
16378-121.2938.0212200642618493962.543799000
16379-121.3038.0216271762133436432.5473106300
16380-121.3038.0311286665414045252.505095000
16381-121.3038.02415153844913482.852387500
16382-121.2938.004139232217843092.3750124500
\n", "

16383 rows × 9 columns

\n", "
" ], "text/plain": [ " longitude latitude housingMedianAge totalRooms totalBedrooms \\\n", "0 -122.23 37.88 41 880 129 \n", "1 -122.22 37.86 21 7099 1106 \n", "2 -122.24 37.85 52 1467 190 \n", "3 -122.25 37.85 52 1274 235 \n", "4 -122.25 37.85 52 1627 280 \n", "... ... ... ... ... ... \n", "16378 -121.29 38.02 12 2006 426 \n", "16379 -121.30 38.02 16 2717 621 \n", "16380 -121.30 38.03 11 2866 654 \n", "16381 -121.30 38.02 4 1515 384 \n", "16382 -121.29 38.00 4 1392 322 \n", "\n", " population households medianIncome medianHouseValue \n", "0 322 126 8.3252 452600 \n", "1 2401 1138 8.3014 358500 \n", "2 496 177 7.2574 352100 \n", "3 558 219 5.6431 341300 \n", "4 565 259 3.8462 342200 \n", "... ... ... ... ... \n", "16378 1849 396 2.5437 99000 \n", "16379 3343 643 2.5473 106300 \n", "16380 1404 525 2.5050 95000 \n", "16381 491 348 2.8523 87500 \n", "16382 1784 309 2.3750 124500 \n", "\n", "[16383 rows x 9 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# χρειάζεται να εγκατασταθεί το module xlrd\n", "\n", "import pandas as pd\n", "\n", "path = \"../../../datasets/cal_housing.xls\"\n", "xl = pd.ExcelFile(path)\n", "print(xl.sheet_names)\n", "\n", "df = xl.parse('california_housing')\n", "print(df.info())\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.8.0 64-bit", "metadata": { "interpreter": { "hash": "2334294e789e06453e89f7d5c3c2573679701f9bb5b76ad2158f91cca5c9244c" } }, "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 2 }, "nbformat": 4, "nbformat_minor": 2 }