{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Practice - read last line from external file \n", "Python Workbook, chp5 \n", "August 7, 2020 (past several days, internet lookup has better solutions)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 0. Check python version, active conda env " ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python 3.8.2\r\n" ] } ], "source": [ "!python --version" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# conda environments:\r\n", "#\r\n", "base /home/jyoon/conda3\r\n", "dlpy * /home/jyoon/conda3/envs/dlpy\r\n", "fluentpy /home/jyoon/conda3/envs/fluentpy\r\n", "\r\n" ] } ], "source": [ "!conda env list \n", "# * is next to active conda env. " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Start practice" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hello! Welcome to demofile.txt\n", "This file is for testing purposes.\n", "Good Luck!\n", "\n" ] } ], "source": [ "f = open('demofile.txt', 'rt') # not closed here; the next cell checks f.closed\n", "blob = f.read() # read all \n", "print(blob)" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "False\n" ] } ], "source": [ "print(f.closed) # check if f is closed at this point." ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hello! Welcome to demofile.txt\n", "\n" ] } ], "source": [ "f.close() # release the handle opened above before rebinding f\n", "f = open('demofile.txt') # Open again to go back to file start point. 
\n", "line = f.readline() # read first line, no S\n", "print(line)\n", "f.close() # close this handle; the next cell opens its own" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Good Luck!\n", "\n" ] } ], "source": [ "with open('demofile.txt') as f: # 'with' closes the file when the block ends\n", "    lines = f.readlines() # read all lines into a list. With S\n", "print(lines[-1]) # print last line" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Binary format reads 'rb' " ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'Hello! Welcome to demofile.txt\\n' b'uck!\\n'\n" ] } ], "source": [ "# Try binary format reading. \n", "with open('demofile.txt', 'rb') as f: # binary format\n", "    first = f.readline()\n", "    f.seek(-5, 2) # move to 5 bytes before end of file \n", "    # read() and readline() both work from here, but the position is in the \n", "    # middle of the last line, so only its tail is returned. \n", "    last = f.readline()\n", "    print(first, last)\n" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'Hello! Welcome to demofile.txt\\n'\n", " b'Good Luck!\\n'\n" ] } ], "source": [ "# Seek from end of file needs a loop. \n", "with open('demofile.txt', 'rb') as f: # binary format\n", "    first = f.readline()\n", "\n", "    try:\n", "        f.seek(-2, 2) # start 2 bytes before the end, skipping a trailing newline \n", "        while f.read(1) != b\"\\n\": # read(1) moves forward 1 byte\n", "            f.seek(-2, 1) # step back 2 bytes: net 1 byte backwards per pass \n", "    except OSError:\n", "        # Tried to seek before the start of the file: no earlier newline exists, \n", "        # so the file is a single line (or shorter than 2 bytes). \n", "        f.seek(0)\n", "    # the position is now the beginning of the last line \n", "    last = f.read() # read all bytes from here; f.readline() gives same output. \n", "\n", "    print(str(first)+\"\\n\", str(last))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 3. 
Good for short files, readlines() all at once, save, index [ ] into each line" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hello! Welcome to demofile.txt\n", " Good Luck!\n", "\n", "True\n" ] } ], "source": [ "# Good for short text files. \n", "with open('demofile.txt', 'rt') as f: \n", "    lines = f.readlines()\n", "    first = lines[0]\n", "    last = lines[-1]\n", "    print(first, last)\n", "\n", "print(f.closed) # check file is closed after \"with\" block end. " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 4. seek(0) to go back to start, works with 'rt' " ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lines: ['Hello! Welcome to demofile.txt\\n', 'This file is for testing purposes.\\n', 'Good Luck!\\n']\n", "blob: Hello! Welcome to demofile.txt\n", "This file is for testing purposes.\n", "Good Luck!\n", "\n", "one: ['Hello! Welcome to demofile.txt\\n']\n", "two: This file is for testing purposes.\n", "\n" ] } ], "source": [ "with open('demofile.txt') as f2: # 'with' closes f2 when the block ends\n", "    lines = f2.readlines()\n", "    print(\"lines: \", lines)\n", "\n", "    f2.seek(0) # Goto beginning of file\n", "    blob = f2.read()\n", "    print(\"blob: \", blob)\n", "\n", "    f2.seek(0)\n", "    one = f2.readlines(1) # 1 is a size hint, not a line count: reading stops once the total read exceeds it\n", "    print(\"one: \", one)\n", "\n", "    f2.seek(0)\n", "    two = f2.readlines()[1] # Works! prints 2nd item. nifty. \n", "    print(\"two: \", two)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 5. Only binary read 'rb' can use relative indexing " ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'Hello! Welcome to demofile.txt\\n'\n", "b'Good Luck!\\n'\n", "77\n", "66\n", "b'Good Luck!\\n'\n" ] } ], "source": [ "# Can seek be used with text format? \n", "# Yes, but can't do relative location other than 0, current point. 
\n", "# If the file is opened in text mode (without ‘b’), only offsets returned by tell() are legal. \n", "# none of the relative indexing from back/current location works without 'b' binary format. \n", "\n", "with open('demofile.txt', 'rb') as f: # need to be in binary format\n", "    first = f.readline()\n", "    print(first)\n", "\n", "    f.seek(0, 0)\n", "    last = f.readlines()[-1] # last line, whatever the number of lines\n", "    print(last)\n", "\n", "    # f.seek(-2, 2) would raise in text mode: there only offset 0 works with whence 1 or 2. \n", "    f.seek(0, 2) # end of file; this form also works on a text format file. \n", "    print(f.tell())\n", "    f.seek(-len(last), 1) # back up by the byte length of the last line; needs binary mode \n", "    print(f.tell())\n", "    last = f.read()\n", "    print(last)\n" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "77\n", "0\n", "Hello! Welcome to demofile.txt\n", " 31\n", "Good Luck!\n", " 77\n", "['This file is for testing purposes.\\n', 'Good Luck!\\n'] 77\n", "This file is for testing purposes.\n", " 66\n", "Good Luck!\n", "\n" ] } ], "source": [ "with open('demofile.txt') as f: # text format again\n", "    f.seek(0, 2) # end of file \n", "    loc = f.tell()\n", "    print(loc)\n", "\n", "    f.seek(0, 0) # go back to beginning of file \n", "    print(f.tell())\n", "    first = f.readline()\n", "    second_start = f.tell() # position of line 2, saved so it can be passed back to seek()\n", "    print(first, second_start)\n", "\n", "    last = f.readlines()[-1]\n", "    print(last, f.tell())\n", "\n", "    f.seek(second_start, 0) # text mode: only offsets returned by tell() are legal\n", "    two = f.readlines()\n", "    print(two, f.tell())\n", "\n", "    f.seek(second_start, 0)\n", "    second = f.readline()\n", "    print(second, f.tell())\n", "\n", "    third = f.readline()\n", "    print(third)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### 6. Doesn't solve large file last line problem, but another way to move around 'rt' format."
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "77\n", "11\n", "66\n", "Good Luck!\n", "\n" ] } ], "source": [ "with open('demofile.txt') as f: # text format again\n", "\n", "    num_ch_all = len(f.read())\n", "    print(num_ch_all)\n", "\n", "    f.seek(0) # back to beginning\n", "    # Walk the file with readline(), remembering tell() at the start of each line. \n", "    # In text mode only 0 and values returned by tell() are legal seek offsets, \n", "    # so a character count such as num_ch_all - num_ch_last is not a safe offset \n", "    # (it only matches for single-byte text with plain \\n line endings). \n", "    last_start, last_line = 0, ''\n", "    pos = f.tell()\n", "    for line in iter(f.readline, ''): # readline() keeps tell() usable; 'for line in f' disables it\n", "        last_start, last_line = pos, line\n", "        pos = f.tell()\n", "\n", "    num_ch_last = len(last_line)\n", "    print(num_ch_last)\n", "\n", "    f.seek(last_start, 0) # offset from file beginning, as reported by tell()\n", "    print(f.tell())\n", "    print(f.read())" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ],
"metadata": { "kernelspec": { "display_name": "Python 3.8.2 64-bit ('dlpy': conda)", "language": "python", "name": "python38264bitdlpycondaee83557e2f484524a67e008c475209c4" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" } }, "nbformat": 4, "nbformat_minor": 2 }