{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Files\n", " - data is usually stored on a secondary storage medium (hard drive, flash drive, CD-RW, etc.) in named locations called files\n", " - files can be organized into folders\n", " - use the built-in open() function to work with files" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "help(open)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## write data to a file\n", "- open file with a name\n", "- write data\n", "- close file" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# old school\n", "fw = open('test.txt', 'a') # 'a' is append mode; 'w' (write mode) would overwrite the file\n", "fw.write('words\\n=====\\n')\n", "fw.write('apple\\nball\\ncat\\ndog\\n')\n", "fw.write('elephant\\n')\n", "fw.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# newer and better syntax\n", "with open('words.txt', 'w') as fw:\n", " fw.write('apple\\nball\\ncat\\ndog\\n')\n", " fw.write('elephant\\n')\n", " fw.write('zebra\\n')\n", "# the file is closed automatically when the with block finishes executing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## read data from a file\n", "- open file with its name; can provide relative or absolute path\n", "- read in various ways: one line at a time, all lines, bytes, whole file, etc.\n", "- use data\n", "- close file" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# read whole file as list of lines\n", "fr = open('words.txt', 'r') # 'r' (read) is the default mode; the file must exist\n", "data = fr.readlines()\n", "fr.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "help(fr)" ] }, { "cell_type": "code", "execution_count": 22,
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "['apple\\n', 'ball\\n', 'cat\\n', 'dog\\n', 'elephant\\n', 'zebra\\n']" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data.sort(reverse=True)\n", "with open('sorted_words.txt', 'w') as newFile: \n", " for word in data:\n", " newFile.write(word)\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## fetch a page from the web" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('teaching.html', )" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import urllib.request\n", "url = 'http://org.coloradomesa.edu/~rbasnet/teaching.html'\n", "localfile = 'teaching.html'\n", "urllib.request.urlretrieve(url, localfile)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## reading the whole file at once" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# read /usr/share/dict/words file in linux\n", "# windows path might be \" c:/temp/words.txt\" or c:\\\\temp\\words.txt\"" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are 2537 words in the file.\n" ] } ], "source": [ "with open(localfile) as f:\n", " data = f.read()\n", "words = data.split(' ')\n", "print('There are {0} words in the file.'.format(len(words)))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['\\n\\n\\n\\nTeaching', 'Interests', 'and', 'Current', 'Semester', 'Schedule\\n