{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Built-in Data Structures, Functions, and Files" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Data Structures and Sequences" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Tuple" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tup = 4, 5, 6\n", "tup" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "nested_tup = (4, 5, 6), (7, 8)\n", "nested_tup" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tuple([4, 0, 2])\n", "tup = tuple('string')\n", "tup" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tup[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tup = tuple(['foo', [1, 2], True])\n", "tup[2] = False" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tup[1].append(3)\n", "tup" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "(4, None, 'foo') + (6, 0) + ('bar',)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "('foo', 'bar') * 4" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### 
Unpacking tuples" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tup = (4, 5, 6)\n", "a, b, c = tup\n", "b" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tup = 4, 5, (6, 7)\n", "a, b, (c, d) = tup\n", "d" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "tmp = a\n", "a = b\n", "b = tmp" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a, b = 1, 2\n", "a\n", "b\n", "b, a = a, b\n", "a\n", "b" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]\n", "for a, b, c in seq:\n", " print('a={0}, b={1}, c={2}'.format(a, b, c))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "values = 1, 2, 3, 4, 5\n", "a, b, *rest = values\n", "a, b\n", "rest" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a, b, *_ = values" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Tuple methods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a = (1, 2, 2, 2, 3, 4, 2)\n", "a.count(2)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### List" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], 
"source": [ "a_list = [2, 3, 7, None]\n", "tup = ('foo', 'bar', 'baz')\n", "b_list = list(tup)\n", "b_list\n", "b_list[1] = 'peekaboo'\n", "b_list" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "gen = range(10)\n", "gen\n", "list(gen)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Adding and removing elements" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "b_list.append('dwarf')\n", "b_list" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "b_list.insert(1, 'red')\n", "b_list" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "b_list.pop(2)\n", "b_list" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "b_list.append('foo')\n", "b_list\n", "b_list.remove('foo')\n", "b_list" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "'dwarf' in b_list" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "'dwarf' not in b_list" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Concatenating and combining lists" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "[4, None, 'foo'] + [7, 8, (2, 3)]" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "x = [4, None, 'foo']\n", "x.extend([7, 8, (2, 3)])\n", "x" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "everything = []\n", "for chunk in list_of_lists:\n", " everything.extend(chunk)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "everything = []\n", "for chunk in list_of_lists:\n", " everything = everything + chunk" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Sorting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a = [7, 2, 5, 1, 3]\n", "a.sort()\n", "a" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "b = ['saw', 'small', 'He', 'foxes', 'six']\n", "b.sort(key=len)\n", "b" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Binary search and maintaining a sorted list" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import bisect\n", "c = [1, 2, 2, 2, 3, 4, 7]\n", "bisect.bisect(c, 2)\n", "bisect.bisect(c, 5)\n", "bisect.insort(c, 6)\n", "c" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Slicing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq = [7, 2, 3, 7, 5, 6, 0, 1]\n", "seq[1:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq[3:4] = [6, 3]\n", "seq" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq[:5]\n", "seq[3:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq[-4:]\n", "seq[-6:-2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq[::2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq[::-1]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Built-in Sequence Functions" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### enumerate" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "i = 0\n", "for value in collection:\n", " # do something with value\n", " i += 1" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "for i, value in enumerate(collection):\n", " # do something with value" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "some_list = ['foo', 'bar', 'baz']\n", "mapping = {}\n", "for i, v in enumerate(some_list):\n", " mapping[v] = i\n", "mapping" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### sorted" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sorted([7, 1, 2, 6, 0, 3, 2])\n", "sorted('horse race')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### zip" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq1 = ['foo', 'bar', 'baz']\n", "seq2 = ['one', 'two', 'three']\n", "zipped = zip(seq1, seq2)\n", "list(zipped)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "seq3 = [False, True]\n", "list(zip(seq1, seq2, seq3))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for i, (a, b) in enumerate(zip(seq1, seq2)):\n", " print('{0}: {1}, {2}'.format(i, a, b))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),\n", " ('Curt', 'Schilling')]\n", "first_names, last_names = zip(*pitchers)\n", "first_names\n", "last_names" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### reversed" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "list(reversed(range(10)))" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### dict" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "empty_dict = {}\n", "d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}\n", "d1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "d1[7] = 'an integer'\n", "d1\n", "d1['b']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": 
[], "source": [ "'b' in d1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "d1[5] = 'some value'\n", "d1\n", "d1['dummy'] = 'another value'\n", "d1\n", "del d1[5]\n", "d1\n", "ret = d1.pop('dummy')\n", "ret\n", "d1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "list(d1.keys())\n", "list(d1.values())" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "d1.update({'b' : 'foo', 'c' : 12})\n", "d1" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Creating dicts from sequences" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "mapping = {}\n", "for key, value in zip(key_list, value_list):\n", " mapping[key] = value" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "mapping = dict(zip(range(5), reversed(range(5))))\n", "mapping" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Default values" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "if key in some_dict:\n", " value = some_dict[key]\n", "else:\n", " value = default_value" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "value = some_dict.get(key, default_value)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "words = ['apple', 'bat', 'bar', 'atom', 'book']\n", "by_letter = {}\n", "for word in words:\n", " letter = word[0]\n", " if letter not in 
by_letter:\n", " by_letter[letter] = [word]\n", " else:\n", " by_letter[letter].append(word)\n", "by_letter" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "for word in words:\n", " letter = word[0]\n", " by_letter.setdefault(letter, []).append(word)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "from collections import defaultdict\n", "by_letter = defaultdict(list)\n", "for word in words:\n", " by_letter[word[0]].append(word)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Valid dict key types" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "hash('string')\n", "hash((1, 2, (2, 3)))\n", "hash((1, 2, [2, 3])) # fails because lists are mutable" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "d = {}\n", "d[tuple([1, 2, 3])] = 5\n", "d" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### set" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "set([2, 2, 2, 1, 3, 3])\n", "{2, 2, 2, 1, 3, 3}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a = {1, 2, 3, 4, 5}\n", "b = {3, 4, 5, 6, 7, 8}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a.union(b)\n", "a | b" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a.intersection(b)\n", "a & b" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "c = a.copy()\n", "c |= b\n", "c\n", "d = a.copy()\n", "d &= b\n", "d" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "my_data = [1, 2, 3, 4]\n", "my_set = {tuple(my_data)}\n", "my_set" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a_set = {1, 2, 3, 4, 5}\n", "{1, 2, 3}.issubset(a_set)\n", "a_set.issuperset({1, 2, 3})" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "{1, 2, 3} == {3, 2, 1}" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### List, Set, and Dict Comprehensions" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "[expr for val in collection if condition]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "result = []\n", "for val in collection:\n", "    if condition:\n", "        result.append(expr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "strings = ['a', 'as', 'bat', 'car', 'dove', 'python']\n", "[x.upper() for x in strings if len(x) > 2]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "dict_comp = {key-expr : value-expr for value in collection if condition}" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "set_comp = {expr for value in collection if condition}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "unique_lengths = {len(x) for x in strings}\n", "unique_lengths" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "set(map(len, strings))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "loc_mapping = {val : index for index, val in enumerate(strings)}\n", "loc_mapping" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Nested list comprehensions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],\n", " ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "names_of_interest = []\n", "for names in all_data:\n", " enough_es = [name for name in names if name.count('e') >= 2]\n", " names_of_interest.extend(enough_es)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "result = [name for names in all_data for name in names\n", " if name.count('e') >= 2]\n", "result" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]\n", "flattened = [x for tup in some_tuples for x in tup]\n", "flattened" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "flattened = []\n", "\n", "for tup in some_tuples:\n", " for x in tup:\n", " flattened.append(x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "[[x for x in tup] for tup in some_tuples]" ] }, { "cell_type": "markdown", "metadata": { 
"deletable": true, "editable": true }, "source": [ "## Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": true, "editable": true }, "outputs": [], "source": [ "def my_function(x, y, z=1.5):\n", " if z > 1:\n", " return z * (x + y)\n", " else:\n", " return z / (x + y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": true, "editable": true }, "outputs": [], "source": [ "my_function(5, 6, z=0.7)\n", "my_function(3.14, 7, 3.5)\n", "my_function(10, 20)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Namespaces, Scope, and Local Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": true, "editable": true }, "outputs": [], "source": [ "def func():\n", " a = []\n", " for i in range(5):\n", " a.append(i)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": true, "editable": true }, "outputs": [], "source": [ "a = []\n", "def func():\n", " for i in range(5):\n", " a.append(i)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a = None\n", "def bind_a_variable():\n", " global a\n", " a = []\n", "bind_a_variable()\n", "print(a)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Returning Multiple Values" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": true, "editable": true }, "outputs": [], "source": [ "def f():\n", " a = 5\n", " b = 6\n", " c = 7\n", " return a, b, c\n", "\n", "a, b, c = f()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": true, "editable": true }, "outputs": [], "source": [ "return_value = f()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "deletable": true, "editable": true }, "outputs": [], "source": [ "def f():\n", " a = 5\n", " b = 6\n", " c = 7\n", " return {'a' : a, 'b' : b, 'c' : c}" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Functions Are Objects" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, 
"deletable": true, "editable": true }, "outputs": [], "source": [ "states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',\n", " 'south carolina##', 'West virginia?']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import re\n", "\n", "def clean_strings(strings):\n", " result = []\n", " for value in strings:\n", " value = value.strip()\n", " value = re.sub('[!#?]', '', value)\n", " value = value.title()\n", " result.append(value)\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "clean_strings(states)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def remove_punctuation(value):\n", " return re.sub('[!#?]', '', value)\n", "\n", "clean_ops = [str.strip, remove_punctuation, str.title]\n", "\n", "def clean_strings(strings, ops):\n", " result = []\n", " for value in strings:\n", " for function in ops:\n", " value = function(value)\n", " result.append(value)\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "clean_strings(states, clean_ops)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for x in map(remove_punctuation, states):\n", " print(x)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Anonymous (Lambda) Functions" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "def short_function(x):\n", " return x * 2\n", "\n", "equiv_anon = lambda x: x * 2" ] }, { "cell_type": "markdown", "metadata": { 
"deletable": true, "editable": true }, "source": [ "def apply_to_list(some_list, f):\n", " return [f(x) for x in some_list]\n", "\n", "ints = [4, 0, 1, 5, 6]\n", "apply_to_list(ints, lambda x: x * 2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "strings = ['foo', 'card', 'bar', 'aaaa', 'abab']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "strings.sort(key=lambda x: len(set(x)))\n", "strings" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Currying: Partial Argument Application" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "def add_numbers(x, y):\n", " return x + y" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "add_five = lambda y: add_numbers(5, y)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "from functools import partial\n", "add_five = partial(add_numbers, 5)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Generators" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "some_dict = {'a': 1, 'b': 2, 'c': 3}\n", "for key in some_dict:\n", " print(key)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dict_iterator = iter(some_dict)\n", "dict_iterator" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "list(dict_iterator)" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{ "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def squares(n=10):\n", " print('Generating squares from 1 to {0}'.format(n ** 2))\n", " for i in range(1, n + 1):\n", " yield i ** 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "gen = squares()\n", "gen" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for x in gen:\n", " print(x, end=' ')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Generator expressions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "gen = (x ** 2 for x in range(100))\n", "gen" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "def _make_gen():\n", " for x in range(100):\n", " yield x ** 2\n", "gen = _make_gen()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sum(x ** 2 for x in range(100))\n", "dict((i, i **2) for i in range(5))" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### itertools module" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import itertools\n", "first_letter = lambda x: x[0]\n", "names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']\n", "for letter, names in itertools.groupby(names, first_letter):\n", " print(letter, list(names)) # names is a generator" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Errors and Exception Handling" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "float('1.2345')\n", "float('something')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def attempt_float(x):\n", " try:\n", " return float(x)\n", " except:\n", " return x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "attempt_float('1.2345')\n", "attempt_float('something')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "float((1, 2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def attempt_float(x):\n", " try:\n", " return float(x)\n", " except ValueError:\n", " return x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "attempt_float((1, 2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def attempt_float(x):\n", " try:\n", " return float(x)\n", " except (TypeError, ValueError):\n", " return x" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "f = open(path, 'w')\n", "\n", "try:\n", " write_to_file(f)\n", "finally:\n", " f.close()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "f = open(path, 'w')\n", "\n", "try:\n", " write_to_file(f)\n", "except:\n", " print('Failed')\n", "else:\n", " print('Succeeded')\n", "finally:\n", " f.close()" ] }, { "cell_type": "markdown", "metadata": { 
"deletable": true, "editable": true }, "source": [ "#### Exceptions in IPython" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "In [10]: %run examples/ipython_bug.py\n", "---------------------------------------------------------------------------\n", "AssertionError Traceback (most recent call last)\n", "/home/wesm/code/pydata-book/examples/ipython_bug.py in ()\n", " 13 throws_an_exception()\n", " 14\n", "---> 15 calling_things()\n", "\n", "/home/wesm/code/pydata-book/examples/ipython_bug.py in calling_things()\n", " 11 def calling_things():\n", " 12 works_fine()\n", "---> 13 throws_an_exception()\n", " 14\n", " 15 calling_things()\n", "\n", "/home/wesm/code/pydata-book/examples/ipython_bug.py in throws_an_exception()\n", " 7 a = 5\n", " 8 b = 6\n", "----> 9 assert(a + b == 10)\n", " 10\n", " 11 def calling_things():\n", "\n", "AssertionError:" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Files and the Operating System" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "%pushd book-materials" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "path = 'examples/segismundo.txt'\n", "f = open(path)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "for line in f:\n", " pass" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "lines = [x.rstrip() for x in open(path)]\n", "lines" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "f.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { 
"collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "with open(path) as f:\n", " lines = [x.rstrip() for x in f]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "f = open(path)\n", "f.read(10)\n", "f2 = open(path, 'rb') # Binary mode\n", "f2.read(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "f.tell()\n", "f2.tell()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import sys\n", "sys.getdefaultencoding()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "f.seek(3)\n", "f.read(1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "f.close()\n", "f2.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "with open('tmp.txt', 'w') as handle:\n", " handle.writelines(x for x in open(path) if len(x) > 1)\n", "with open('tmp.txt') as f:\n", " lines = f.readlines()\n", "lines" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import os\n", "os.remove('tmp.txt')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Bytes and Unicode with Files" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "with open(path) as f:\n", " chars = f.read(10)\n", "chars" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "with open(path, 'rb') as f:\n", " data = f.read(10)\n", "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "data.decode('utf8')\n", "data[:4].decode('utf8')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sink_path = 'sink.txt'\n", "with open(path) as source:\n", " with open(sink_path, 'xt', encoding='iso-8859-1') as sink:\n", " sink.write(source.read())\n", "with open(sink_path, encoding='iso-8859-1') as f:\n", " print(f.read(10))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "os.remove(sink_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "f = open(path)\n", "f.read(5)\n", "f.seek(4)\n", "f.read(1)\n", "f.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "%popd" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Conclusion" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 0 }