{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This is a work-in-progress for some future cohpy meeting." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Lastly, uses names to refer to parts of a string that match a regular expression is fantastic! It makes the code much more readable than using meaningless numerical indexes.\n", "\n", "This was new to a Python expert who now loves it,\n", "although his still prefers to eschew regular expressions\n", "because they are hard to read.\n", "\n", "Imagine the following contrived example,\n", "where I want to get the hash of an git object from its filepath." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'\n", "\n", "import re\n", "git_pattern = re.compile(r'''\n", " .*/ # anything and a slash\n", " (\n", " ([0-9a-fA-F]{2}) # 2 hexadecimal digits\n", " / # separated by a slash\n", " ([0-9a-fA-F]{38}) # 38 hexadecimal digits\n", " )$''', flags=re.VERBOSE)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "<_sre.SRE_Match object; span=(0, 54), match='.git/objects/8e/28241360c472576e8caa944253d4af368>" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m = git_pattern.match(s)\n", "m" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'.git/objects/8e/28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group(0)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e/28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group(1)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group(2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group(3)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on function compile in module re:\n", "\n", "compile(pattern, flags=0)\n", " Compile a regular expression pattern, returning a pattern object.\n", "\n" ] } ], "source": [ "help(re.compile)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "s = '.git/objects/8e/28241360c472576e8caa944253d4af368d9081'\n", "\n", "import re\n", "git_pattern = re.compile(r'''\n", " .*/ # anything and a slash\n", " (?P\n", " (?P [0-9a-fA-F]{2}) # 2 hexadecimal digits\n", " / # separated by a slash\n", " (?P [0-9a-fA-F]{38}) # 38 hexadecimal digits\n", " )$''', flags=re.VERBOSE)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "<_sre.SRE_Match object; span=(0, 54), match='.git/objects/8e/28241360c472576e8caa944253d4af368>" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m = git_pattern.match(s)\n", "m" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group(2)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group('hash_directory')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group(3)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group('hash_filename')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hash = m.group('hash_directory') + m.group('hash_filename')\n", "hash" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hash = ''.join(map(m.group, ('hash_directory', 'hash_filename')))\n", "hash" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hash = ''.join(map(lambda s: m.group('hash_%s' % s), ('directory', 'filename')))\n", "hash" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e/28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group(1)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e/28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group('hash_with_slash')" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'8e28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hash = ''.join(c for c in m.group('hash_with_slash') if c != '/')\n", "hash" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'.git/objects/8e/28241360c472576e8caa944253d4af368d9081'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "s = '8e/28241360c472576e8caa944253d4af368d9081'\n", "m = git_pattern.match(s)\n", "m" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'None'" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repr(m)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }