{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nThis is an intro to regular expressions\\n\\nI use https://regex101.com/#python to check my work!\\n'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\"\n",
    "This is an intro to regular expressions\n",
    "\n",
    "I use https://regex101.com/#python to check my work!\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# flow:\n",
    "# 1. create a re pattern object\n",
    "# 2. search (or match) it against text\n",
    "# 3. organize the captured patterns in groups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# \\d matches a SINGLE digit\n",
    "text = \"Hello! My name is Sinan. It is 2014 and it's amazing.\"\n",
    "\n",
    "# create a re pattern object\n",
    "# (raw string: the backslash reaches the regex engine untouched)\n",
    "pattern1 = re.compile(r\"\\d\")\n",
    "\n",
    "# search (or match) it against text\n",
    "match1 = pattern1.search(text)  # a match object, or None when nothing matches\n",
    "\n",
    "# organize the captured patterns in groups\n",
    "# group(0) is the entire matched text\n",
    "# \\d is just ONE digit, so it only finds the \"2\" in \"2014\"\n",
    "match1.group(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2014'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# adding a + means \"at least one\" but potentially more\n",
    "pattern2 = re.compile(r\"\\d+\")\n",
    "pattern2.search(text).group(0)  # == '2014'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# use square brackets [] to match one of the items present\n",
    "# search scans left to right, so the first hit here is 'c'\n",
    "alphabet = 'abcdefg'\n",
    "pattern3 = re.compile('[cfg]')\n",
    "pattern3.search(alphabet).group(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'906-365-6776'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mystery_pattern = re.compile(r\"\\d+-\\d+-\\d+\")\n",
    "# take a few minutes, and discuss, what application could this mystery_pattern have\n",
    "mystery_pattern.search(\"my phone number is 906-365-6776 dude\").group(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'dmzhvbekuhvbc dfljghwco87rc6geinsr6t4gi7rgwefiuvbekuhvbdfljghwco87rc6geinsr6t4gi7rgwefiu ywgsfybcstzvgbrtybte'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# . matches any single character except a newline\n",
    "all_of_the_text = \"dmzhvbekuhvbc dfljghwco87rc6geinsr6t4gi7rgwefiuvbekuhvbdfljghwco87rc6geinsr6t4gi7rgwefiu ywgsfybcstzvgbrtybte\"\n",
    "anything_pattern = re.compile(\".+\")\n",
    "anything_pattern.search(all_of_the_text).group(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'sinan+test@legionanalytics.com'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# \\w matches any word character (letters, digits, and underscore)\n",
    "# if you want to match an actual period, do \\.\n",
    "email_pattern = re.compile(r\"[\\w\\.\\+]+@\\w+\\.com\")\n",
    "email_pattern.search(\"my email address is sinan+test@legionanalytics.com\").group(0)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [sfdat26-env]",
   "language": "python",
   "name": "Python [sfdat26-env]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}