{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'HUEHUEHUE'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(\"hue\" * 3).upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "was was was\n",
      "was was is\n"
     ]
    }
   ],
   "source": [
    "str = \"is is is\";\n",
    "print(str.replace(\"is\", \"was\"))\n",
    "print(str.replace(\"is\", \"was\", 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'world'"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\" \\tworld\\n  \".strip()    # strip, lstrip, rstrip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b'946809' ### 946809\n"
     ]
    }
   ],
   "source": [
    "print(str(b'946809'), end = \" ### \" )\n",
    "print(b'946809'.decode('utf8') )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'3.56'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"%.2f\" % float(\"3.5555\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "bytes"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type('foo'.encode('utf-8'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='http', netloc='imgqn.xxx.com', path='/upload_files/2015/05/29/yyy.jpg!730x0.jpg', params='', query='', fragment='')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from urllib.parse import urlparse\n",
    "\n",
    "def get_filename_from_url(url):\n",
    "    return urlparse(url).path.split('/')[-1]\n",
    "\n",
    "url = \"http://imgqn.xxx.com/upload_files/2015/05/29/yyy.jpg!730x0.jpg\"\n",
    "urlparse(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup  # analyze html\n",
    "#http://www.crummy.com/software/BeautifulSoup/bs4/doc/\n",
    "soup = BeautifulSoup(html_doc)\n",
    "soup.p['class']\n",
    "soup.find_all('a')\n",
    "soup.find_all('img', src=True):\n",
    "soup.find_all(\"div\", { \"class\" : \"xxx\"})  \n",
    "soup.find(id=\"link3\")\n",
    "# <a class=\"sister\" href=\"http://example.com/tillie\" id=\"link3\">Tillie</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Regex\n",
    "https://www.regex101.com/  \n",
    "http://regexr.com/  \n",
    "https://docs.python.org/3/library/re.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'your name your name'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "re.sub(r'(?i)\\b(u|you+)\\b', \"your name\", 'u YOU')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "re.match(\"c\", \"abcdef\") # checks for a match only at the beginning of the string, No match"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(2, 3), match='c'>"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "re.search(\"c\", \"abcdef\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(0, 1), match='1'>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m = re.search(\"\\w\",'1dfsde2') # \\w match  as well as numbers and the underscore\n",
    "if m: \n",
    "    print(m.group(0))\n",
    "m    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import re\n",
    "pattern=\"BEGIN:VCARD.*?END:VCARD\"\n",
    "result = re.findall(pattern,content,re.DOTALL) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Positive Lookbehind:\ta*c a b c\n",
      "Negative Lookbehind:\tabc a * c\n",
      "Positive Lookahead:\ta*c a b c\n",
      "Negative Lookahead:\tabc a * c\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "print( 'Positive Lookbehind:\\t' + re.sub(u'(?<=a)b', \"*\", 'abc a b c') )\n",
    "print( 'Negative Lookbehind:\\t' + re.sub(u'(?<!a)b', \"*\", 'abc a b c') )\n",
    "print( 'Positive Lookahead:\\t'  + re.sub(u'b(?=c)', \"*\", 'abc a b c') )\n",
    "print( 'Negative Lookahead:\\t'  + re.sub(u'b(?!c)', \"*\", 'abc a b c') )"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}