{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Intro to Python "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'int'>\n",
      "3\n",
      "4\n",
      "2\n",
      "6\n",
      "9\n",
      "4\n",
      "8\n",
      "<class 'float'>\n",
      "2.5 3.5 5.0 6.25\n"
     ]
    }
   ],
   "source": [
    "x = 3\n",
    "print(type(x)) # Prints \"<class 'int'>\"\n",
    "print(x)       # Prints \"3\"\n",
    "print(x + 1)   # Addition; prints \"4\"\n",
    "print(x - 1)   # Subtraction; prints \"2\"\n",
    "print(x * 2)   # Multiplication; prints \"6\"\n",
    "print(x ** 2)  # Exponentiation; prints \"9\"\n",
    "x += 1\n",
    "print(x)  # Prints \"4\"\n",
    "x *= 2\n",
    "print(x)  # Prints \"8\"\n",
    "y = 2.5\n",
    "print(type(y)) # Prints \"<class 'float'>\"\n",
    "print(y, y + 1, y * 2, y ** 2) # Prints \"2.5 3.5 5.0 6.25\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'str'>\n"
     ]
    }
   ],
   "source": [
    "x = 'c'\n",
    "print(type(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'bool'>\n",
      "False\n",
      "True\n",
      "False\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "t = True\n",
    "f = False\n",
    "print(type(t)) # Prints \"<class 'bool'>\"\n",
    "print(t and f) # Logical AND; prints \"False\"\n",
    "print(t or f)  # Logical OR; prints \"True\"\n",
    "print(not t)   # Logical NOT; prints \"False\"\n",
    "print(t != f)  # Logical XOR; prints \"True\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "2 == 23"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "helloworld\n",
      "10\n",
      "helloworld world\n",
      "helloworld world 12\n"
     ]
    }
   ],
   "source": [
    "hello = 'helloworld'    # String literals can use single quotes\n",
    "world = \"world\"    # or double quotes; it does not matter.\n",
    "print(hello)       # Prints \"hello\"\n",
    "print(len(hello))  # String length; prints \"5\"\n",
    "hw = hello + ' ' + world  # String concatenation\n",
    "print(hw)  # prints \"hello world\"\n",
    "hw12 = '%s %s %d' % (hello, world, 12)  # sprintf style string formatting\n",
    "print(hw12)  # prints \"hello world 12\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hello\n",
      "HELLO\n",
      "  hello\n",
      " hello \n",
      "he(ell)(ell)o\n",
      "world\n"
     ]
    }
   ],
   "source": [
    "s = \"hello\"\n",
    "print(s.capitalize())  # Capitalize a string; prints \"Hello\"\n",
    "print(s.upper())       # Convert a string to uppercase; prints \"HELLO\"\n",
    "print(s.rjust(7))      # Right-justify a string, padding with spaces; prints \"  hello\"\n",
    "print(s.center(7))     # Center a string, padding with spaces; prints \" hello \"\n",
    "print(s.replace('l', '(ell)'))  # Replace all instances of one substring with another;\n",
    "                                # prints \"he(ell)(ell)o\"\n",
    "print('  world '.strip())  # Strip leading and trailing whitespace; prints \"world\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3, 1, 2] 2\n",
      "2\n",
      "[3, 1, 'foo']\n",
      "[3, 1, 'foo', 'bar']\n",
      "bar [3, 1, 'foo']\n"
     ]
    }
   ],
   "source": [
    "xs = [3, 1, 2]    # Create a list\n",
    "print(xs, xs[2])  # Prints \"[3, 1, 2] 2\"\n",
    "print(xs[-1])     # Negative indices count from the end of the list; prints \"2\"\n",
    "xs[2] = 'foo'     # Lists can contain elements of different types\n",
    "print(xs)         # Prints \"[3, 1, 'foo']\"\n",
    "xs.append('bar')  # Add a new element to the end of the list\n",
    "print(xs)         # Prints \"[3, 1, 'foo', 'bar']\"\n",
    "x = xs.pop()      # Remove and return the last element of the list\n",
    "print(x, xs)      # Prints \"bar [3, 1, 'foo']\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xs = [3, 1, 2,5,7]\n",
    "xs[-len(xs)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[3, 1, 2, 'f', 7]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xs[3] = 'f'\n",
    "xs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Final List :  [123, 'xyz', 'zara', 2009, 'abc']\n"
     ]
    }
   ],
   "source": [
    "aList = [123, 'xyz', 'zara', 'abc']\n",
    "aList.insert( 3, 2009)\n",
    "print (\"Final List : \", aList)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 2, 3, 4]\n",
      "[2, 3]\n",
      "[2, 3, 4]\n",
      "[0, 1]\n",
      "[0, 1, 2, 3, 4]\n",
      "[0, 1, 2, 3]\n",
      "[0, 1, 8, 9, 4]\n"
     ]
    }
   ],
   "source": [
    "nums = list(range(5))     # range is a built-in function that creates a list of integers\n",
    "print(nums)               # Prints \"[0, 1, 2, 3, 4]\"\n",
    "print(nums[2:4])          # Get a slice from index 2 to 4 (exclusive); prints \"[2, 3]\"\n",
    "print(nums[2:])           # Get a slice from index 2 to the end; prints \"[2, 3, 4]\"\n",
    "print(nums[:2])           # Get a slice from the start to index 2 (exclusive); prints \"[0, 1]\"\n",
    "print(nums[:])            # Get a slice of the whole list; prints \"[0, 1, 2, 3, 4]\"\n",
    "print(nums[:-1])          # Slice indices can be negative; prints \"[0, 1, 2, 3]\"\n",
    "nums[2:4] = [8, 9]        # Assign a new sublist to a slice\n",
    "print(nums) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0, 1, 2, 3, 4, 5]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nums = list(range(6))  \n",
    "nums"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0, 1], [2, 3], [4, 5])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nums[:2], nums[2:4], nums[4:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cat\n",
      "dog\n",
      "monkey\n"
     ]
    }
   ],
   "source": [
    "animals = ['cat', 'dog', 'monkey']\n",
    "\n",
    "for animal in animals: print(animal)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 cat\n",
      "1 dog\n",
      "2 monkey\n"
     ]
    }
   ],
   "source": [
    "animals = ['cat', 'dog', 'monkey']\n",
    "for idx, animal in enumerate(animals):\n",
    "    print(idx, animal)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 4, 9, 16]\n"
     ]
    }
   ],
   "source": [
    "nums = [0, 1, 2, 3, 4]\n",
    "squares = []\n",
    "for x in nums:\n",
    "    squares.append(x ** 2)\n",
    "print(squares)   # Prints [0, 1, 4, 9, 16]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## List Comprehension \n",
    "\n",
    "Fast and pythonic way of doing operations oin lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 4, 9, 16]\n"
     ]
    }
   ],
   "source": [
    "nums = [0, 1, 2, 3, 4]\n",
    "squares = [x ** 2 for x in nums]\n",
    "print(squares)   # Prints [0, 1, 4, 9, 16]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 9]\n"
     ]
    }
   ],
   "source": [
    "nums = [0, 1, 2, 3, 4]\n",
    "odd_squares = [x ** 2 for x in nums if x % 2 == 1]\n",
    "print(odd_squares)  # Prints \"[0, 4, 16]\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 9]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nums = [0, 1, 2, 3, 4]\n",
    "squares = []\n",
    "for x in nums:\n",
    "    if x ** 2 % 2:\n",
    "        squares.append(x ** 2)\n",
    "squares"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cute\n",
      "True\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'cat': 'cute', 'dog': 'furry', 'fish': 'wet'}"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d = {'cat': 'cute', 'dog': 'furry'}  # Create a new dictionary with some data\n",
    "print(d['cat'])       # Get an entry from a dictionary; prints \"cute\"\n",
    "print('cat' in d)     # Check if a dictionary has a given key; prints \"True\"\n",
    "d['fish'] = 'wet'     # Set an entry in a dictionary\n",
    "d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "source": [
    "# print(d['monkey'])  # KeyError: 'monkey' not a key of d\n",
    "print(d.get('monkey'))  # Get an element with a default; prints \"N/A\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'wet'"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d['fish']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    d['monkey']\n",
    "except:\n",
    "    print('None')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "source": [
    "print(d.get('monkey'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "person\n",
      "cat\n",
      "spider\n"
     ]
    }
   ],
   "source": [
    "d = {'person': 2, 'cat': 4, 'spider': 8}\n",
    "for key in d.keys():\n",
    "    print(key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2\n",
      "4\n",
      "8\n"
     ]
    }
   ],
   "source": [
    "for val in d.values():\n",
    "    print(val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "person 2\n",
      "cat 4\n",
      "spider 8\n"
     ]
    }
   ],
   "source": [
    "for key, val in d.items():\n",
    "    print(key, val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{0: 0, 1: 1, 2: 32, 3: 243, 4: 1024}\n"
     ]
    }
   ],
   "source": [
    "nums = [0, 1, 2, 3, 4]\n",
    "even_num_to_square = {x: x ** 5 for x in nums}\n",
    "print(even_num_to_square)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "False\n"
     ]
    }
   ],
   "source": [
    "animals = {'cat', 'dog'}\n",
    "print('cat' in animals)   # Check if an element is in a set; prints \"True\"\n",
    "print('fish' in animals)  # prints \"False\"\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Question: Compute Jaccard Similarity\n",
    "\n",
    "$$\n",
    "Sim(A,B) = \\frac{|A \\cap  B|}{|A \\cup  B|}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "A = {'cat', 'dog'} \n",
    "B = {'cat', 'fish'} "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'cat'}"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A_intersect_B = {a for a in A if a in B} # set comprehension\n",
    "A_intersect_B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A_intersect_B_size = len(A_intersect_B)\n",
    "A_intersect_B_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A_union_B_size = len(A) + len(B) - len(A_intersect_B)\n",
    "A_union_B_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "def jaccard_similarty(A,B):\n",
    "    \"\"\"This function computes jaccard similarity\"\"\"\n",
    "    A_intersect_B = {a for a in A if a in B}\n",
    "    A_intersect_B_size = len(A_intersect_B)\n",
    "    A_union_B_size = len(A) + len(B) - len(A_intersect_B)\n",
    "    return A_intersect_B_size / A_union_B_size\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3333333333333333"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jaccard_similarty(A,B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5, 6, 5)"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t = (5, 6, 5)\n",
    "t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "'tuple' object does not support item assignment",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-78-38eb177fe087>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m9\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment"
     ]
    }
   ],
   "source": [
    "t[2] = 9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "negative\n",
      "zero\n",
      "positive\n"
     ]
    }
   ],
   "source": [
    "def sign(x):\n",
    "    if x > 0:\n",
    "        return 'positive'\n",
    "    elif x < 0:\n",
    "        return 'negative'\n",
    "    else:\n",
    "        return 'zero'\n",
    "\n",
    "for x in [-15, 0, 81]:\n",
    "    print(sign(x))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Greeter():\n",
    "    # Constructor\n",
    "    def __init__(self, name):\n",
    "        self.name = name  # Create an instance variable\n",
    "    # Instance method\n",
    "    def greet(self, loud=False):\n",
    "        if loud:\n",
    "            print('HELLO, ', self.name.upper())\n",
    "        else:\n",
    "            print('Hello, ', self.name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hello,  Fred\n"
     ]
    }
   ],
   "source": [
    "g = Greeter('Fred')  # Construct an instance of the Greeter class\n",
    "g.greet()            # Call an instance method; prints \"Hello, Fred\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "HELLO,  FRED\n"
     ]
    }
   ],
   "source": [
    "g.greet(loud=True)   # Call an instance method; prints \"HELLO, FRED!\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}