{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#########################################################################\n", "## Introduction to Database #1 - Applying Filtering and Targeting data\n", "## Atul Singh\n", "## www.datagenx.net\n", "#########################################################################" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# import\n", "from sqlalchemy import create_engine, Table, MetaData, select, or_, and_, desc, func\n", "# pip install psycopg2 #PostgreSQL driver\n", "import psycopg2\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Connecting PostgreSQL" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Employee']\n" ] } ], "source": [ "# dialect and driver ('postgresql+psycopg2://'), \n", "# followed by the username and password ('student:datacamp'), \n", "# followed by the host and port ('@postgresql.csrrinzqubik.us-east-1.rds.amazonaws.com:5432/'), \n", "# and finally, the database name ('census').\n", "\n", "# creating engine\n", "engine = create_engine(\"postgresql+psycopg2://postgres:postgres@localhost:5432/test\")\n", "\n", "# reading table names from database\n", "print(engine.table_names())" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "### Importing metadata with Table and MetaData object\n", "metadata = MetaData()\n", "\n", "# getting albums metadata\n", "emp = Table('Employee', metadata, autoload=True, autoload_with=engine)\n", "connection = engine.connect()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Table('Employee', MetaData(bind=None), Column('FirstName', VARCHAR(), table=), Column('LastName', VARCHAR(), table=), Column('Gender', String(), table=), Column('EmpId', INTEGER(), table=), schema=None)\n" ] } ], "source": [ "print(repr(emp))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Table('Employee', MetaData(bind=None), Column('FirstName', VARCHAR(), table=), Column('LastName', VARCHAR(), table=), Column('Gender', String(), table=), Column('EmpId', INTEGER(), table=), schema=None)\n" ] } ], "source": [ "# we can get the same metadata details from metadata.tables dictionaty\n", "print(repr(metadata.tables['Employee']))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "## Let's query on table albums\n", "stmt = select([emp])\n", "stmt = stmt.where(emp.columns.Gender == 'F')" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "#results = connection.execute(stmt).fetchall()\n", "result_proxy = connection.execute(stmt)\n", "print(result_proxy)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('Priya', 'Rajawat', 'F', 3), ('Divya', 'Patel', 'F', 4)]\n" ] } ], "source": [ "results = result_proxy.fetchall()\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['FirstName', 'LastName', 'Gender', 'EmpId']\n" ] } ], "source": [ "# get all the column names\n", "print(result_proxy.keys())" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n" ] } ], "source": [ "# get total no of rows in tables\n", "print(len(results))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Priya', 'Rajawat', 'F', 3)\n" ] } ], "source": [ "# get first row\n", "result1 = results[0]\n", "print(result1)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['FirstName', 'LastName', 'Gender', 'EmpId']\n" ] } ], "source": [ "# print column names\n", "print(result1.keys())" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "F\n" ] } ], "source": [ "# access particulat column data\n", "print(result1.Gender)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3 Priya Rajawat F\n", "4 Divya Patel F\n" ] } ], "source": [ "# printing the results\n", "for result in results:\n", " print(result.EmpId, result[0], result.LastName, result.Gender)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### More complex Join and other conjuntion" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": true }, "outputs": [], "source": [ "gend = ['M']" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Now I have to fetch all the records which match with Gender list - gender\n", "stmt = select([emp])\n", "stmt = stmt.where(emp.columns.Gender.in_(gend))" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Atul', 'Singh', 'M', 1)\n", "('Rahul', 'Singh', 'M', 2)\n" ] } ], "source": [ "# Here no need to use fetch all funct\n", "for result in connection.execute(stmt):\n", " print(result)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Atul', 'Singh', 'M', 1)\n", "('Rahul', 'Singh', 'M', 2)\n", "('Priya', 'Rajawat', 'F', 3)\n", "('Divya', 'Patel', 'F', 4)\n" ] } ], "source": [ "# same way we can use or_(), and_(), not_(), like() and between()\n", "# let's see a example of or_()\n", "stmt = select([emp])\n", "stmt = stmt.where(or_(emp.columns.Gender=='M', emp.columns.Gender=='F'))\n", "for result in connection.execute(stmt):\n", " print(result)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Atul', 'Singh', 'M', 1)\n", "('Rahul', 'Singh', 'M', 2)\n" ] } ], "source": [ "# and statement\n", "stmt = select([emp])\n", "stmt = stmt.where(and_(emp.columns.Gender=='M', emp.columns.LastName=='Singh'))\n", "for result in connection.execute(stmt):\n", " print(result)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Divya', 'Patel', 'F', 4)\n" ] } ], "source": [ "# like statement\n", "stmt = select([emp])\n", "stmt = stmt.where(emp.columns.LastName.like('P%'))\n", "for result in connection.execute(stmt):\n", " print(result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Overview of Ordering" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('Divya', 'Patel', 'F', 4), ('Priya', 'Rajawat', 'F', 3), ('Atul', 'Singh', 'M', 1), ('Rahul', 'Singh', 'M', 2)]\n" ] } ], "source": [ "stmt = select([emp])\n", "stmt = stmt.order_by(emp.columns.LastName)\n", "results = connection.execute(stmt).fetchall()\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('Divya', 'Patel'), ('Priya', 'Rajawat'), ('Atul', 'Singh'), ('Rahul', 'Singh')]\n" ] } ], "source": [ "# selecting any two columns\n", "stmt = select([emp.columns.FirstName, emp.columns.LastName])\n", "stmt = stmt.order_by(emp.columns.LastName)\n", "results = connection.execute(stmt).fetchall()\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('Atul', 'Singh'), ('Rahul', 'Singh'), ('Priya', 'Rajawat'), ('Divya', 'Patel')]\n" ] } ], "source": [ "# selecting two columns in reverse order\n", "stmt = select([emp.columns.FirstName, emp.columns.LastName])\n", "stmt = stmt.order_by(desc(emp.columns.LastName))\n", "results = connection.execute(stmt).fetchall()\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('Atul', 'Singh'), ('Rahul', 'Singh'), ('Priya', 'Rajawat'), ('Divya', 'Patel')]\n" ] } ], "source": [ "# one column in ascending order and one in desc\n", "stmt = select([emp.columns.FirstName, emp.columns.LastName])\n", "stmt = stmt.order_by(desc(emp.columns.LastName), emp.columns.FirstName)\n", "results = connection.execute(stmt).fetchall()\n", "print(results)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregating the data" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# import func\n", "# from sqlalchemy import func" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# counting distinct gender in table Employee\n", "stmt = select([func.count(emp.columns.Gender.distinct())])\n", "results = connection.execute(stmt)\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n" ] } ], "source": [ "results = results.scalar()\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('M', 2), ('F', 2)]\n", "['Gender', 'count_1']\n" ] } ], "source": [ "## taking the gender count per records\n", "stmt = select([emp.columns.Gender, func.count(emp.columns.Gender)])\n", "stmt = stmt.group_by(emp.columns.Gender)\n", "results = connection.execute(stmt).fetchall()\n", "print(results)\n", "print(results[0].keys())" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('M', 2), ('F', 2)]\n", "['Gender', 'Gender_Count']\n" ] } ], "source": [ "# labeling the count columns\n", "stmt = select([emp.columns.Gender, func.count(emp.columns.Gender).label('Gender_Count')])\n", "stmt = stmt.group_by(emp.columns.Gender)\n", "results = connection.execute(stmt).fetchall()\n", "print(results)\n", "print(results[0].keys())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Conveting results to DataFrame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# importing pandas\n", "# import pandas as pd" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Gender Gender_Count\n", "0 M 2\n", "1 F 2\n" ] } ], "source": [ "df = pd.DataFrame(results)\n", "df.columns = results[0].keys()\n", "print(df)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "### Connecting to Oracle\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# installation of Oracle Driver\n", "# pip install cx_Oracle\n", "import cx_Oracle" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "12.1.0.2.0\n" ] } ], "source": [ "con = cx_Oracle.connect('C##ATUL/atul@127.0.0.1/smpl') #user/password@host/network_alias\n", "print(con.version)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ ">" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# creating query \n", "cur = con.cursor()\n", "cur.execute('SELECT COURSE_ID, TITLE, DEPT_NAME, CREDITS FROM COURSE WHERE DEPT_NAME=\\'Comp. Sci.\\'')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('539', 'International Finance', 'Comp. Sci.', 3)\n" ] } ], "source": [ "# fetching single records with fetchone()\n", "print(cur.fetchone())" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('274', 'Corporate Law', 'Comp. Sci.', 4), ('949', 'Japanese', 'Comp. Sci.', 3)]\n" ] } ], "source": [ "# fetching mamy rows\n", "print(cur.fetchmany(numRows=2))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('647', 'Service-Oriented Architectures', 'Comp. Sci.', 4), ('747', 'International Practicum', 'Comp. Sci.', 4), ('584', 'Computability Theory', 'Comp. Sci.', 3), ('276', 'Game Design', 'Comp. Sci.', 4), ('359', 'Game Programming', 'Comp. Sci.', 4), ('284', 'Topology', 'Comp. Sci.', 4), ('571', 'Plastics', 'Comp. Sci.', 4)]\n" ] } ], "source": [ "# fetching all rows\n", "print(cur.fetchall())" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# printing results \n", "for res in cur:\n", " print(res) # this will not print any records " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "con.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "############################################################\n", "## Atul Singh | www.datagenx.net | lnked.in/atulsingh\n", "############################################################" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }