{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Sql Examples\n", "\n", "Note: if you are interested in this kind of thing, definitely have a look at how these two libraries implement it...\n", "\n", "* [ibis](https://github.com/ibis-project/ibis/) - [see here](https://github.com/ibis-project/ibis/tree/master/ibis/sql)\n", "* [dbplyr](https://github.com/tidyverse/dbplyr)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from sqlalchemy import sql\n", "from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey\n", "from sqlalchemy import create_engine\n", "engine = create_engine('sqlite:///:memory:', echo=False)\n", "\n", "\n", "metadata = MetaData()\n", "users = Table('users', metadata,\n", " Column('id', Integer, primary_key=True),\n", " Column('name', String),\n", " Column('fullname', String),\n", ")\n", "\n", "\n", "addresses = Table('addresses', metadata,\n", " Column('id', Integer, primary_key=True),\n", " Column('user_id', None, ForeignKey('users.id')),\n", " Column('email_address', String, nullable=False)\n", " )\n", "\n", "metadata.create_all(engine)\n", "\n", "conn = engine.connect()\n", "\n", "ins = users.insert().values(name='jack', fullname='Jack Jones')\n", "result = conn.execute(ins)\n", "\n", "\n", "ins = users.insert()\n", "conn.execute(ins, id=2, name='wendy', fullname='Wendy Williams')\n", "\n", "\n", "res = conn.execute(addresses.insert(), [\n", " {'user_id': 1, 'email_address' : 'jack@yahoo.com'},\n", " {'user_id': 1, 'email_address' : 'jack@msn.com'},\n", " {'user_id': 2, 'email_address' : 'www@www.org'},\n", " {'user_id': 2, 'email_address' : 'wendy@aol.com'},\n", "])\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT users.id, users.name, users.fullname \n", "FROM users\n" ] } ], "source": [ "from siuba.sql import filter, mutate, select, LazyTbl, arrange, lift_inner_cols\n", "from siuba.siu import _\n", "\n", "tbl_users = LazyTbl(conn, users)\n", "\n", "print(tbl_users.last_op)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Select" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT users.fullname, users.name \n", "FROM users\n" ] } ], "source": [ "tbl = select(tbl_users, _.fullname, -_.id)\n", "print(tbl.last_op)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Filter" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT anon_1.id, anon_1.name, anon_1.fullname \n", "FROM (SELECT anon_2.id AS id, anon_2.name AS name, anon_2.fullname AS fullname \n", "FROM (SELECT users.id AS id, users.name AS name, users.fullname AS fullname \n", "FROM users) AS anon_2) AS anon_1 \n", "WHERE anon_1.fullname = :fullname_1\n" ] } ], "source": [ "tbl = filter(tbl_users, _.fullname == \"michael\")\n", "print(tbl.last_op)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT anon_1.id, anon_1.name, anon_1.fullname \n", "FROM (SELECT anon_2.id AS id, anon_2.name AS name, anon_2.fullname AS fullname \n", "FROM (SELECT users.id AS id, users.name AS name, users.fullname AS fullname \n", "FROM users) AS anon_2) AS anon_1 \n", "WHERE (anon_1.fullname LIKE :fullname_1 || '%')\n" ] } ], "source": [ "# currently, you can use any method exposed by sqlalchemy ColumnElement class\n", "# but I plan to implement a standard set of functions like in dplyr, so pandas\n", "# or sql queries psshh no matter.\n", "tbl = filter(tbl_users, _.fullname.startswith(\"m\"))\n", "print(tbl.last_op)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Mutate" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT users.id, users.name, users.fullname, users.id + users.name AS wow \n", "FROM users\n" ] } ], "source": [ "# simple\n", "tbl = mutate(tbl_users, wow = _.id + _.name)\n", "print(tbl.last_op)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT anon_1.id, anon_1.name, anon_1.fullname, anon_1.wow, anon_1.wow + :wow_1 AS wow2 \n", "FROM (SELECT users.id AS id, users.name AS name, users.fullname AS fullname, users.id + :id_1 AS wow \n", "FROM users) AS anon_1\n" ] } ], "source": [ "# using previous col created in mutate\n", "tbl = mutate(tbl_users, wow = _.id + 1, wow2 = _.wow + 2)\n", "print(tbl.last_op)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT users.name, users.fullname, users.id + :id_1 AS id \n", "FROM users\n" ] } ], "source": [ "# replacing column\n", "tbl = mutate(tbl_users, id = _.id + 1)\n", "print(tbl.last_op)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT anon_1.name, anon_1.fullname, anon_1.id, anon_1.id + :id_1 AS id2 \n", "FROM (SELECT users.name AS name, users.fullname AS fullname, users.id + :id_2 AS id \n", "FROM users) AS anon_1\n" ] } ], "source": [ "# replacing column, then referring to replacement\n", "tbl = mutate(tbl_users, id = _.id + 1, id2 = _.id + 2)\n", "print(tbl.last_op)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Arrange" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SELECT users.id, users.name, users.fullname \n", "FROM users ORDER BY users.id + users.name\n" ] } ], "source": [ "tbl = arrange(tbl_users, _.id + _.name)\n", "print(tbl.last_op)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 2 }