{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%env\n",
    "CONF_NUM_PARITIONS=10\n",
    "OUTPUT_PATH=/home/jovyan/examples/tpch/parquet\n",
    "INPUT_PATH=/home/jovyan/examples/tpch/tbl\n",
    "SCHEMA_PATH=/home/jovyan/examples/tpch/schema"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# customer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/customer.tbl\",\n",
    " \"outputView\": \"customer_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"customer_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/customer.schema.json\",\n",
    " \"outputView\": \"customer\",\n",
    " \"failMode\": \"failfast\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"customer\" outputView=customer_output environments=production,test\n",
    "SELECT\n",
    " c_custkey\n",
    " ,c_name\n",
    " ,c_address\n",
    " ,c_nationkey\n",
    " ,c_phone\n",
    " ,c_acctbal\n",
    " ,c_mktsegment\n",
    " ,c_comment\n",
    "FROM customer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"customer_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/customer\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS}\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# part"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/part.tbl\",\n",
    " \"outputView\": \"part_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"part_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/part.schema.json\",\n",
    " \"outputView\": \"part\",\n",
    " \"failMode\": \"failfast\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"part\" outputView=part_output environments=production,test\n",
    "SELECT\n",
    " p_partkey\n",
    " ,p_name\n",
    " ,p_mfgr\n",
    " ,p_brand\n",
    " ,p_type\n",
    " ,p_size\n",
    " ,p_container\n",
    " ,p_retailprice\n",
    " ,p_comment\n",
    "FROM part"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"part_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/part\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS}\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# lineitem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/lineitem.tbl\",\n",
    " \"outputView\": \"lineitem_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"lineitem_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/lineitem.schema.json\",\n",
    " \"outputView\": \"lineitem\",\n",
    " \"failMode\": \"failfast\",\n",
    " \"persist\": true\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"lineitem\" outputView=lineitem_output environments=production,test\n",
    "SELECT\n",
    " l_orderkey\n",
    " ,l_partkey\n",
    " ,l_suppkey\n",
    " ,l_linenumber\n",
    " ,l_quantity\n",
    " ,l_extendedprice\n",
    " ,l_discount\n",
    " ,l_tax\n",
    " ,l_returnflag\n",
    " ,l_linestatus\n",
    " ,l_shipdate\n",
    " ,l_commitdate\n",
    " ,l_receiptdate\n",
    " ,l_shipinstruct\n",
    " ,l_shipmode\n",
    " ,l_comment\n",
    "FROM lineitem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"lineitem_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/lineitem\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS} \n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# supplier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/supplier.tbl\",\n",
    " \"outputView\": \"supplier_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"supplier_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/supplier.schema.json\",\n",
    " \"outputView\": \"supplier\",\n",
    " \"failMode\": \"failfast\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"supplier\" outputView=supplier_output environments=production,test\n",
    "SELECT\n",
    " s_suppkey\n",
    " ,s_name\n",
    " ,s_address\n",
    " ,s_nationkey\n",
    " ,s_phone\n",
    " ,s_acctbal\n",
    " ,s_comment\n",
    "FROM supplier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"supplier_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/supplier\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS}\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# partsupp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/partsupp.tbl\",\n",
    " \"outputView\": \"partsupp_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"partsupp_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/partsupp.schema.json\",\n",
    " \"outputView\": \"partsupp\",\n",
    " \"failMode\": \"failfast\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"partsupp\" outputView=partsupp_output environments=production,test\n",
    "SELECT\n",
    " ps_partkey\n",
    " ,ps_suppkey\n",
    " ,ps_availqty\n",
    " ,ps_supplycost\n",
    " ,ps_comment\n",
    "FROM partsupp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"partsupp_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/partsupp\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS}\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# orders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/orders.tbl\",\n",
    " \"outputView\": \"orders_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"orders_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/orders.schema.json\",\n",
    " \"outputView\": \"orders\",\n",
    " \"failMode\": \"failfast\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"orders\" outputView=orders_output environments=production,test\n",
    "SELECT\n",
    " o_orderkey\n",
    " ,o_custkey\n",
    " ,o_orderstatus\n",
    " ,o_totalprice\n",
    " ,o_orderdate\n",
    " ,o_orderpriority\n",
    " ,o_clerk\n",
    " ,o_shippriority\n",
    " ,o_comment\n",
    "FROM orders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"orders_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/orders\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS}\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# nation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/nation.tbl\",\n",
    " \"outputView\": \"nation_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"nation_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/nation.schema.json\",\n",
    " \"outputView\": \"nation\",\n",
    " \"failMode\": \"failfast\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"nation\" outputView=nation_output environments=production,test\n",
    "SELECT\n",
    " n_nationkey\n",
    " ,n_name\n",
    " ,n_regionkey\n",
    " ,n_comment\n",
    "FROM nation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"nation_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/nation\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS}\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# region"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"DelimitedExtract\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputURI\": ${INPUT_PATH}\"/region.tbl\",\n",
    " \"outputView\": \"region_raw\",\n",
    " \"header\": false,\n",
    " \"delimiter\": \"Pipe\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"TypingTransform\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"region_raw\",\n",
    " \"schemaURI\": ${SCHEMA_PATH}\"/region.schema.json\",\n",
    " \"outputView\": \"region\",\n",
    " \"failMode\": \"failfast\" \n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"region\" outputView=region_output environments=production,test\n",
    "SELECT\n",
    " r_regionkey\n",
    " ,r_name\n",
    " ,r_comment\n",
    "FROM region"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"ParquetLoad\",\n",
    " \"name\": \"ParquetLoad\",\n",
    " \"environments\": [\n",
    " \"production\",\n",
    " \"test\"\n",
    " ],\n",
    " \"inputView\": \"region_output\",\n",
    " \"outputURI\": ${OUTPUT_PATH}\"/region\",\n",
    " \"numPartitions\": ${CONF_NUM_PARITIONS}\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Arc",
   "language": "javascript",
   "name": "arc"
  },
  "language_info": {
   "codemirror_mode": "javascript",
   "file_extension": ".json",
   "mimetype": "javascript",
   "name": "arc",
   "nbconvert_exporter": "arcexport",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}