#!/bin/bash
set -e
#### For version: HDP 3.1.x
#### This script has to be run on the LLAP Host.
#### It can be ported to retrieve information using the Ambari REST APIs, allowing this to work from any host. Using bash for speed.

#### Pre-requisites:
# 1) LLAP is installed on this host.
# 2) Spark Client is installed on this host.

# File holder for required information:
hive_site_llap=/etc/hive_llap/conf/hive-site.xml
beeline_site_llap=/etc/hive_llap/conf/beeline-site.xml

if [ -r "$hive_site_llap" ] && [ -r "$beeline_site_llap" ]; then

    hive_metastore_uris=$(grep -e "thrift.*9083" "$hive_site_llap" |awk -F"<|>" '{print $3}')
    hive_llap_daemon_service_hosts=$(grep "hive.llap.daemon.service.hosts" -A1 "$hive_site_llap" |awk 'NR==2' | awk -F"[<|>]" '{print $3}')
    hive_zookeeper_quorum=$(grep "hive.zookeeper.quorum" -A1 "$hive_site_llap" |awk 'NR==2' | awk -F"[<|>]" '{print $3}')
    hwc_jar=$(find /usr/hdp/current/hive_warehouse_connector/ -name *assembly*.jar)
    hwc_pyfile=$(find /usr/hdp/current/hive_warehouse_connector/ -name *hwc*.zip)
    hive_jdbc_url=$(grep "beeline.hs2.jdbc.url.llap" -A1 "$beeline_site_llap" |awk 'NR==2' | awk -F"[<|>]" '{print $3}')
    hive_jdbc_url_principal=$(grep "hive.server2.authentication.kerberos.principal" -A1 "$hive_site_llap" |awk 'NR==2' | awk -F"[<|>]" '{print $3}')
    hive_zookeeper_quorum=$(grep "hive.zookeeper.quorum" -A1 "$hive_site_llap" |awk 'NR==2' | awk -F"[<|>]" '{print $3}')

    echo -e "To apply this configuration cluster wide, copy and paste the following list of properties in Ambari UI -> Spark2 -> Configs -> Advanced -> Custom spark2-defaults (Bulk Property Add mode)\n"
    echo -e "spark.datasource.hive.warehouse.load.staging.dir=/tmp"
    echo -e "spark.datasource.hive.warehouse.metastoreUri="$hive_metastore_uris
    echo -e "spark.hadoop.hive.llap.daemon.service.hosts="$hive_llap_daemon_service_hosts
    echo -e "spark.jars="$hwc_jar
    echo -e "spark.submit.pyFiles="$hwc_pyfile
    echo -e "spark.security.credentials.hiveserver2.enabled=false"
    echo -e "spark.sql.hive.hiveserver2.jdbc.url="$hive_jdbc_url
    echo -e "spark.sql.hive.zookeeper.quorum="$hive_zookeeper_quorum
    #If Kerberized:
    [ ! -z "$hive_jdbc_url_principal"] && echo -e "spark.sql.hive.hiveserver2.jdbc.url.principal="$hive_jdbc_url_principal

    echo -e "\n### Save and restart."
    echo -e "\nNote: In a kerberized environment the property spark.security.credentials.hiveserver2.enabled has to be set to TRUE for deploy-mode cluster, i.e.:\n spark-submit --conf spark.security.credentials.hiveserver2.enabled=true"

    echo -e "\nIf you'd like to test this per job instead of cluster wide, then use the following command as an example:\n

    spark-shell --master yarn --conf spark.datasource.hive.warehouse.load.staging.dir=/tmp --conf spark.datasource.hive.warehouse.metastoreUri=$hive_metastore_uris --conf spark.hadoop.hive.llap.daemon.service.hosts=$hive_llap_daemon_service_hosts --conf spark.jars=$hwc_jar --conf spark.submit.pyFiles=$hwc_pyfile --conf spark.security.credentials.hiveserver2.enabled=false --conf spark.sql.hive.hiveserver2.jdbc.url=\"$hive_jdbc_url\" --conf spark.sql.hive.zookeeper.quorum=\"$hive_zookeeper_quorum\" \n"

    echo -e "Once in the Scala REPL, run the following snippet example to test basic conectivity:\n"
    echo -e "scala> import com.hortonworks.hwc.HiveWarehouseSession"
    echo "scala> import com.hortonworks.hwc.HiveWarehouseSession._"
    echo "scala> val hive = HiveWarehouseSession.session(spark).build()"
    echo -e "scala> hive.showDatabases().show()\n"

else
     echo -e $hive_site_llap" and/or "$beeline_site_llap" doesn't exist on this host, or the current user $(whoami) doesn't have access to the files\n"
     echo "Try running this command as the root or hive user"
fi