#!/bin/bash

cd `dirname $0`
cd ..
BASE_DIR=`pwd`

SCRIPTNAME=pio-setup
PIOSETUP_VERSION=1.1.2

PIO_HOME=$BASE_DIR/predictionio
SPARK_HOME=$BASE_DIR/spark
ES_HOME=$BASE_DIR/elasticsearch
TARGET_DIR=$BASE_DIR/target
TEMPLATE_DIR=$BASE_DIR/templates

DEFAULT_PIO_VERSION=0.12.0-incubating
DEFAULT_SCALA_VERSION=2.11.8
DEFAULT_SPARK_VERSION=2.2.0
DEFAULT_HADOOP_VERSION=2.7.3
DEFAULT_ELASTICSEARCH_VERSION=5.6.3
DEFAULT_PIO_GIT_USER=jpioug
DEFAULT_PIO_GIT_BRANCH=pio_setup

PIO_CMD=$PIO_HOME/bin/pio
PIO_PID_FILE=$PIO_HOME/pid.txt
ES_PID_FILE=$ES_HOME/pid.txt
PREDICT_PID_FILE=predict_pid.txt
LOG_FILE=$TARGET_DIR/pio-setup.log
IMPORT_PY_FILE=data/import_eventserver.py

setup_all() {
    if [ x"$SCALA_VERSION" = x ] ; then
        SCALA_VERSION=$DEFAULT_SCALA_VERSION
    fi
    if [ x"$SPARK_VERSION" = x ] ; then
        SPARK_VERSION=$DEFAULT_SPARK_VERSION
    fi
    if [ x"$HADOOP_VERSION" = x ] ; then
        HADOOP_VERSION=$DEFAULT_HADOOP_VERSION
    fi
    if [ x"$ELASTICSEARCH_VERSION" = x ] ; then
        ELASTICSEARCH_VERSION=$DEFAULT_ELASTICSEARCH_VERSION
    fi
    if [ x"$SPARK_FILE" = x ] ; then
        SPARK_FILE=spark-${SPARK_VERSION}-bin-hadoop2.6.tgz
    fi
    if [ x"$ES_FILE" = x ] ; then
        ES_FILE=elasticsearch-${ELASTICSEARCH_VERSION}.tar.gz
    fi
    if [ x"$PIO_GIT_USER" = x ] ; then
        PIO_GIT_USER=$DEFAULT_PIO_GIT_USER
    fi
    if [ x"$PIO_GIT_REPO" = x ] ; then
        PIO_GIT_REPO=incubator-predictionio
    fi
    if [ x"$PIO_GIT_BRANCH" = x ] ; then
        PIO_GIT_BRANCH=$DEFAULT_PIO_GIT_BRANCH
    fi
    if [ x"$PIO_EVENTDATA_REFRESH" = x ] ; then
        PIO_EVENTDATA_REFRESH=false
    fi
    if [ x"$PIO_VERSION" = x ] ; then
        PIO_VERSION=$DEFAULT_PIO_VERSION
    fi
    mkdir -p $TARGET_DIR
    setup_pio
    setup_spark
    setup_es
}

clean_all() {
    echo "Cleaning up..."
    rm -rf $PIO_HOME
    rm -rf $ES_HOME
    rm -rf $SPARK_HOME
    rm -rf $TARGET_DIR
}
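# The defaults applied by setup_all can be overridden from the environment
# at invocation time. The example below is illustrative only (it assumes
# this script is installed as bin/pio-setup); adjust versions and the Git
# user/branch to whatever you actually want to build against.
#
#   PIO_VERSION=git PIO_GIT_USER=apache PIO_GIT_BRANCH=develop \
#       SPARK_VERSION=2.2.0 ./bin/pio-setup deploy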
setup_pio() {
    rm -rf $PIO_HOME
    cd $TARGET_DIR
    if [ x"$PIO_VERSION" = "xgit" ] ; then
        if [ ! -d $PIO_GIT_REPO ] ; then
            echo "Cloning PredictionIO..."
            git clone https://github.com/$PIO_GIT_USER/$PIO_GIT_REPO.git
            cd $PIO_GIT_REPO
            if [ x"$PIO_GIT_BRANCH" != "xdevelop" ] ; then
                git checkout -b $PIO_GIT_BRANCH origin/$PIO_GIT_BRANCH
            fi
        else
            cd $PIO_GIT_REPO
        fi
        echo "Building PredictionIO..."
        bash ./make-distribution.sh \
            -Dscala.version=$SCALA_VERSION \
            -Dspark.version=$SPARK_VERSION \
            -Dhadoop.version=$HADOOP_VERSION \
            -Delasticsearch.version=$ELASTICSEARCH_VERSION \
            $PIO_MAKE_OPTS
        if [ $? != 0 ] ; then
            echo "Build Failed!"
            exit 1
        fi
    else
        DOWNLOAD_SCALA_VERSION=`echo $SCALA_VERSION | sed -e "s/\(.*\)\.[0-9]*/\1/"`
        DOWNLOAD_SPARK_VERSION=`echo $SPARK_VERSION | sed -e "s/\(.*\)\.[0-9]*/\1/"`
        curl -s -L -o PredictionIO-${PIO_VERSION}.tar.gz \
            https://github.com/jpioug/incubator-predictionio/releases/download/v${PIO_VERSION}-pio-setup/PredictionIO-${PIO_VERSION}_${DOWNLOAD_SCALA_VERSION}_${DOWNLOAD_SPARK_VERSION}.tar.gz
    fi
    echo "Deploying PredictionIO..."
    PIO_NAME=`ls PredictionIO-*.tar.gz | sed -e "s/\(.*\).tar.gz/\1/"`
    if [ ! -e "${PIO_NAME}.tar.gz" ] ; then
        echo "$TARGET_DIR/${PIO_NAME}.tar.gz does not exist."
        exit 1
    fi
    tar zxvf ${PIO_NAME}.tar.gz
    mv $PIO_NAME $PIO_HOME
    ES_NAME=ELASTICSEARCH
    PIO_ENV_FILE=$PIO_HOME/conf/pio-env.sh
    replace_line "s/^SPARK_HOME/# SPARK_HOME/" $PIO_ENV_FILE
    replace_line "s/^POSTGRES_JDBC_DRIVER/# POSTGRES_JDBC_DRIVER/" $PIO_ENV_FILE
    replace_line "s/^MYSQL_JDBC_DRIVER/# MYSQL_JDBC_DRIVER/" $PIO_ENV_FILE
    replace_line "s/PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL/PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=${ES_NAME}/" $PIO_ENV_FILE
    replace_line "s/PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=PGSQL/PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=${ES_NAME}/" $PIO_ENV_FILE
    replace_line "s/PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=PGSQL/PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=LOCALFS/" $PIO_ENV_FILE
    replace_line "s/^PIO_STORAGE_SOURCES_PGSQL_/# PIO_STORAGE_SOURCES_PGSQL_/g" $PIO_ENV_FILE
    replace_line "s/# PIO_STORAGE_SOURCES_LOCALFS/PIO_STORAGE_SOURCES_LOCALFS/" $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch' >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost' >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200' >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http' >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME='$ES_HOME >> $PIO_ENV_FILE
    echo 'SPARK_HOME='$SPARK_HOME >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_PIO_META_NUM_OF_SHARDS=1' >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_PIO_META_NUM_OF_REPLICAS=0' >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_PIO_EVENT_NUM_OF_SHARDS=5' >> $PIO_ENV_FILE
    echo 'PIO_STORAGE_SOURCES_ELASTICSEARCH_PIO_EVENT_NUM_OF_REPLICAS=0' >> $PIO_ENV_FILE
    echo "PIO_STORAGE_SOURCES_ELASTICSEARCH_EVENTDATA_REFRESH=$PIO_EVENTDATA_REFRESH" >> $PIO_ENV_FILE
    mkdir -p $PIO_HOME/log
    echo "Deployed PredictionIO"
}

replace_line() {
    REPLACEMENT=$1
    FILE=$2
    if [ `uname` = "Linux" ] ; then
        sed -i "$REPLACEMENT" "$FILE"
    else
        sed -i '' "$REPLACEMENT" "$FILE"
    fi
}

setup_spark() {
    rm -rf $SPARK_HOME
    cd $TARGET_DIR
    if [ ! -f $SPARK_FILE ] ; then
        echo "Downloading Spark..."
        curl -s -o $SPARK_FILE http://d3kbcqa49mib13.cloudfront.net/$SPARK_FILE
    fi
    echo "Deploying Spark..."
    rm -rf vendor
    mkdir vendor
    tar zxvfC $SPARK_FILE vendor > /dev/null
    rm -rf $SPARK_HOME
    mv vendor/* $SPARK_HOME
    echo "spark.locality.wait.node 0s" >> $SPARK_HOME/conf/spark-defaults.conf
    echo "Deployed Spark"
}

setup_es() {
    rm -rf $ES_HOME
    cd $TARGET_DIR
    if [ ! -f $ES_FILE ] ; then
        echo "Downloading Elasticsearch..."
        curl -s -o $ES_FILE https://artifacts.elastic.co/downloads/elasticsearch/$ES_FILE
    fi
    echo "Deploying Elasticsearch..."
    rm -rf vendor
    mkdir vendor
    tar zxvfC $ES_FILE vendor > /dev/null
    mv vendor/* $ES_HOME
    ES_CONF_FILE=$ES_HOME/config/elasticsearch.yml
    echo 'http.cors.enabled: true' >> $ES_CONF_FILE
    echo 'http.cors.allow-origin: "*"' >> $ES_CONF_FILE
    echo 'network.host: "0"' >> $ES_CONF_FILE
    echo 'discovery.type: "single-node"' >> $ES_CONF_FILE
    echo "Deployed Elasticsearch"
}

start_pio() {
    export PIO_HOME
    export ES_HOME
    if [ -f $ES_PID_FILE ] ; then
        kill `cat $ES_PID_FILE` >/dev/null 2>&1
    fi
    if [ -f $PIO_PID_FILE ] ; then
        kill `cat $PIO_PID_FILE` >/dev/null 2>&1
    fi
    mkdir -p $TARGET_DIR
    echo "Starting PredictionIO..."
    nohup $ES_HOME/bin/elasticsearch >> $LOG_FILE 2>&1 &
    echo $! > $ES_PID_FILE
    ret=7
    while [ $ret = 7 ] ; do
        sleep 1
        if ! ps -p `cat $ES_PID_FILE` > /dev/null ; then
            echo "Failed to start Elasticsearch"
            exit 1
        fi
        curl -s -XGET 'localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s' >> $LOG_FILE
        ret=$?
    done
    $PIO_CMD eventserver >> $LOG_FILE 2>&1 &
    echo $! > $PIO_PID_FILE
    echo "Started PredictionIO"
}
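# After "start", Elasticsearch and the event server run in the background and
# log to $LOG_FILE. A quick health check, shown as a hedged example (9200
# matches the settings written to pio-env.sh above; 7070 is the event server's
# default port and may differ if you changed it):
#
#   curl -s http://localhost:9200/_cluster/health    # Elasticsearch
#   curl -s http://localhost:7070                     # PredictionIO event server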
stop_pio() {
    export PIO_HOME
    export ES_HOME
    if [ -f $PIO_PID_FILE ] ; then
        kill `cat $PIO_PID_FILE` >/dev/null 2>&1
        echo "Stopped PredictionIO"
    fi
    rm -f $PIO_PID_FILE
    if [ -f $ES_PID_FILE ] ; then
        curl -s -XPOST 'localhost:9200/_flush?wait_if_ongoing=true' >> $LOG_FILE
        kill `cat $ES_PID_FILE` >/dev/null 2>&1
        echo "Stopped Elasticsearch"
    fi
    rm -f $ES_PID_FILE
}

status_pio() {
    export PIO_HOME
    export ES_HOME
    echo "Checking PredictionIO Status..."
    $PIO_CMD status
}

template_get() {
    mkdir -p $TEMPLATE_DIR
    cd $TEMPLATE_DIR
    if [ x"$2" = "x" ] ; then
        GIT_USERS="apache jpioug"
        GIT_REPOS="$1 predictionio-template-$1 predictionio-$1 incubator-predictionio-template-$1 incubator-predictionio-$1 incubator-$1"
    else
        GIT_USERS="$1"
        GIT_REPOS="$2 predictionio-template-$2 predictionio-$2 incubator-predictionio-template-$2 incubator-predictionio-$2 incubator-$2"
        if [ x"$3" != "x" ] ; then
            GIT_BRANCH=$3
        fi
    fi
    GIT_FOUND=0
    for user in `echo $GIT_USERS` ; do
        for repo in `echo $GIT_REPOS` ; do
            echo "Accessing https://github.com/$user/${repo}..."
            HTTP_STATUS=`curl -LI -o /dev/null -w '%{http_code}' -s https://github.com/$user/$repo`
            if [ x"$HTTP_STATUS" = x200 ] ; then
                if [ -d "$repo" ] ; then
                    echo "$repo exists."
                    exit 1
                fi
                echo "Cloning https://github.com/$user/${repo}..."
                git clone https://github.com/$user/$repo.git
                GIT_FOUND=1
                break
            fi
        done
        if [ $GIT_FOUND = 1 ] ; then
            break
        fi
    done
    if [ $GIT_FOUND = 0 ] ; then
        echo "No matching Git repository was found."
        exit 1
    fi
    if [ x"$GIT_BRANCH" != "x" ] ; then
        cd $repo
        echo "Changing branch to ${GIT_BRANCH}..."
        git checkout -b $GIT_BRANCH origin/$GIT_BRANCH
    fi
}

template_lookup() {
    TEMPLATE_NAMES="$1 predictionio-template-$1 predictionio-$1 incubator-predictionio-template-$1 incubator-predictionio-$1 incubator-$1"
    for name in $TEMPLATE_NAMES ; do
        TEMPLATE_PATH=$TEMPLATE_DIR/$name
        ls "$TEMPLATE_PATH" > /dev/null 2>&1
        if [ $? = 0 ] ; then
            echo "$name"
            return
        fi
    done
    echo "Template directory was not found."
    exit 1
}

template_init() {
    TEMPLATE_NAME=`template_lookup $1`
    cd $TEMPLATE_DIR/$TEMPLATE_NAME
    APP_NAME=`grep \"appName\" engine.json | head -n1 | sed -e "s/.*appName\".*:.*\"\([^\"]*\)\".*/\1/"`
    if [ x"$APP_NAME" = "x" ] ; then
        APP_NAME=`echo $TEMPLATE_NAME | sed -e "s/.*template-//"`
    elif [ x"$APP_NAME" = "xINVALID_APP_NAME" ] ; then
        APP_NAME=`echo $TEMPLATE_NAME | sed -e "s/.*template-//"`
        replace_line "s/INVALID_APP_NAME/$APP_NAME/" engine.json
    fi
    echo "Registering ${APP_NAME} app..."
    $PIO_CMD app new $APP_NAME
}

template_accesskey() {
    APP_NAME=`grep \"appName\" engine.json | head -n1 | sed -e "s/.*appName\".*:.*\"\([^\"]*\)\".*/\1/"`
    $PIO_CMD app show $APP_NAME | grep "Access Key" | sed -e "s/.*Access Key: \(.*\) |.*/\1/"
}

template_build() {
    TEMPLATE_NAME=`template_lookup $1`
    cd $TEMPLATE_DIR/$TEMPLATE_NAME
    echo "Building ${TEMPLATE_NAME}..."
    $PIO_CMD build
}

template_train() {
    TEMPLATE_NAME=`template_lookup $1`
    cd $TEMPLATE_DIR/$TEMPLATE_NAME
    if [ -f train.py ] ; then
        PY_OPT="--main-py-file train.py"
    fi
    echo "Training ${TEMPLATE_NAME}..."
    $PIO_CMD train $PY_OPT
}

template_deploy() {
    TEMPLATE_NAME=`template_lookup $1`
    cd $TEMPLATE_DIR/$TEMPLATE_NAME
    if [ -f $PREDICT_PID_FILE ] ; then
        kill `cat $PREDICT_PID_FILE` >/dev/null 2>&1
    fi
    echo "Deploying ${TEMPLATE_NAME}..."
    $PIO_CMD deploy >> $LOG_FILE 2>&1 &
    echo $! > $PREDICT_PID_FILE
}
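# "template deploy" starts the engine server for the template in the
# background (PredictionIO's default port is 8000). The query payload is
# template-specific; the body below is only a hypothetical example for a
# recommendation-style template, not a fixed API:
#
#   curl -s -H "Content-Type: application/json" \
#       -d '{ "user": "1", "num": 4 }' http://localhost:8000/queries.json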
template_undeploy() {
    TEMPLATE_NAME=`template_lookup $1`
    cd $TEMPLATE_DIR/$TEMPLATE_NAME
    echo "Undeploying ${TEMPLATE_NAME}..."
    if [ -f $PREDICT_PID_FILE ] ; then
        kill `cat $PREDICT_PID_FILE` >/dev/null 2>&1
    fi
    rm -f $PREDICT_PID_FILE
}

template_import() {
    TEMPLATE_NAME=`template_lookup $1`
    cd $TEMPLATE_DIR/$TEMPLATE_NAME
    ACCESS_KEY=`template_accesskey`
    grep "access-key" $IMPORT_PY_FILE >/dev/null 2>&1
    if [ $? = 0 ] ; then
        ACCESS_KEY_OPT="--access-key $ACCESS_KEY"
    else
        ACCESS_KEY_OPT="--access_key $ACCESS_KEY"
    fi
    echo "Importing data for ${TEMPLATE_NAME}..."
    python $IMPORT_PY_FILE $ACCESS_KEY_OPT
}

case "$1" in
    start)
        start_pio
        ;;
    stop)
        stop_pio
        ;;
    status)
        status_pio
        ;;
    deploy)
        setup_all
        ;;
    clean)
        stop_pio
        clean_all
        ;;
    template)
        shift
        case "$1" in
            get)
                shift
                template_get $@
                ;;
            init)
                shift
                template_init $@
                ;;
            import)
                shift
                template_import $@
                ;;
            build)
                shift
                template_build $@
                ;;
            train)
                shift
                template_train $@
                ;;
            deploy)
                shift
                template_deploy $@
                ;;
            undeploy)
                shift
                template_undeploy $@
                ;;
            *)
                echo "Usage: $SCRIPTNAME template {get|init|import|build|train|deploy|undeploy}" >&2
                exit 3
                ;;
        esac
        ;;
    version)
        echo "PredictionIO Setup $PIOSETUP_VERSION"
        ;;
    *)
        echo "Usage: $SCRIPTNAME {start|stop|status|deploy|clean|template|version}" >&2
        exit 3
        ;;
esac
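# Typical end-to-end workflow, for reference only (assumes this script is
# installed as bin/pio-setup and run from the distribution root; replace
# <template> with the name of the template to fetch):
#
#   ./bin/pio-setup deploy                     # set up PredictionIO, Spark, Elasticsearch
#   ./bin/pio-setup start                      # start Elasticsearch and the event server
#   ./bin/pio-setup template get <template>
#   ./bin/pio-setup template init <template>
#   ./bin/pio-setup template import <template>
#   ./bin/pio-setup template build <template>
#   ./bin/pio-setup template train <template>
#   ./bin/pio-setup template deploy <template>
#   ./bin/pio-setup template undeploy <template>
#   ./bin/pio-setup stop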