-----------------------------------------------------------------------------
-- Hivemall: Hive scalable Machine Learning Library
-----------------------------------------------------------------------------
--
-- Functions are registered with a DROP FUNCTION / CREATE FUNCTION pair, and
-- every CREATE loads its class from the JAR named by the `hivemall_jar`
-- variable.
--
-- Optional setup statements, left commented out (adjust before enabling):
-- CREATE DATABASE IF NOT EXISTS hivemall;
-- USE hivemall;
-- set hivevar:hivemall_jar=hdfs:///apps/hivemall/hivemall-with-dependencies.jar;

DROP FUNCTION IF EXISTS hivemall_version;
CREATE FUNCTION hivemall_version AS 'hivemall.HivemallVersionUDF'
    USING JAR '${hivemall_jar}';

---------------------------
-- binary classification --
---------------------------

DROP FUNCTION IF EXISTS train_classifier;
CREATE FUNCTION train_classifier AS 'hivemall.classifier.GeneralClassifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_perceptron;
CREATE FUNCTION train_perceptron AS 'hivemall.classifier.PerceptronUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_pa;
CREATE FUNCTION train_pa AS 'hivemall.classifier.PassiveAggressiveUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_pa1;
CREATE FUNCTION train_pa1 AS 'hivemall.classifier.PassiveAggressiveUDTF$PA1'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_pa2;
CREATE FUNCTION train_pa2 AS 'hivemall.classifier.PassiveAggressiveUDTF$PA2'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_cw;
CREATE FUNCTION train_cw AS 'hivemall.classifier.ConfidenceWeightedUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_arow;
CREATE FUNCTION train_arow AS 'hivemall.classifier.AROWClassifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_arowh;
CREATE FUNCTION train_arowh AS 'hivemall.classifier.AROWClassifierUDTF$AROWh'
    USING JAR '${hivemall_jar}';

-- train_scw is bound to the SCW1 nested class, train_scw2 to SCW2.
DROP FUNCTION IF EXISTS train_scw;
CREATE FUNCTION train_scw AS 'hivemall.classifier.SoftConfideceWeightedUDTF$SCW1'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_scw2;
CREATE FUNCTION train_scw2 AS 'hivemall.classifier.SoftConfideceWeightedUDTF$SCW2'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_adagrad_rda;
CREATE FUNCTION train_adagrad_rda AS 'hivemall.classifier.AdaGradRDAUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_kpa;
CREATE FUNCTION train_kpa AS 'hivemall.classifier.KernelExpansionPassiveAggressiveUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS kpa_predict;
CREATE FUNCTION kpa_predict AS 'hivemall.classifier.KPAPredictUDAF'
    USING JAR '${hivemall_jar}';

--------------------------------
--  Multiclass classification --
--------------------------------

DROP FUNCTION IF EXISTS train_multiclass_perceptron;
CREATE FUNCTION train_multiclass_perceptron AS 'hivemall.classifier.multiclass.MulticlassPerceptronUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_pa;
CREATE FUNCTION train_multiclass_pa AS 'hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_pa1;
CREATE FUNCTION train_multiclass_pa1 AS 'hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF$PA1'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_pa2;
CREATE FUNCTION train_multiclass_pa2 AS 'hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF$PA2'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_cw;
CREATE FUNCTION train_multiclass_cw AS 'hivemall.classifier.multiclass.MulticlassConfidenceWeightedUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_arow;
CREATE FUNCTION train_multiclass_arow AS 'hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_arowh;
CREATE FUNCTION train_multiclass_arowh AS 'hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF$AROWh'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_scw;
CREATE FUNCTION train_multiclass_scw AS 'hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF$SCW1'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_scw2;
CREATE FUNCTION train_multiclass_scw2 AS 'hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF$SCW2'
    USING JAR '${hivemall_jar}';

--------------------------
-- similarity functions --
--------------------------

DROP FUNCTION IF EXISTS cosine_similarity;
CREATE FUNCTION cosine_similarity AS 'hivemall.knn.similarity.CosineSimilarityUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS jaccard_similarity;
CREATE FUNCTION jaccard_similarity AS 'hivemall.knn.similarity.JaccardIndexUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS angular_similarity;
CREATE FUNCTION angular_similarity AS 'hivemall.knn.similarity.AngularSimilarityUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS euclid_similarity;
CREATE FUNCTION euclid_similarity AS 'hivemall.knn.similarity.EuclidSimilarity'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS distance2similarity;
CREATE FUNCTION distance2similarity AS 'hivemall.knn.similarity.Distance2SimilarityUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS dimsum_mapper;
CREATE FUNCTION dimsum_mapper AS 'hivemall.knn.similarity.DIMSUMMapperUDTF'
    USING JAR '${hivemall_jar}';

------------------------
-- distance functions --
------------------------

DROP FUNCTION IF EXISTS popcnt;
CREATE FUNCTION popcnt AS 'hivemall.knn.distance.PopcountUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS kld;
CREATE FUNCTION kld AS 'hivemall.knn.distance.KLDivergenceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS hamming_distance;
CREATE FUNCTION hamming_distance AS 'hivemall.knn.distance.HammingDistanceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS euclid_distance;
CREATE FUNCTION euclid_distance AS 'hivemall.knn.distance.EuclidDistanceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS cosine_distance;
CREATE FUNCTION cosine_distance AS 'hivemall.knn.distance.CosineDistanceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS angular_distance;
-- distance functions (continued)
CREATE FUNCTION angular_distance AS 'hivemall.knn.distance.AngularDistanceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS jaccard_distance;
CREATE FUNCTION jaccard_distance AS 'hivemall.knn.distance.JaccardDistanceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS manhattan_distance;
CREATE FUNCTION manhattan_distance AS 'hivemall.knn.distance.ManhattanDistanceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS minkowski_distance;
CREATE FUNCTION minkowski_distance AS 'hivemall.knn.distance.MinkowskiDistanceUDF'
    USING JAR '${hivemall_jar}';

-------------------
-- LSH functions --
-------------------

DROP FUNCTION IF EXISTS minhashes;
CREATE FUNCTION minhashes AS 'hivemall.knn.lsh.MinHashesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS minhash;
CREATE FUNCTION minhash AS 'hivemall.knn.lsh.MinHashUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bbit_minhash;
CREATE FUNCTION bbit_minhash AS 'hivemall.knn.lsh.bBitMinHashUDF'
    USING JAR '${hivemall_jar}';

----------------------
-- voting functions --
----------------------

DROP FUNCTION IF EXISTS voted_avg;
CREATE FUNCTION voted_avg AS 'hivemall.ensemble.bagging.VotedAvgUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS weight_voted_avg;
CREATE FUNCTION weight_voted_avg AS 'hivemall.ensemble.bagging.WeightVotedAvgUDAF'
    USING JAR '${hivemall_jar}';

--------------------
-- misc functions --
--------------------
-- (classes from the hivemall.ensemble package)

DROP FUNCTION IF EXISTS max_label;
CREATE FUNCTION max_label AS 'hivemall.ensemble.MaxValueLabelUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS maxrow;
CREATE FUNCTION maxrow AS 'hivemall.ensemble.MaxRowUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS argmin_kld;
CREATE FUNCTION argmin_kld AS 'hivemall.ensemble.ArgminKLDistanceUDAF'
    USING JAR '${hivemall_jar}';

-----------------------
-- hashing functions --
-----------------------

DROP FUNCTION IF EXISTS mhash;
CREATE FUNCTION mhash AS 'hivemall.ftvec.hashing.MurmurHash3UDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_hash_values;
CREATE FUNCTION array_hash_values AS 'hivemall.ftvec.hashing.ArrayHashValuesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS prefixed_hash_values;
CREATE FUNCTION prefixed_hash_values AS 'hivemall.ftvec.hashing.ArrayPrefixedHashValuesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS feature_hashing;
CREATE FUNCTION feature_hashing AS 'hivemall.ftvec.hashing.FeatureHashingUDF'
    USING JAR '${hivemall_jar}';

-----------------------
-- pairing functions --
-----------------------

DROP FUNCTION IF EXISTS polynomial_features;
CREATE FUNCTION polynomial_features AS 'hivemall.ftvec.pairing.PolynomialFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS powered_features;
CREATE FUNCTION powered_features AS 'hivemall.ftvec.pairing.PoweredFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS feature_pairs;
CREATE FUNCTION feature_pairs AS 'hivemall.ftvec.pairing.FeaturePairsUDTF'
    USING JAR '${hivemall_jar}';

-----------------------
-- scaling functions --
-----------------------

DROP FUNCTION IF EXISTS rescale;
CREATE FUNCTION rescale AS 'hivemall.ftvec.scaling.RescaleUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS zscore;
CREATE FUNCTION zscore AS 'hivemall.ftvec.scaling.ZScoreUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS l1_normalize;
CREATE FUNCTION l1_normalize AS 'hivemall.ftvec.scaling.L1NormalizationUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS l2_normalize;
CREATE FUNCTION l2_normalize AS 'hivemall.ftvec.scaling.L2NormalizationUDF'
    USING JAR '${hivemall_jar}';

---------------------------------
-- Feature Selection functions --
---------------------------------

DROP FUNCTION IF EXISTS chi2;
CREATE FUNCTION chi2 AS 'hivemall.ftvec.selection.ChiSquareUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS snr;
CREATE FUNCTION snr AS 'hivemall.ftvec.selection.SignalNoiseRatioUDAF'
    USING JAR '${hivemall_jar}';

--------------------
-- misc functions --
--------------------
-- (classes from the hivemall.ftvec package)

DROP FUNCTION IF EXISTS amplify;
CREATE FUNCTION amplify AS 'hivemall.ftvec.amplify.AmplifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS rand_amplify;
CREATE FUNCTION rand_amplify AS 'hivemall.ftvec.amplify.RandomAmplifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS add_bias;
CREATE FUNCTION add_bias AS 'hivemall.ftvec.AddBiasUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS sort_by_feature;
CREATE FUNCTION sort_by_feature AS 'hivemall.ftvec.SortByFeatureUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS extract_feature;
CREATE FUNCTION extract_feature AS 'hivemall.ftvec.ExtractFeatureUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS extract_weight;
CREATE FUNCTION extract_weight AS 'hivemall.ftvec.ExtractWeightUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS add_feature_index;
CREATE FUNCTION add_feature_index AS 'hivemall.ftvec.AddFeatureIndexUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS feature;
CREATE FUNCTION feature AS 'hivemall.ftvec.FeatureUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS feature_index;
CREATE FUNCTION feature_index AS 'hivemall.ftvec.FeatureIndexUDF'
    USING JAR '${hivemall_jar}';

----------------------------------
-- feature converting functions --
----------------------------------

DROP FUNCTION IF EXISTS conv2dense;
CREATE FUNCTION conv2dense AS 'hivemall.ftvec.conv.ConvertToDenseModelUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS to_dense_features;
CREATE FUNCTION to_dense_features AS 'hivemall.ftvec.conv.ToDenseFeaturesUDF'
    USING JAR '${hivemall_jar}';

-- alias (same class as to_dense_features)
DROP FUNCTION IF EXISTS to_dense;
CREATE FUNCTION to_dense AS 'hivemall.ftvec.conv.ToDenseFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS to_sparse_features;
CREATE FUNCTION to_sparse_features AS 'hivemall.ftvec.conv.ToSparseFeaturesUDF'
    USING JAR '${hivemall_jar}';

-- alias (same class as to_sparse_features)
DROP FUNCTION IF EXISTS to_sparse;
CREATE FUNCTION to_sparse AS 'hivemall.ftvec.conv.ToSparseFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS quantify;
CREATE FUNCTION quantify AS 'hivemall.ftvec.conv.QuantifyColumnsUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS build_bins;
CREATE FUNCTION build_bins AS 'hivemall.ftvec.binning.BuildBinsUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS feature_binning;
CREATE FUNCTION feature_binning AS 'hivemall.ftvec.binning.FeatureBinningUDF'
    USING JAR '${hivemall_jar}';

--------------------------
-- feature transformers --
--------------------------

DROP FUNCTION IF EXISTS vectorize_features;
CREATE FUNCTION vectorize_features AS 'hivemall.ftvec.trans.VectorizeFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS categorical_features;
CREATE FUNCTION categorical_features AS 'hivemall.ftvec.trans.CategoricalFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS ffm_features;
CREATE FUNCTION ffm_features AS 'hivemall.ftvec.trans.FFMFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS indexed_features;
CREATE FUNCTION indexed_features AS 'hivemall.ftvec.trans.IndexedFeatures'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS quantified_features;
CREATE FUNCTION quantified_features AS 'hivemall.ftvec.trans.QuantifiedFeaturesUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS quantitative_features;
CREATE FUNCTION quantitative_features AS 'hivemall.ftvec.trans.QuantitativeFeaturesUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS binarize_label;
CREATE FUNCTION binarize_label AS 'hivemall.ftvec.trans.BinarizeLabelUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS onehot_encoding;
CREATE FUNCTION onehot_encoding AS 'hivemall.ftvec.trans.OnehotEncodingUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS add_field_indices;
CREATE FUNCTION add_field_indices AS 'hivemall.ftvec.trans.AddFieldIndicesUDF'
    USING JAR '${hivemall_jar}';

-- alias for backward compatibility (older spelling of add_field_indices)
DROP FUNCTION IF EXISTS add_field_indicies;
CREATE FUNCTION add_field_indicies AS 'hivemall.ftvec.trans.AddFieldIndicesUDF'
    USING JAR '${hivemall_jar}';

------------------------------
-- ranking helper functions --
------------------------------

DROP FUNCTION IF EXISTS bpr_sampling;
CREATE FUNCTION bpr_sampling AS 'hivemall.ftvec.ranking.BprSamplingUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS item_pairs_sampling;
CREATE FUNCTION item_pairs_sampling AS 'hivemall.ftvec.ranking.ItemPairsSamplingUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS populate_not_in;
CREATE FUNCTION populate_not_in AS 'hivemall.ftvec.ranking.PopulateNotInUDTF'
    USING JAR '${hivemall_jar}';

--------------------------
-- ftvec/text functions --
--------------------------

DROP FUNCTION IF EXISTS tf;
CREATE FUNCTION tf AS 'hivemall.ftvec.text.TermFrequencyUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bm25;
CREATE FUNCTION bm25 AS 'hivemall.ftvec.text.OkapiBM25UDF'
    USING JAR '${hivemall_jar}';

--------------------------
-- Regression functions --
--------------------------

DROP FUNCTION IF EXISTS train_regressor;
CREATE FUNCTION train_regressor AS 'hivemall.regression.GeneralRegressorUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_logregr;
CREATE FUNCTION train_logregr AS 'hivemall.regression.LogressUDTF'
    USING JAR '${hivemall_jar}';

-- alias for backward compatibility
DROP FUNCTION IF EXISTS logress;
CREATE FUNCTION logress AS 'hivemall.regression.LogressUDTF'
    USING JAR '${hivemall_jar}';

-- registered with the same class as train_logregr
DROP FUNCTION IF EXISTS train_logistic_regr;
CREATE FUNCTION train_logistic_regr AS 'hivemall.regression.LogressUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_pa1_regr;
CREATE FUNCTION train_pa1_regr AS 'hivemall.regression.PassiveAggressiveRegressionUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_pa1a_regr;
CREATE FUNCTION train_pa1a_regr AS 'hivemall.regression.PassiveAggressiveRegressionUDTF$PA1a'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_pa2_regr;
CREATE FUNCTION train_pa2_regr AS 'hivemall.regression.PassiveAggressiveRegressionUDTF$PA2'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_pa2a_regr;
CREATE FUNCTION train_pa2a_regr AS 'hivemall.regression.PassiveAggressiveRegressionUDTF$PA2a'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_arow_regr;
CREATE FUNCTION train_arow_regr AS 'hivemall.regression.AROWRegressionUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_arowe_regr;
CREATE FUNCTION train_arowe_regr AS 'hivemall.regression.AROWRegressionUDTF$AROWe'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_arowe2_regr;
CREATE FUNCTION train_arowe2_regr AS 'hivemall.regression.AROWRegressionUDTF$AROWe2'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_adagrad_regr;
CREATE FUNCTION train_adagrad_regr AS 'hivemall.regression.AdaGradUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_adadelta_regr;
CREATE FUNCTION train_adadelta_regr AS 'hivemall.regression.AdaDeltaUDTF'
    USING JAR '${hivemall_jar}';

---------------------
-- array functions --
---------------------

DROP FUNCTION IF EXISTS float_array;
CREATE FUNCTION float_array AS 'hivemall.tools.array.AllocFloatArrayUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_remove;
CREATE FUNCTION array_remove AS 'hivemall.tools.array.ArrayRemoveUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS sort_and_uniq_array;
CREATE FUNCTION sort_and_uniq_array AS 'hivemall.tools.array.SortAndUniqArrayUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS subarray_endwith;
CREATE FUNCTION subarray_endwith AS 'hivemall.tools.array.SubarrayEndWithUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS subarray_startwith;
CREATE FUNCTION subarray_startwith AS 'hivemall.tools.array.SubarrayStartWithUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_concat;
CREATE FUNCTION array_concat AS 'hivemall.tools.array.ArrayConcatUDF'
    USING JAR '${hivemall_jar}';
-- array functions (continued)

-- alias for backward compatibility (same class as array_concat)
DROP FUNCTION IF EXISTS concat_array;
CREATE FUNCTION concat_array AS 'hivemall.tools.array.ArrayConcatUDF'
    USING JAR '${hivemall_jar}';

-- alias for backward compatibility (same class as array_slice)
DROP FUNCTION IF EXISTS subarray;
CREATE FUNCTION subarray AS 'hivemall.tools.array.ArraySliceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_slice;
CREATE FUNCTION array_slice AS 'hivemall.tools.array.ArraySliceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_avg;
CREATE FUNCTION array_avg AS 'hivemall.tools.array.ArrayAvgGenericUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_sum;
CREATE FUNCTION array_sum AS 'hivemall.tools.array.ArraySumUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS to_string_array;
CREATE FUNCTION to_string_array AS 'hivemall.tools.array.ToStringArrayUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_intersect;
CREATE FUNCTION array_intersect AS 'hivemall.tools.array.ArrayIntersectUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS select_k_best;
CREATE FUNCTION select_k_best AS 'hivemall.tools.array.SelectKBestUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_append;
CREATE FUNCTION array_append AS 'hivemall.tools.array.ArrayAppendUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS element_at;
CREATE FUNCTION element_at AS 'hivemall.tools.array.ArrayElementAtUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_union;
CREATE FUNCTION array_union AS 'hivemall.tools.array.ArrayUnionUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS first_element;
CREATE FUNCTION first_element AS 'hivemall.tools.array.FirstElementUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS last_element;
CREATE FUNCTION last_element AS 'hivemall.tools.array.LastElementUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_flatten;
CREATE FUNCTION array_flatten AS 'hivemall.tools.array.ArrayFlattenUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS array_to_str;
CREATE FUNCTION array_to_str AS 'hivemall.tools.array.ArrayToStrUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS conditional_emit;
CREATE FUNCTION conditional_emit AS 'hivemall.tools.array.ConditionalEmitUDTF'
    USING JAR '${hivemall_jar}';

-----------------------------
-- bit operation functions --
-----------------------------

DROP FUNCTION IF EXISTS bits_collect;
CREATE FUNCTION bits_collect AS 'hivemall.tools.bits.BitsCollectUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS to_bits;
CREATE FUNCTION to_bits AS 'hivemall.tools.bits.ToBitsUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS unbits;
CREATE FUNCTION unbits AS 'hivemall.tools.bits.UnBitsUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bits_or;
CREATE FUNCTION bits_or AS 'hivemall.tools.bits.BitsORUDF'
    USING JAR '${hivemall_jar}';

---------------------------
-- compression functions --
---------------------------

DROP FUNCTION IF EXISTS inflate;
CREATE FUNCTION inflate AS 'hivemall.tools.compress.InflateUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS deflate;
CREATE FUNCTION deflate AS 'hivemall.tools.compress.DeflateUDF'
    USING JAR '${hivemall_jar}';

---------------------
--  map functions  --
---------------------

DROP FUNCTION IF EXISTS map_get_sum;
CREATE FUNCTION map_get_sum AS 'hivemall.tools.map.MapGetSumUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS map_tail_n;
CREATE FUNCTION map_tail_n AS 'hivemall.tools.map.MapTailNUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS to_map;
CREATE FUNCTION to_map AS 'hivemall.tools.map.UDAFToMap'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS to_ordered_map;
CREATE FUNCTION to_ordered_map AS 'hivemall.tools.map.UDAFToOrderedMap'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS map_include_keys;
CREATE FUNCTION map_include_keys AS 'hivemall.tools.map.MapIncludeKeysUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS map_exclude_keys;
CREATE FUNCTION map_exclude_keys AS 'hivemall.tools.map.MapExcludeKeysUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS map_index;
CREATE FUNCTION map_index AS 'hivemall.tools.map.MapIndexUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS map_key_values;
CREATE FUNCTION map_key_values AS 'hivemall.tools.map.MapKeyValuesUDF'
    USING JAR '${hivemall_jar}';

---------------------
--  list functions --
---------------------

DROP FUNCTION IF EXISTS to_ordered_list;
CREATE FUNCTION to_ordered_list AS 'hivemall.tools.list.UDAFToOrderedList'
    USING JAR '${hivemall_jar}';

---------------------
--  Math functions --
---------------------

DROP FUNCTION IF EXISTS sigmoid;
CREATE FUNCTION sigmoid AS 'hivemall.tools.math.SigmoidGenericUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS l2_norm;
CREATE FUNCTION l2_norm AS 'hivemall.tools.math.L2NormUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS infinity;
CREATE FUNCTION infinity AS 'hivemall.tools.math.InfinityUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS is_infinite;
CREATE FUNCTION is_infinite AS 'hivemall.tools.math.IsInfiniteUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS is_finite;
CREATE FUNCTION is_finite AS 'hivemall.tools.math.IsFiniteUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS nan;
CREATE FUNCTION nan AS 'hivemall.tools.math.NanUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS is_nan;
CREATE FUNCTION is_nan AS 'hivemall.tools.math.IsNanUDF'
    USING JAR '${hivemall_jar}';

-----------------------------
-- Matrix/Vector functions --
-----------------------------

DROP FUNCTION IF EXISTS transpose_and_dot;
CREATE FUNCTION transpose_and_dot AS 'hivemall.tools.matrix.TransposeAndDotUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS vector_add;
CREATE FUNCTION vector_add AS 'hivemall.tools.vector.VectorAddUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS vector_dot;
CREATE FUNCTION vector_dot AS 'hivemall.tools.vector.VectorDotUDF'
    USING JAR '${hivemall_jar}';
----------------------
-- mapred functions --
----------------------

DROP FUNCTION IF EXISTS taskid;
CREATE FUNCTION taskid AS 'hivemall.tools.mapred.TaskIdUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS jobid;
CREATE FUNCTION jobid AS 'hivemall.tools.mapred.JobIdUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS rowid;
CREATE FUNCTION rowid AS 'hivemall.tools.mapred.RowIdUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS rownum;
CREATE FUNCTION rownum AS 'hivemall.tools.mapred.RowNumberUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS distcache_gets;
CREATE FUNCTION distcache_gets AS 'hivemall.tools.mapred.DistributedCacheLookupUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS jobconf_gets;
CREATE FUNCTION jobconf_gets AS 'hivemall.tools.mapred.JobConfGetsUDF'
    USING JAR '${hivemall_jar}';

--------------------
-- JSON functions --
--------------------

DROP FUNCTION IF EXISTS to_json;
CREATE FUNCTION to_json AS 'hivemall.tools.json.ToJsonUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS from_json;
CREATE FUNCTION from_json AS 'hivemall.tools.json.FromJsonUDF'
    USING JAR '${hivemall_jar}';

----------------------------
-- Sanity Check functions --
----------------------------

DROP FUNCTION IF EXISTS assert;
CREATE FUNCTION assert AS 'hivemall.tools.sanity.AssertUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS raise_error;
CREATE FUNCTION raise_error AS 'hivemall.tools.sanity.RaiseErrorUDF'
    USING JAR '${hivemall_jar}';

--------------------
-- misc functions --
--------------------
-- (classes from the hivemall.tools package)

DROP FUNCTION IF EXISTS generate_series;
CREATE FUNCTION generate_series AS 'hivemall.tools.GenerateSeriesUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS convert_label;
CREATE FUNCTION convert_label AS 'hivemall.tools.ConvertLabelUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS x_rank;
CREATE FUNCTION x_rank AS 'hivemall.tools.RankSequenceUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS each_top_k;
CREATE FUNCTION each_top_k AS 'hivemall.tools.EachTopKUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS try_cast;
CREATE FUNCTION try_cast AS 'hivemall.tools.TryCastUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS sessionize;
CREATE FUNCTION sessionize AS 'hivemall.tools.datetime.SessionizeUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS moving_avg;
CREATE FUNCTION moving_avg AS 'hivemall.tools.timeseries.MovingAverageUDTF'
    USING JAR '${hivemall_jar}';

-------------------------------
-- Text processing functions --
-------------------------------

DROP FUNCTION IF EXISTS tokenize;
CREATE FUNCTION tokenize AS 'hivemall.tools.text.TokenizeUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS is_stopword;
CREATE FUNCTION is_stopword AS 'hivemall.tools.text.StopwordUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS singularize;
CREATE FUNCTION singularize AS 'hivemall.tools.text.SingularizeUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS split_words;
CREATE FUNCTION split_words AS 'hivemall.tools.text.SplitWordsUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS normalize_unicode;
CREATE FUNCTION normalize_unicode AS 'hivemall.tools.text.NormalizeUnicodeUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS base91;
CREATE FUNCTION base91 AS 'hivemall.tools.text.Base91UDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS unbase91;
CREATE FUNCTION unbase91 AS 'hivemall.tools.text.Unbase91UDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS word_ngrams;
CREATE FUNCTION word_ngrams AS 'hivemall.tools.text.WordNgramsUDF'
    USING JAR '${hivemall_jar}';

---------------------------------
-- Dataset generator functions --
---------------------------------

DROP FUNCTION IF EXISTS lr_datagen;
CREATE FUNCTION lr_datagen AS 'hivemall.dataset.LogisticRegressionDataGeneratorUDTF'
    USING JAR '${hivemall_jar}';

--------------------------
-- Evaluating functions --
--------------------------

DROP FUNCTION IF EXISTS f1score;
CREATE FUNCTION f1score AS 'hivemall.evaluation.F1ScoreUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS fmeasure;
CREATE FUNCTION fmeasure AS 'hivemall.evaluation.FMeasureUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS mae;
CREATE FUNCTION mae AS 'hivemall.evaluation.MeanAbsoluteErrorUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS mse;
CREATE FUNCTION mse AS 'hivemall.evaluation.MeanSquaredErrorUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS rmse;
CREATE FUNCTION rmse AS 'hivemall.evaluation.RootMeanSquaredErrorUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS r2;
CREATE FUNCTION r2 AS 'hivemall.evaluation.R2UDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS ndcg;
CREATE FUNCTION ndcg AS 'hivemall.evaluation.NDCGUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS precision_at;
CREATE FUNCTION precision_at AS 'hivemall.evaluation.PrecisionUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS recall_at;
CREATE FUNCTION recall_at AS 'hivemall.evaluation.RecallUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS hitrate;
CREATE FUNCTION hitrate AS 'hivemall.evaluation.HitRateUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS mrr;
CREATE FUNCTION mrr AS 'hivemall.evaluation.MRRUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS average_precision;
CREATE FUNCTION average_precision AS 'hivemall.evaluation.MAPUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS auc;
CREATE FUNCTION auc AS 'hivemall.evaluation.AUCUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS logloss;
CREATE FUNCTION logloss AS 'hivemall.evaluation.LogarithmicLossUDAF'
    USING JAR '${hivemall_jar}';

--------------------------
-- Matrix Factorization --
--------------------------

DROP FUNCTION IF EXISTS mf_predict;
CREATE FUNCTION mf_predict AS 'hivemall.mf.MFPredictionUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_mf_sgd;
CREATE FUNCTION train_mf_sgd AS 'hivemall.mf.MatrixFactorizationSGDUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_mf_adagrad;
CREATE FUNCTION train_mf_adagrad AS 'hivemall.mf.MatrixFactorizationAdaGradUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_bprmf;
CREATE FUNCTION train_bprmf AS 'hivemall.mf.BPRMatrixFactorizationUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bprmf_predict;
CREATE FUNCTION bprmf_predict AS 'hivemall.mf.BPRMFPredictionUDF'
    USING JAR '${hivemall_jar}';

---------------------------
-- Factorization Machine --
---------------------------

DROP FUNCTION IF EXISTS fm_predict;
CREATE FUNCTION fm_predict AS 'hivemall.fm.FMPredictGenericUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_fm;
CREATE FUNCTION train_fm AS 'hivemall.fm.FactorizationMachineUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_ffm;
CREATE FUNCTION train_ffm AS 'hivemall.fm.FieldAwareFactorizationMachineUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS ffm_predict;
CREATE FUNCTION ffm_predict AS 'hivemall.fm.FFMPredictGenericUDAF'
    USING JAR '${hivemall_jar}';

---------------------------
-- Anomaly Detection ------
---------------------------

DROP FUNCTION IF EXISTS changefinder;
CREATE FUNCTION changefinder AS 'hivemall.anomaly.ChangeFinderUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS sst;
CREATE FUNCTION sst AS 'hivemall.anomaly.SingularSpectrumTransformUDF'
    USING JAR '${hivemall_jar}';

--------------------
-- Topic Modeling --
--------------------

DROP FUNCTION IF EXISTS train_lda;
CREATE FUNCTION train_lda AS 'hivemall.topicmodel.LDAUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS lda_predict;
CREATE FUNCTION lda_predict AS 'hivemall.topicmodel.LDAPredictUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_plsa;
CREATE FUNCTION train_plsa AS 'hivemall.topicmodel.PLSAUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS plsa_predict;
CREATE FUNCTION plsa_predict AS 'hivemall.topicmodel.PLSAPredictUDAF'
    USING JAR '${hivemall_jar}';

---------------------------
-- Geo-Spatial functions --
---------------------------

DROP FUNCTION IF EXISTS tile;
CREATE FUNCTION tile AS 'hivemall.geospatial.TileUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS map_url;
CREATE FUNCTION map_url AS 'hivemall.geospatial.MapURLUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS lat2tiley;
CREATE FUNCTION lat2tiley AS 'hivemall.geospatial.Lat2TileYUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS lon2tilex;
CREATE FUNCTION lon2tilex AS 'hivemall.geospatial.Lon2TileXUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS tilex2lon;
CREATE FUNCTION tilex2lon AS 'hivemall.geospatial.TileX2LonUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS tiley2lat;
CREATE FUNCTION tiley2lat AS 'hivemall.geospatial.TileY2LatUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS haversine_distance;
CREATE FUNCTION haversine_distance AS 'hivemall.geospatial.HaversineDistanceUDF'
    USING JAR '${hivemall_jar}';

----------------------------
-- Smile related features --
----------------------------

DROP FUNCTION IF EXISTS train_randomforest_classifier;
CREATE FUNCTION train_randomforest_classifier AS 'hivemall.smile.classification.RandomForestClassifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_randomforest_regressor;
CREATE FUNCTION train_randomforest_regressor AS 'hivemall.smile.regression.RandomForestRegressionUDTF'
    USING JAR '${hivemall_jar}';

-- registered with the same class as train_randomforest_regressor
DROP FUNCTION IF EXISTS train_randomforest_regr;
CREATE FUNCTION train_randomforest_regr AS 'hivemall.smile.regression.RandomForestRegressionUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS tree_predict;
CREATE FUNCTION tree_predict AS 'hivemall.smile.tools.TreePredictUDF'
    USING JAR '${hivemall_jar}';

-- for backward compatibility
DROP FUNCTION IF EXISTS tree_predict_v1;
CREATE FUNCTION tree_predict_v1 AS 'hivemall.smile.tools.TreePredictUDFv1'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS tree_export;
CREATE FUNCTION tree_export AS 'hivemall.smile.tools.TreeExportUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS rf_ensemble;
CREATE FUNCTION rf_ensemble AS 'hivemall.smile.tools.RandomForestEnsembleUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS guess_attribute_types;
CREATE FUNCTION guess_attribute_types AS 'hivemall.smile.tools.GuessAttributesUDF'
    USING JAR '${hivemall_jar}';

--------------------
-- Recommendation --
--------------------

DROP FUNCTION IF EXISTS train_slim;
CREATE FUNCTION train_slim AS 'hivemall.recommend.SlimUDTF'
    USING JAR '${hivemall_jar}';

-----------------
-- Data Sketch --
-----------------

DROP FUNCTION IF EXISTS approx_count_distinct;
CREATE FUNCTION approx_count_distinct AS 'hivemall.sketch.hll.ApproxCountDistinctUDAF'
    USING JAR '${hivemall_jar}';

------------------
-- Bloom Filter --
------------------

DROP FUNCTION IF EXISTS bloom;
CREATE FUNCTION bloom AS 'hivemall.sketch.bloom.BloomFilterUDAF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bloom_and;
CREATE FUNCTION bloom_and AS 'hivemall.sketch.bloom.BloomAndUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bloom_contains;
CREATE FUNCTION bloom_contains AS 'hivemall.sketch.bloom.BloomContainsUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bloom_not;
CREATE FUNCTION bloom_not AS 'hivemall.sketch.bloom.BloomNotUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bloom_or;
CREATE FUNCTION bloom_or AS 'hivemall.sketch.bloom.BloomOrUDF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS bloom_contains_any;
CREATE FUNCTION bloom_contains_any AS 'hivemall.sketch.bloom.BloomContainsAnyUDF'
    USING JAR '${hivemall_jar}';

------------------------------
-- XGBoost related features --
------------------------------
-- These DROP statements use IF EXISTS, like every other DROP in this script,
-- so a first-time install (where the functions are not registered yet) does
-- not depend on the hive.exec.drop.ignorenonexistent setting to succeed.

DROP FUNCTION IF EXISTS train_xgboost_regr;
CREATE FUNCTION train_xgboost_regr AS 'hivemall.xgboost.regression.XGBoostRegressionUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_xgboost_classifier;
CREATE FUNCTION train_xgboost_classifier AS 'hivemall.xgboost.classification.XGBoostBinaryClassifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS train_multiclass_xgboost_classifier;
CREATE FUNCTION train_multiclass_xgboost_classifier AS 'hivemall.xgboost.classification.XGBoostMulticlassClassifierUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS xgboost_predict;
CREATE FUNCTION xgboost_predict AS 'hivemall.xgboost.tools.XGBoostPredictUDTF'
    USING JAR '${hivemall_jar}';

DROP FUNCTION IF EXISTS xgboost_multiclass_predict;
CREATE FUNCTION xgboost_multiclass_predict AS 'hivemall.xgboost.tools.XGBoostMulticlassPredictUDTF'
    USING JAR '${hivemall_jar}';