#
#  Copyright Alibaba Group Holding Ltd.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import sys
from operator import add
import logging
from pyspark.sql import SparkSession
from pyspark.files import SparkFiles

# This application demonstrates how to register and invoke a Hive
# UDAF (user-defined aggregate function) in a PySpark job.

if __name__ == "__main__":
    # Script entry point: register a Hive UDAF, build a small in-memory
    # table, and run one grouped aggregation query against it.
    spark = (
        SparkSession.builder
        .appName("HiveUDAF")
        .getOrCreate()
    )
    try:
        # Bind the SQL name `my_udaf` to the JVM class implementing the
        # aggregation. NOTE(review): the class presumably has to be on the
        # job's classpath (e.g. shipped as a jar) — confirm for your deployment.
        spark.sql("CREATE TEMPORARY FUNCTION my_udaf AS 'com.aliyun.lindorm.ldspark.examples.udf.SimpleHiveUDAF'")

        # Sample rows: (name, col0, col1). Some names repeat so that the
        # GROUP BY below aggregates more than one row per group.
        rows = [('Alice', 1, 2), ('Bob', 3, 4), ('Trump', 5, 6), ('Bob', 7, 8), ('Alice', 9, 0)]
        spark.createDataFrame(rows, ['name', 'col0', 'col1']).createTempView('my_tbl')

        # Invoke the UDAF once per `name` group; both inputs are cast to int
        # explicitly before being handed to the function.
        df = spark.sql('select my_udaf(cast(col0 as int), cast(col1 as int)) from my_tbl group by name')
        df.show()
    finally:
        # Always release the session, even if registration or the query fails.
        spark.stop()
