#!/bin/bash
# Data-load driver: creates the tables, runs the load SQL files through
# ${ODPS_CLT_CMD}, then analyzes the tables. Progress and per-step output are
# written under logs/load_data/ next to this script.
#
# ./config (in the script's directory) or the environment must provide
# ODPS_CLT_CMD, PROJECT and SF; SQL_FLAGS, UPLOAD_FLAGS and PARTITION are
# read as well.
set -e
set -x

# Resolve the script's own directory first and work from there, so that
# ./config and the relative ./sqls paths used further down resolve no matter
# which directory the caller invoked the script from. `&&` (instead of `;`)
# makes a failed cd abort the script rather than silently yielding the
# caller's cwd.
script_dir=$(cd "$(dirname "$0")" && pwd)
cd "${script_dir}"
source ./config
# Assigned after sourcing config, as before, so this value always wins.
BUILD_DIR=${script_dir}

# Fail fast with a clear message instead of half-way through the load.
: "${ODPS_CLT_CMD:?ODPS_CLT_CMD must be set by ./config or the environment}"
: "${PROJECT:?PROJECT must be set by ./config or the environment}"
: "${SF:?SF must be set by ./config or the environment}"

LOG_DIR=${BUILD_DIR}/logs/load_data
mkdir -p "${LOG_DIR}"
LOG=${LOG_DIR}/load.log
: > "${LOG}" # clear the log

# Millisecond wall-clock timestamp of the start of the run; the elapsed-time
# reports further down are computed relative to it.
start=$(( $(date +%s%N) / 1000000 ))
echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') , Load data started." | tee -a "${LOG}" 2>&1

echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Init the db ..." | tee -a "${LOG}" 2>&1
# Run the table-creation SQL from the script's own sqls/ directory (the same
# root every other SQL file in this script is taken from) rather than from the
# caller's current directory. Output is appended to create_tables.log.
# ODPS_CLT_CMD stays unquoted so a value made of a command plus options still
# word-splits -- NOTE(review): confirm against ./config.
# shellcheck disable=SC2086
${ODPS_CLT_CMD} --project "${PROJECT}" -e "${SQL_FLAGS}" -f "${BUILD_DIR}/sqls/create_tables.sql" >> "${LOG_DIR}/create_tables.log" 2>&1


echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Prepare tpcds data load sql." | tee -a "${LOG}" 2>&1
# Start from a fresh copy of the SQL templates for this scale factor.
# The :? guards abort instead of letting an empty BUILD_DIR/SF turn the
# recursive delete into a path outside the intended per-scale directory.
rm -rf "${BUILD_DIR:?}/sqls/load_data_sql_${SF:?}"
# The leading backslash bypasses any `cp` alias (e.g. `cp -i`).
\cp -rf "${BUILD_DIR}/sqls/load_data_sql" "${BUILD_DIR}/sqls/load_data_sql_${SF}"

#######################################
# Map a scale factor to the value substituted for PARALLNUM in the load SQL.
# Arguments: $1 - scale factor, a non-negative decimal integer
# Outputs:   the parallelism value on stdout; a diagnostic on stderr when
#            the scale factor is not a non-negative integer
# Returns:   0 on success, 1 on an invalid scale factor
#######################################
load_parallelism_for_sf() {
  local sf=$1
  # Previously an empty or non-numeric SF made every arithmetic test fail
  # and silently fell through to the largest parallelism.
  if [[ ! ${sf} =~ ^[0-9]+$ ]]; then
    printf 'Invalid scale factor SF="%s": expected a non-negative integer\n' "${sf}" >&2
    return 1
  fi
  # Force base 10 so values with leading zeros (e.g. 08) are not parsed as octal.
  sf=$(( 10#${sf} ))

  if (( sf <= 1 )); then
    echo 1
  elif (( sf <= 10 )); then
    echo 2
  elif (( sf <= 100 )); then
    echo 10
  elif (( sf <= 1000 )); then
    echo 100
  elif (( sf <= 10000 )); then
    echo 1000
  elif (( sf <= 100000 )); then
    echo 5000
  else
    echo 10000
  fi
}

# With `set -e` (enabled at the top of this script) a rejected SF aborts the
# run right here, because the assignment takes the helper's exit status.
PARALLEL=$(load_parallelism_for_sf "${SF:-}")

# Fill in the placeholders of every copied SQL template for this scale factor.
load_sql_dir="${BUILD_DIR}/sqls/load_data_sql_${SF}"
for sql_file in "${load_sql_dir}"/*; do
  # Globbing replaces parsing `ls`, so names with spaces survive. The -f test
  # skips sub-directories and the literal pattern left by an empty directory.
  [[ -f "${sql_file}" ]] || continue
  # One sed pass per file: SCALE -> scale factor, PARALLNUM -> parallelism.
  sed -i -e "s/SCALE/${SF}/g" -e "s/PARALLNUM/${PARALLEL}/g" "${sql_file}"
done

# Replace the literal text ${BUILD_DIR} inside the prepare script with the
# real path. The path is escaped first so that '\', '&' or the '#' delimiter
# in it cannot corrupt the sed replacement.
build_dir_escaped=$(printf '%s\n' "${BUILD_DIR}" | sed -e 's/[\\&#]/\\&/g')
sed -i "s#\${BUILD_DIR}#${build_dir_escaped}#g" "${load_sql_dir}/tpcds_data_module.sql.prepare"

# The rest of the script expects to run from the script directory.
cd "${BUILD_DIR}"
# ODPS_CLT_CMD stays unquoted so a value made of a command plus options still
# word-splits -- NOTE(review): confirm against ./config.
# shellcheck disable=SC2086
${ODPS_CLT_CMD} --project "${PROJECT}" -e "${UPLOAD_FLAGS}" -f "${load_sql_dir}/tpcds_data_module.sql.prepare" > "${LOG_DIR}/sql_prepare.log" 2>&1
echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Load data to tables ..." | tee -a "${LOG}" 2>&1
load_sql_dir="${BUILD_DIR}/sqls/load_data_sql_${SF}"
if [[ "${SF}" != "1" ]]; then
  # Suffix of each tpcds_data_module.sql.* file to run, and the log file that
  # receives its output (same index in both arrays). The fact-table modules
  # carry the optional ${PARTITION} suffix.
  load_modules=(
    "dim"
    "customer"
    "store_sales${PARTITION}"
    "store_returns${PARTITION}"
    "web_sales${PARTITION}"
    "web_returns${PARTITION}"
    "catalog_sales${PARTITION}"
    "catalog_returns${PARTITION}"
    "inventory${PARTITION}"
  )
  load_logs=(
    "dim.log"
    "customer.log"
    "store_sales.log"
    "store_returns.log"
    "web_sales.log"
    "web_return.log" # historical name (no trailing "s"), kept unchanged
    "catalog_sales.log"
    "catalog_returns.log"
    "inventory.log"
  )

  # Start all module loads concurrently and remember each job's PID.
  load_pids=()
  for idx in "${!load_modules[@]}"; do
    # ODPS_CLT_CMD stays unquoted so a value made of a command plus options
    # still word-splits -- NOTE(review): confirm against ./config.
    # shellcheck disable=SC2086
    ${ODPS_CLT_CMD} --project "${PROJECT}" -e "${UPLOAD_FLAGS}" \
      -f "${load_sql_dir}/tpcds_data_module.sql.${load_modules[$idx]}" \
      > "${LOG_DIR}/${load_logs[$idx]}" 2>&1 &
    load_pids+=("$!")
  done

  # A bare `wait` always returns 0 and `set -e` does not see background
  # failures, so wait for every PID individually and collect the failures.
  load_failed=0
  for idx in "${!load_pids[@]}"; do
    if ! wait "${load_pids[$idx]}"; then
      echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Load of ${load_modules[$idx]} failed, see ${LOG_DIR}/${load_logs[$idx]}" | tee -a "${LOG}" >&2
      load_failed=1
    fi
  done
  # Report every failed module before aborting.
  if (( load_failed )); then
    exit 1
  fi
else
  # Scale factor 1 is loaded by a single dedicated SQL file.
  # shellcheck disable=SC2086
  ${ODPS_CLT_CMD} --project "${PROJECT}" -e "${UPLOAD_FLAGS}" -f "${load_sql_dir}/tpcds_data_1g.sql" > "${LOG_DIR}/tpcds_data_1g.log" 2>&1
fi

# Report how long the run has taken up to the end of the table load.
# `start` is the millisecond timestamp recorded at the beginning of the script.
end=$(( $(date +%s%N) / 1000000 ))
elapsed_ms=$(( end - start ))
duration=$(( elapsed_ms / 1000 ))
# Seconds with millisecond precision via shell arithmetic; this removes the
# dependency on the external `bc` tool, whose absence would abort the script
# (set -e) after the load itself had already completed.
printf -v sec '%d.%03d' "$(( elapsed_ms / 1000 ))" "$(( elapsed_ms % 1000 ))"
printf -v elapse_time '%d:%02d:%02d, %s seconds' \
  "$(( duration / 3600 ))" "$(( (duration / 60) % 60 ))" "$(( duration % 60 ))" "${sec}"

echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Load data to tables finished. Time token: $elapse_time" | tee -a "${LOG}" 2>&1
cd "${BUILD_DIR}"
# Analyze tables and get statistics
echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Analyze tables started ..." | tee -a "${LOG}" 2>&1
analyze_start=$(( $(date +%s%N) / 1000000 ))
# ODPS_CLT_CMD stays unquoted so a value made of a command plus options still
# word-splits -- NOTE(review): confirm against ./config.
# shellcheck disable=SC2086
${ODPS_CLT_CMD} --project "${PROJECT}" -e "${UPLOAD_FLAGS}" -f "${BUILD_DIR}/sqls/analyze_tables.sql" > "${LOG_DIR}/analyze_tables.log" 2>&1

# Elapsed time of the analyze step only (measured from analyze_start).
end=$(( $(date +%s%N) / 1000000 ))
elapsed_ms=$(( end - analyze_start ))
duration=$(( elapsed_ms / 1000 ))
# Seconds with millisecond precision via shell arithmetic; this removes the
# dependency on the external `bc` tool, whose absence would abort the script
# (set -e) after the analyze step had already completed.
printf -v sec '%d.%03d' "$(( elapsed_ms / 1000 ))" "$(( elapsed_ms % 1000 ))"
printf -v elapse_time '%d:%02d:%02d, %s seconds' \
  "$(( duration / 3600 ))" "$(( (duration / 60) % 60 ))" "$(( duration % 60 ))" "${sec}"

echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Analyze tables finished. Time token: $elapse_time" | tee -a "${LOG}" 2>&1


# Total elapsed time of the whole run, measured from `start` (the millisecond
# timestamp recorded at the beginning of the script).
end=$(( $(date +%s%N) / 1000000 ))
elapsed_ms=$(( end - start ))
duration=$(( elapsed_ms / 1000 ))
# Seconds with millisecond precision via shell arithmetic; this removes the
# dependency on the external `bc` tool, whose absence would abort the script
# (set -e) before the final summary line was written.
printf -v sec '%d.%03d' "$(( elapsed_ms / 1000 ))" "$(( elapsed_ms % 1000 ))"
printf -v elapse_time '%d:%02d:%02d, %s seconds' \
  "$(( duration / 3600 ))" "$(( (duration / 60) % 60 ))" "$(( duration % 60 ))" "${sec}"
# The second, untimed "Analyze tables finished." message that used to be
# printed here duplicated the one emitted right after the analyze step.
echo "At $(date '+%Y-%m-%d %H:%M:%S.%2N') ,Load data finished. Time token: $elapse_time" | tee -a "${LOG}" 2>&1

