#!/bin/bash
##********************************************************************#
##
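## Dates support arithmetic, written in the following form:
## ${DATA_DATE offset field formatter},
## DATA_DATE: *fixed value, the business time of the current job
## offset: *required, the date offset to apply; which field it shifts is decided by `field`; a number, positive or negative
## field: *required, the field to offset; one of: day,month,year,minute,hour,week,second
## formatter: optional, the date format applied after the offset is computed, e.g. yyyy-MM-dd HH:mm:ss
## Example: ${DATA_DATE -1 day 'yyyy-MM-dd HH:mm'}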
##********************************************************************#
# Load the shared job environment before doing anything else. A missing
# common.sh aborts the job instead of letting it run half-configured.
# NOTE: `set -euo pipefail` is deliberately not enabled: common.sh is not
# visible from this script and may not be strict-mode clean.
if [[ -z "$BIPROG_ROOT" ]]; then
  echo "BIPROG_ROOT is not set; cannot locate bin/shell/common.sh" >&2
  exit 1
fi
if [[ ! -r "$BIPROG_ROOT/bin/shell/common.sh" ]]; then
  echo "cannot read $BIPROG_ROOT/bin/shell/common.sh" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$BIPROG_ROOT/bin/shell/common.sh"

# Business date of this run; the date comments below count from it as
# "yesterday".
vDay=${DATA_DATE} #yesterday

# GNU date treats an empty -d argument as "today", which would silently run
# the job against the wrong day, so an empty business date is rejected.
if [[ -z "$vDay" ]]; then
  echo "DATA_DATE is empty; refusing to run" >&2
  exit 1
fi

# Print "<business date> <offset>" as YYYY-MM-DD, or stop the job when GNU
# date cannot parse the expression (an empty result would otherwise shift the
# positional arguments handed to the Spark application).
#   $1 - date expression accepted by `date -d`
format_day() {
  date -d "$1" +%Y-%m-%d
}

yyyy_mm_dd_1=$(format_day "$vDay") || {
  echo "cannot parse business date: $vDay" >&2
  exit 1
} # yesterday
yyyy_mm_dd_2=$(format_day "$vDay -1 days") || exit 1 # 2 days ago
yyyy_mm_dd_3=$(format_day "$vDay -2 days") || exit 1 # 3 days ago
yyyy_mm_dd_5=$(format_day "$vDay -4 days") || exit 1 # 5 days ago
# Only yyyy_mm_dd_2 is used by the spark-submit call below; the other dates
# are kept so that the set of variables this script defines is unchanged.

#vEdition="9156"
vEdition="9355"

# Full-day window covering yyyy_mm_dd_2, with millisecond precision.
start_time="${yyyy_mm_dd_2} 00:00:00.000"
end_time="${yyyy_mm_dd_2} 23:59:59.999"

thisTable="extern.jimmylian_query_rank_correct"
sourceTable="temp.jimmylian_query_rank_correct"
max_word=20
threshold=15
#thresholdcount=2
# Original note: "to activate chocie_multiple" (spelling kept as found).
# NOTE(review): presumably 0 switches that mode on inside CorrectCheck, with 2
# being the previous value -- confirm against the application.
thresholdcount=0

# Launch the Spark application. The jar names are relative paths, so the job
# has to be started from the directory that contains them.
# The application arguments after the jar are positional; their meaning is
# defined by CorrectCheck, which is not visible here. yyyy_mm_dd_2 is passed
# twice on purpose, matching the original call.
# Every argument is quoted so that an unexpected space or glob character can
# never shift the positions. spark-submit stays the last command, so its exit
# status is the exit status of this script.
spark-submit \
  --name jimmy_spark_query_suggest \
  --jars jieba-analysis-1.0.2.jar,diff-match-patch-1.2.jar,jpinyin-1.1.8.jar,hanlp-portable-1.7.8.jar \
  --master yarn \
  --queue root.XXXXXXXXXXXQueue \
  --deploy-mode client \
  --executor-memory 20G \
  --driver-memory 2g \
  --executor-cores 3 \
  --num-executors 3 \
  --conf spark.sql.shuffle.partitions=2001 \
  --conf spark.network.timeout=800 \
  --conf spark.scheduler.listenerbus.eventqueue.size=100000 \
  --conf spark.sql.hive.convertMetastoreOrc=false \
  --class CorrectCheck \
  CorrectCheck-1.0.jar \
  "${yyyy_mm_dd_2}" \
  "${yyyy_mm_dd_2}" \
  "${vEdition}" \
  "${start_time}" \
  "${end_time}" \
  "${thisTable}" \
  "${max_word}" \
  "${sourceTable}" \
  "${threshold}" \
  "${thresholdcount}"