脚本如下所示:
-- Create the partitioned external table `ubas`.
-- Rows are read as comma-delimited text from files under
-- /home/hdfs/ubas/out/meta; each partition is keyed by `logdate`.
-- IF NOT EXISTS makes the statement safe to re-run: an already existing
-- `ubas` table is left untouched instead of raising an error.
CREATE EXTERNAL TABLE IF NOT EXISTS ubas (
    ip STRING,
    timespan STRING,
    url STRING,
    hour STRING
)
PARTITIONED BY (logdate STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/home/hdfs/ubas/out/meta';
统计的KPI脚本,如下所示:
# Daily KPI job.
#
# $1 is the log date. It is passed to the cleaning jar, used as the `logdate`
# partition value, and appended to every result table name (pv_$1, ip_$1,
# amount_$1, jr_$1, ubas_$1), so it must be usable inside a Hive table name.
if [ -z "$1" ]; then
    echo "usage: $0 <logdate>" >&2
    exit 1
fi

# clean hdfs data and output
/home/hadoop/hadoop-2.6.0/bin/hadoop jar ubas-1.0.0-jar-with-dependencies.jar "$1"

# use hive to stats

## 1.location data to partition
/home/hadoop/hive-0.14.0-bin/bin/hive -e "ALTER TABLE ubas ADD PARTITION(logdate='$1') LOCATION '/home/hdfs/ubas/out/meta/$1';"

## 2.stats pv (total number of rows for the day)
/home/hadoop/hive-0.14.0-bin/bin/hive -e "CREATE TABLE pv_$1 AS SELECT COUNT(1) AS PV FROM ubas WHERE logdate='$1';"

## 3.stats ip (number of distinct ip values for the day)
/home/hadoop/hive-0.14.0-bin/bin/hive -e "CREATE TABLE ip_$1 AS SELECT COUNT(DISTINCT ip) AS IP FROM ubas WHERE logdate='$1';"

## 4.stats amount hour (row count per hour value)
/home/hadoop/hive-0.14.0-bin/bin/hive -e "CREATE TABLE amount_$1 ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' AS SELECT '$1',hour AS HOUR_TAG, COUNT(hour) AS HOUR,'' AS UPDATE_DATE FROM ubas WHERE logdate='$1' GROUP BY hour;"

## 5.stats jr (number of ip values that appear exactly once for the day)
/home/hadoop/hive-0.14.0-bin/bin/hive -e "CREATE TABLE jr_$1 AS SELECT COUNT(1) AS JR FROM (SELECT COUNT(ip) AS times FROM ubas WHERE logdate='$1' GROUP BY ip HAVING times=1) e;"

## 6.combine pv,ip,jr and tr to ubas table
# The three inputs are single-row tables, so the ON 1=1 joins yield one row.
# NOTE(review): the ratio column is computed as ip / pv although the heading
# calls it "tr" -- confirm this is the intended formula.
/home/hadoop/hive-0.14.0-bin/bin/hive -e "CREATE TABLE ubas_$1 ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' AS SELECT '$1', a.pv, b.ip, c.jr, ROUND(COALESCE(CAST(b.ip AS DOUBLE), 0)/a.pv, 2),'' AS UPDATE_DATE FROM pv_$1 a JOIN ip_$1 b ON 1=1 JOIN jr_$1 c ON 1=1 ;"
* @Author dengjie */public class StatsServer { private static Logger logger = LoggerFactory.getLogger(StatsServer.class); private final int PORT = 9090;
TNonblockingServerSocket socket = new TNonblockingServerSocket(PORT); final UBASService.Processor processor = new UBASService.Processor(new UBASServiceImpl());
THsHaServer.Args arg = new THsHaServer.Args(socket); /*
* Binary coded format efficient, intensive data transmission, The
* use of non blocking mode of transmission, according to the size