南天一柱 发表于 2018-10-22 09:22:10

hive ETL之业绩报表sql

-- case4 --  

  
--========== rates ==========--
  

  
app0    1
  
app1    2
  
app2    2
  
app3    3
  
app4    3
  
app5    3
  
app6    5
  
app7    5
  
app8    5
  
app9    5
  

  
CREATE EXTERNAL TABLE rates (
  
    app_name    STRING
  
, star_ratesSTRING
  
)
  
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
  
LOCATION '/tmp/db/rates';
  

  
create table app_ranks as
  
select app_name as app
  
       , star_rates as stars
  
       , NTILE(3) OVER (ORDER BY star_rates DESC) as nt
  
       , row_number() OVER (ORDER BY star_rates DESC) as rn
  
       , rank() OVER (ORDER BY star_rates DESC) as rk
  
       , dense_rank() OVER (ORDER BY star_rates DESC) as drk
  
       , CUME_DIST() OVER (ORDER BY star_rates) as cd
  
       , PERCENT_RANK() OVER (ORDER BY star_rates) as pr
  
from rates
  
order by stars desc
  
;
  

  
select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) from app_ranks;
  

  
select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) from app_ranks;
  

  
select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) from app_ranks;
  

  
select app, stars, rn, lead(rn) OVER (PARTITION BY stars ORDER BY rn), lag(rn) OVER (PARTITION BY stars ORDER BY rn) from app_ranks;
  

  

  
--========== visitors ==========--
  

  
d001    201301    101
  
d002    201301    102
  
d003    201301    103
  
d001    201302    111
  
d002    201302    112
  
d003    201302    113
  
d001    201303    121
  
d002    201303    122
  
d003    201303    123
  
d001    201304    131
  
d002    201304    132
  
d003    201304    133
  
d001    201305    141
  
d002    201305    142
  
d003    201305    143
  
d001    201306    151
  
d002    201306    152
  
d003    201306    153
  
d001    201307    201
  
d002    201307    202
  
d003    201307    203
  
d001    201308    211
  
d002    201308    212
  
d003    201308    213
  
d001    201309    221
  
d002    201309    222
  
d003    201309    223
  
d001    201310    231
  
d002    201310    232
  
d003    201310    233
  
d001    201311    241
  
d002    201311    242
  
d003    201311    243
  
d001    201312    301
  
d002    201312    302
  
d003    201312    303
  
d001    201401    301
  
d002    201401    302
  
d003    201401    303
  
d001    201402    211
  
d002    201402    212
  
d003    201402    213
  
d001    201403    271
  
d002    201403    272
  
d003    201403    273
  
d001    201404    331
  
d002    201404    332
  
d003    201404    333
  
d001    201405    351
  
d002    201405    352
  
d003    201405    353
  

  
CREATE EXTERNAL TABLE visitors (
  
    domainSTRING
  
, month   STRING
  
, visitor STRING
  
)
  
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
  
LOCATION '/tmp/db/visitors';
  

  
select * from visitors where domain = 'd001';
  

  
select domain
  
   , month
  
   , visitor
  
   , first_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , last_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , lead(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , lag(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
  
from visitors
  
where domain = 'd001';
  

  
select domain
  
   , month
  
   , visitor
  
   , first_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , last_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , lag(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)
  
   , lag(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)
  
from visitors
  
where domain = 'd001';
  

  
create table visitors_report as
  
select domain
  
       , month
  
       , visitor
  
       , lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)            as last_mon
  
       , visitor - lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)as delta_mon
  
       , lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)         as last_year
  
       , visitor - lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC) as delta_year
  
from visitors
  
;
  

  
select * from visitors_report where domain = 'd001' and month > '2014';
  

  
select month
  
   , domain
  
   , visitor
  
   , last_mon
  
   , last_year
  
from visitors_report
  
where (domain = 'd001' or domain = 'd002') and month > '2014'
  
order by month desc, domain asc
  
limit 100;
  

  
select month
  
   , domain
  
   , visitor
  
   , max(visitor) OVER (PARTITION BY month) as max_visitors
  
   , min(visitor) OVER (PARTITION BY month) as min_visitors
  
from visitors
  
where month > '2014'
  
order by month desc, domain asc;
  

  
select *
  
from (
  
select month
  
   , domain
  
   , visitor
  
   , max(visitor) OVER (PARTITION BY domain ORDER BY month DESC ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING) as max_visitors_last_12_mon
  
   , min(visitor) OVER (PARTITION BY domain ORDER BY month DESC ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING) as min_visitors_last_12_mon
  
from visitors
  
) v
  
where month > '20131'
  
order by month desc, domain asc;


页: [1]
查看完整版本: hive ETL之业绩报表sql