Andy column=baseINFO:age, value=21
Andy column=baseINFO:gender, value=0
Andy column=baseINFO:telphone_number, value=110110110
Tom column=baseINFO:age, value=18
Tom column=baseINFO:gender, value=1
Tom column=baseINFO:telphone_number, value=120120120
如上表所示，使用 Spark 按行键（row key）对这些数据进行分组，达到如下效果：
[Andy,(21,0,110110110)]
[Tom,(18,1,120120120)]
需求比较简单，主要是为了熟悉程序的运行过程。
二、具体代码
package com.union.bigdata.spark.hbase;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.mapreduce.TableSplit;import org.apache.hadoop.hbase.util.Base64;import org.apache.hadoop.hbase.util.Bytes;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.SparkConf;import org.apache.spark.api.java.function.Function;import org.apache.spark.api.java.function.Function2;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.client.Scan;import org.apache.hadoop.hbase.client.Result;import org.apache.hadoop.hbase.io.ImmutableBytesWritable;import org.apache.hadoop.hbase.mapreduce.TableInputFormat;import org.apache.spark.api.java.JavaPairRDD;import org.apache.hadoop.hbase.protobuf.ProtobufUtil;import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;import org.apache.spark.api.java.function.PairFunction;import scala.Tuple10;import scala.Tuple2;import java.io.IOException;import java.util.ArrayList;import java.util.List;public class ReadHbase {
// Application name that main passes to SparkConf.setAppName.
private static String appName = "ReadTable";
/**
 * Reads the {@code baseINFO} columns of the HBase table {@code VIPUSER} through Spark,
 * maps every row to {@code (rowKey, [telphone_number, age, gender])}, builds the Cartesian
 * product of those rows with themselves, keeps each unordered pair of distinct row keys
 * once, and prints one of the resulting pairs.
 *
 * @param args command-line arguments (unused)
 * @throws IllegalStateException if the scan cannot be serialized for {@code TableInputFormat}
 */
public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "master");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    // The table and the scan do not change between attempts, so configure them once.
    conf.set(TableInputFormat.INPUT_TABLE, "VIPUSER");
    conf.set(TableInputFormat.SCAN, encodeScan());

    SparkConf sparkConf = new SparkConf();
    // The job can also run locally with master "local[3]" (3 = number of threads).
    sparkConf.setMaster("spark://master:7077").setAppName(appName);
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    try {
        for (int i = 0; i < 2; i++) {
            try {
                // One (rowKey, Result) record per HBase row matched by the scan.
                JavaPairRDD<ImmutableBytesWritable, Result> hBaseRDD = jsc.newAPIHadoopRDD(
                        conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);

                // Convert each row to (rowKey, [telphone_number, age, gender]).
                JavaPairRDD<String, List<Long>> art_scores = hBaseRDD.mapToPair(
                        new PairFunction<Tuple2<ImmutableBytesWritable, Result>, String, List<Long>>() {
                            @Override
                            public Tuple2<String, List<Long>> call(
                                    Tuple2<ImmutableBytesWritable, Result> results) {
                                Result row = results._2();
                                List<Long> list = new ArrayList<Long>();
                                list.add(parseCell(row, "telphone_number"));
                                list.add(parseCell(row, "age"));
                                list.add(parseCell(row, "gender"));
                                return new Tuple2<String, List<Long>>(
                                        Bytes.toString(results._1().get()), list);
                            }
                        });

                // Pair every row with every row (including itself).
                JavaPairRDD<Tuple2<String, List<Long>>, Tuple2<String, List<Long>>> cart =
                        art_scores.cartesian(art_scores);

                // Keep only pairs whose left row key sorts strictly before the right one;
                // this drops self-pairs and the mirrored duplicate of every pair.
                JavaPairRDD<Tuple2<String, List<Long>>, Tuple2<String, List<Long>>> cart2 =
                        cart.filter(
                                new Function<Tuple2<Tuple2<String, List<Long>>,
                                        Tuple2<String, List<Long>>>, Boolean>() {
                                    @Override
                                    public Boolean call(
                                            Tuple2<Tuple2<String, List<Long>>,
                                                    Tuple2<String, List<Long>>> pair)
                                            throws Exception {
                                        return pair._1()._1().compareTo(pair._2()._1()) < 0;
                                    }
                                });

                System.out.println("Create the List 'collect'...");
                List<Tuple2<Tuple2<String, List<Long>>, Tuple2<String, List<Long>>>> collect =
                        cart2.collect();
                System.out.println("Done..");
                // Print the i-th pair when it exists and stop; otherwise print "STOP" and,
                // if this was the first pass, run the job once more.
                System.out.println(collect.size() > i ? collect.get(i) : "STOP");
                if (collect.size() > i) {
                    break;
                }
            } catch (Exception e) {
                // A failed attempt is reported and the loop moves on to the next attempt.
                System.out.println(e);
            }
        }
    } finally {
        // Always release the Spark context, even when an attempt fails.
        jsc.stop();
    }
}

/**
 * Builds the scan over the three {@code baseINFO} columns and serializes it into the
 * Base64 string that is stored under {@code TableInputFormat.SCAN}.
 *
 * @return the Base64-encoded protobuf form of the scan
 * @throws IllegalStateException if the scan cannot be converted; failing here avoids
 *     submitting the job with an empty scan string
 */
private static String encodeScan() {
    byte[] family = Bytes.toBytes("baseINFO");
    Scan scan = new Scan();
    scan.addFamily(family);
    scan.addColumn(family, Bytes.toBytes("telphone_number"));
    scan.addColumn(family, Bytes.toBytes("age"));
    scan.addColumn(family, Bytes.toBytes("gender"));
    try {
        ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
        return Base64.encodeBytes(proto.toByteArray());
    } catch (IOException e) {
        throw new IllegalStateException("Could not serialize the HBase scan", e);
    }
}

/**
 * Decodes one {@code baseINFO} cell that holds a number written as a decimal string.
 * The value is parsed as a {@code Long} so that telephone numbers larger than
 * {@code Integer.MAX_VALUE} do not fail to parse.
 *
 * @param row the HBase row to read from
 * @param qualifier the column qualifier inside the {@code baseINFO} family
 * @return the parsed number, or {@code null} when the row has no such cell
 * @throws NumberFormatException if the cell is present but is not a decimal number
 */
private static Long parseCell(Result row, String qualifier) {
    byte[] raw = row.getValue(Bytes.toBytes("baseINFO"), Bytes.toBytes(qualifier));
    return raw == null ? null : Long.valueOf(Bytes.toString(raw));
}
}