stcaac 发表于 2018-8-5 14:47:35

利用Python调用HBASE-DBAspace

  利用Python调用HBase,需要先安装 thrift 和 hbase-thrift 这两个Python包。
  启动HBase的thrift服务:bin/hbase-daemon.sh start thrift(默认端口是9090)
  mysql 到hbase的数据同步:
  1、put
  2、Importtsv
  3、编写MapReduce Job导入
  4、sqoop
  简单code:
  #!/usr/bin/env python
  #coding=utf-8
  import sys
  sys.path.append('/usr/lib/python2.6/site-packages/hbase')
  from thrift import Thrift
  from thrift.transport import TSocket
  from thrift.transport import TTransport
  from thrift.protocol import TBinaryProtocol
  from hbase import Hbase
  from hbase.ttypes import *
  import csv
  from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation, TRegionInfo
  from hbase.ttypes import IOError, AlreadyExists
  ######
  # Transports opened by client_conn() that have not been closed yet.
  # Kept at module level so that __del__() can reach them; client_conn()
  # only returns the client object, not the transport.
  _open_transports = []


  def client_conn(host="172.16.10.87", port=9090):
      """Open a Thrift connection to an HBase Thrift server and return a client.

      Args:
          host: Host name or IP address of the HBase Thrift server.
          port: TCP port of the Thrift server (the HBase default is 9090).

      Returns:
          An ``Hbase.Client`` bound to a buffered, binary-protocol transport
          that has already been opened.

      Raises:
          Any exception raised by ``transport.open()`` (for example when the
          server is unreachable) propagates to the caller.
      """
      sock = TSocket.TSocket(host, port)
      transport = TTransport.TBufferedTransport(sock)
      protocol = TBinaryProtocol.TBinaryProtocol(transport)
      client = Hbase.Client(protocol)
      transport.open()
      # Register only after a successful open so __del__() never tries to
      # close a transport that was never connected.
      _open_transports.append(transport)
      return client


  def __del__():
      """Close every transport previously opened by client_conn().

      This is a plain module-level function, not a destructor: Python does
      not call it automatically, so callers must invoke it explicitly.
      Calling it when nothing is open is a no-op.
      """
      while _open_transports:
          _open_transports.pop().close()
  if __name__ == "__main__":
      # Demo script: exercises a handful of HBase Thrift calls against tables
      # that are expected to exist already (name2, ca_record, t2, t3).
      # print(...) with a single argument behaves the same on Python 2 and 3.
      client = client_conn()

      # List the table names.
      print(client.getTableNames())

      ## Create a table (disabled; needs a list of ColumnDescriptor objects).
      #client.createTable('name2',)

      # Write data.
      # mutateRow() requires a list of Mutation objects as its third argument;
      # calling it with only table and row raises TypeError.
      # NOTE(review): the column and value below are placeholders chosen to
      # match the "user_id:1" column read back further down -- replace them
      # with real data, and make sure the "user_id" column family exists in
      # both tables. Some HBase Thrift versions also expect a fourth
      # `attributes` argument -- confirm against the generated Hbase.py.
      mutations = [Mutation(column="user_id:1", value="1")]
      client.mutateRow('name2', 'a1', mutations)
      client.mutateRow('ca_record', '1', mutations)

      ## Read data.
      rows = client.getRow('name2', 'a1')
      for r in rows:
          print('row %s' % (r.row,))
          # NOTE(review): '\br' is a backspace followed by 'r'; it was probably
          # meant as a visual separator -- confirm the intended text.
          print('\br')
          # The column may be absent from the row; avoid calling .value on None.
          cell = r.columns.get("user_id:1")
          print('value %s' % (cell.value if cell is not None else None,))

      ## Delete a table (disabled).
      #client.disableTable("t1")
      #client.deleteTable("t1")
      print(client.getTableNames())

      ### Fetch row keys of a table via a scanner.
      #print client.scannerGet(client.scannerOpen('t2',"cmd",["a"]))
      # An empty start row makes the scanner begin at the first row.
      scanner_id = client.scannerOpen('t2', "", ["a"])
      try:
          print(client.scannerGet(scanner_id))
      finally:
          # Scanners hold server-side resources until closed explicitly.
          client.scannerClose(scanner_id)

      print("------")
      print(client.getColumnDescriptors('t3'))
  #在自己的开发环境中,只要安装好thrift及hbase-thrift的包,import时就不会出问题
页: [1]
查看完整版本: 利用Python调用HBASE-DBAspace