Accessing an HBase table via Hive.

Create the HBase Table :
hbase(main):> CREATE 'employee', {NAME => 'e', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => '189341712', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '1'}


Load values into the HBase table :
hbase(main):> put 'employee', 'employee_123', 'e:n' , 'my_name'
hbase(main):> put 'employee', 'employee_123', 'e:id' , '2345687'
hbase(main):> put 'employee', 'employee_123', 'e:l' , 'san_franscisco'
hbase(main):> put 'employee', 'employee_123', 'e:r' , '25'
hbase(main):> put 'employee', 'employee_123', 'e:cd' , '2015-11-05T01:28:22.232Z'
hbase(main):> put 'employee', 'employee_123', 'e:g' , '2015-11-05T01:28:22.232Z'


Validate the HBase Table :
hbase(main):> scan 'employee', LIMIT => 1
ROW                                                                     COLUMN+CELL
 employee_123                                                           column=e:n, timestamp=1498255961717, value=my_name
 employee_123                                                           column=e:id, timestamp=1498257594962, value=2345687
 employee_123                                                           column=e:l, timestamp=1498256528932, value=san_franscisco
 employee_123                                                           column=e:r, timestamp=1498257582346, value=25
 employee_123                                                           column=e:cd, timestamp=1498256552609, value=2015-11-05T01:28:22.232Z
 employee_123                                                           column=e:g, timestamp=1498256565541, value=2015-11-05T01:28:22.232Z


Create the Hive Table :
hive >
-- Map the existing HBase table 'employee' into Hive as `hbase_employee`.
-- The table is EXTERNAL: Hive only references the HBase table named in
-- 'hbase.table.name' and does not create it.
-- 'hbase.columns.mapping' is positional: the first Hive column maps to the
-- HBase row key (:key), the rest map in order to the listed family:qualifier
-- cells (e:n -> name, e:id -> employee_id, e:l -> location, e:r -> grade,
-- e:cd -> created_dt, e:g -> emp_join_dt).
-- The storage handler supplies the SerDe, and table statistics
-- (numRows, totalSize, ...) are maintained by the metastore, so neither is
-- declared here.
CREATE EXTERNAL TABLE `hbase_employee` (
  `eid` string,
  `name` string,
  `employee_id` string,
  `location` string,
  `grade` string,
  `created_dt` string,
  `emp_join_dt` string)
STORED BY
  'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
  'hbase.columns.mapping'=':key,e:n,e:id,e:l,e:r,e:cd,e:g')
TBLPROPERTIES (
  'hbase.table.name'='employee');


Validate the Hive Table Schema :
hive> describe hbase_employee;
OK
eid                 string               from deserializer
name                 string               from deserializer
employee_id         string               from deserializer
location             string               from deserializer
grade               string               from deserializer
created_dt           string               from deserializer
emp_join_dt         string               from deserializer



Validate the Hive Table :
hive> select eid, name, employee_id, location, grade, created_dt, emp_join_dt from hbase_employee;
OK
employee_123 my_name 2345687 san_franscisco 25 2015-11-05T01:28:22.232Z 2015-11-05T01:28:22.232Z

Comments

Popular posts from this blog

Using Java API to access Google Search Console Data

Setting Up Eclipse to run Spark using Scala