Accessing an HBase table via Hive.
Create the HBase Table :
# Create table 'employee' with a single column family 'e'.
# HBase shell commands are lowercase JRuby methods: it must be `create`, not `CREATE`.
hbase(main):> create 'employee', {NAME => 'e', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => '189341712', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '1'}
Load values into the HBase table:
# Populate row 'employee_123': one cell per qualifier in column family 'e'.
hbase(main):> put 'employee', 'employee_123', 'e:n', 'my_name'
hbase(main):> put 'employee', 'employee_123', 'e:id', '2345687'
hbase(main):> put 'employee', 'employee_123', 'e:l', 'san_franscisco'
hbase(main):> put 'employee', 'employee_123', 'e:r', '25'
hbase(main):> put 'employee', 'employee_123', 'e:cd', '2015-11-05T01:28:22.232Z'
hbase(main):> put 'employee', 'employee_123', 'e:g', '2015-11-05T01:28:22.232Z'
Validate the HBase Table :
# Read back at most one row; the options hash is written with explicit braces.
hbase(main):> scan 'employee', {LIMIT => 1}
ROW COLUMN+CELL
employee_123 column=e:n, timestamp=1498255961717, value=my_name
employee_123 column=e:id, timestamp=1498257594962, value=2345687
employee_123 column=e:l, timestamp=1498256528932, value=san_franscisco
employee_123 column=e:r, timestamp=1498257582346, value=25
employee_123 column=e:cd, timestamp=1498256552609, value=2015-11-05T01:28:22.232Z
employee_123 column=e:g, timestamp=1498256565541, value=2015-11-05T01:28:22.232Z
Create the Hive Table :
hive>
-- Expose the existing HBase table 'employee' to Hive as `hbase_employee`.
-- EXTERNAL: Hive only maps the HBase table; it does not own the underlying data.
-- 'hbase.columns.mapping' is positional, one entry per Hive column:
--   eid <- :key (HBase row key), name <- e:n, employee_id <- e:id,
--   location <- e:l, grade <- e:r, created_dt <- e:cd, emp_join_dt <- e:g
-- The storage handler supplies the SerDe, and statistics / last-DDL-time
-- properties are maintained by the metastore, so neither is declared here.
CREATE EXTERNAL TABLE IF NOT EXISTS `hbase_employee` (
    `eid`         string,
    `name`        string,
    `employee_id` string,
    `location`    string,
    `grade`       string,
    `created_dt`  string,
    `emp_join_dt` string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    'hbase.columns.mapping' = ':key,e:n,e:id,e:l,e:r,e:cd,e:g'
)
TBLPROPERTIES (
    'hbase.table.name' = 'employee'
);
Validate the Hive Table Schema :
-- Show the column names and types Hive reports for the HBase-backed table.
hive> DESCRIBE hbase_employee;
OK
eid string from deserializer
name string from deserializer
employee_id string from deserializer
location string from deserializer
grade string from deserializer
created_dt string from deserializer
emp_join_dt string from deserializer
Validate the Hive Table :
-- Read the HBase row through Hive; columns are listed in table order.
hive> SELECT eid, name, employee_id, location, grade, created_dt, emp_join_dt
FROM hbase_employee;
OK
employee_123 my_name 2345687 san_franscisco 25 2015-11-05T01:28:22.232Z 2015-11-05T01:28:22.232Z
# Create table 'employee' with a single column family 'e'.
# HBase shell commands are lowercase JRuby methods: it must be `create`, not `CREATE`.
hbase(main):> create 'employee', {NAME => 'e', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => '189341712', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '1'}
Load values into the HBase table:
# Populate row 'employee_123': one cell per qualifier in column family 'e'.
hbase(main):> put 'employee', 'employee_123', 'e:n', 'my_name'
hbase(main):> put 'employee', 'employee_123', 'e:id', '2345687'
hbase(main):> put 'employee', 'employee_123', 'e:l', 'san_franscisco'
hbase(main):> put 'employee', 'employee_123', 'e:r', '25'
hbase(main):> put 'employee', 'employee_123', 'e:cd', '2015-11-05T01:28:22.232Z'
hbase(main):> put 'employee', 'employee_123', 'e:g', '2015-11-05T01:28:22.232Z'
Validate the HBase Table :
# Read back at most one row; the options hash is written with explicit braces.
hbase(main):> scan 'employee', {LIMIT => 1}
ROW COLUMN+CELL
employee_123 column=e:n, timestamp=1498255961717, value=my_name
employee_123 column=e:id, timestamp=1498257594962, value=2345687
employee_123 column=e:l, timestamp=1498256528932, value=san_franscisco
employee_123 column=e:r, timestamp=1498257582346, value=25
employee_123 column=e:cd, timestamp=1498256552609, value=2015-11-05T01:28:22.232Z
employee_123 column=e:g, timestamp=1498256565541, value=2015-11-05T01:28:22.232Z
Create the Hive Table :
hive>
-- Expose the existing HBase table 'employee' to Hive as `hbase_employee`.
-- EXTERNAL: Hive only maps the HBase table; it does not own the underlying data.
-- 'hbase.columns.mapping' is positional, one entry per Hive column:
--   eid <- :key (HBase row key), name <- e:n, employee_id <- e:id,
--   location <- e:l, grade <- e:r, created_dt <- e:cd, emp_join_dt <- e:g
-- The storage handler supplies the SerDe, and statistics / last-DDL-time
-- properties are maintained by the metastore, so neither is declared here.
CREATE EXTERNAL TABLE IF NOT EXISTS `hbase_employee` (
    `eid`         string,
    `name`        string,
    `employee_id` string,
    `location`    string,
    `grade`       string,
    `created_dt`  string,
    `emp_join_dt` string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    'hbase.columns.mapping' = ':key,e:n,e:id,e:l,e:r,e:cd,e:g'
)
TBLPROPERTIES (
    'hbase.table.name' = 'employee'
);
Validate the Hive Table Schema :
-- Show the column names and types Hive reports for the HBase-backed table.
hive> DESCRIBE hbase_employee;
OK
eid string from deserializer
name string from deserializer
employee_id string from deserializer
location string from deserializer
grade string from deserializer
created_dt string from deserializer
emp_join_dt string from deserializer
Validate the Hive Table :
-- Read the HBase row through Hive; columns are listed in table order.
hive> SELECT eid, name, employee_id, location, grade, created_dt, emp_join_dt
FROM hbase_employee;
OK
employee_123 my_name 2345687 san_franscisco 25 2015-11-05T01:28:22.232Z 2015-11-05T01:28:22.232Z
Comments
Post a Comment