-
-
Save zzztimbo/1115008 to your computer and use it in GitHub Desktop.
CellCounter for HBase Scan with TimeRange
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=begin
Usage: /bin/hbase shell cellcounter.rb
=end
import java.text.SimpleDateFormat | |
import java.text.ParsePosition | |
import java.util.Date | |
import org.apache.hadoop.hbase.client.HTable | |
import org.apache.hadoop.hbase.client.Scan | |
import org.apache.hadoop.hbase.util.Bytes | |
# Counts, per row and in total, how many cell versions of one column fall
# inside a timestamp range. Intended to be run inside the HBase shell, which
# provides the @hbase object used below for the cluster configuration.

# --- Settings ---------------------------------------------------------------
table_name  = 'lead'
column_name = 'binary:object'     # "family:qualifier" to count versions of
start_date  = '08012010000000'    # MMddyyyyHHmmss
end_date    = '08022010000000'    # MMddyyyyHHmmss

# Convert a MMddyyyyHHmmss string into epoch milliseconds.
# SimpleDateFormat#parse(String, ParsePosition) returns null instead of
# raising on malformed input, so check explicitly rather than failing later
# with an opaque NoMethodError on nil.
date_format = SimpleDateFormat.new("MMddyyyyHHmmss")
to_millis = lambda do |text|
  parsed = date_format.parse(text, ParsePosition.new(0))
  raise ArgumentError, "cannot parse date '#{text}' (expected MMddyyyyHHmmss)" if parsed.nil?
  parsed.getTime()
end

timerange_start = to_millis.call(start_date)
timerange_end   = to_millis.call(end_date)

# Debugging aid: uncomment to print the resolved time range.
# puts timerange_start
# puts timerange_end
# puts Date.new(timerange_start).toString()
# puts Date.new(timerange_end).toString()

# Request every stored version of each cell (not only the newest), limited to
# cells whose timestamp lies in the configured range.
scanner = Scan.new
scanner.setMaxVersions()
scanner.setTimeRange(timerange_start, timerange_end)

row_counter = 0
total_cell_counter = 0

table = HTable.new(@hbase.configuration, table_name)
begin
  table_scanner = table.getScanner(scanner)
  begin
    table_scanner.each do |table_row|
      table_row_key = Bytes::toStringBinary(table_row.getRow)
      cell_counter = 0

      # Count only the cells belonging to the configured column; rows that
      # contain no such cell are still reported (with 0 versions) and still
      # counted as rows.
      table_row.list.each do |kv|
        family = String.from_java_bytes(kv.getFamily)
        qualifier = Bytes::toStringBinary(kv.getQualifier)
        column = "#{family}:#{qualifier}"
        if column.eql? column_name
          cell_counter += 1
          total_cell_counter += 1
        end
      end

      puts "#{table_name}_id - #{table_row_key} | versions - #{cell_counter}"
      row_counter += 1
    end
  ensure
    # Release the scanner even if the scan fails part-way through.
    table_scanner.close
  end
ensure
  # Release the table's client-side resources.
  table.close
end

puts "total row count - #{row_counter}"
puts "total version count - #{total_cell_counter}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment