Last active
February 16, 2018 15:41
-
-
Save apeiros/e5252a391d5ae6de90bc9b3f06496417 to your computer and use it in GitHub Desktop.
Extract pieces from line-based logfiles
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# WARNING: lots of edge cases are not properly handled. This is just code I wrote to inspect some
# Rails logs by hand in pry, where I could easily deal with edge cases manually.
# However, feel free to fork and fix the edge cases (and tell me about it!)
#
# Use it like this:
#   start_time = Time.local(2017,1,1)
#   end_time = Time.local(2017,1,2)
#   puts rails_log_within("path/to/logfile.log", start_time, end_time)
require "time" # Time.iso8601 is stdlib, not core | |
# read the next full line from a position (i.e. from start of a file, or | |
# immediately after a newline until the next newline) and return the offsets | |
# e.g. a File with "012\n456\n89" | |
# read_line_at(file, 0) # => [0, 3, "012\n"] | |
# read_line_at(file, 1) # => [4, 7, "456\n"] | |
# read_line_at(file, 4) # => [4, 7, "456\n"] | |
# read_line_at(file, 5) # => [8, 9, "89"] | |
# read_line_at(file, 10) # => [nil, nil, nil] | |
def read_line_at(file, pos) | |
start_offset = pos | |
if pos.zero? | |
buffer = file.gets | |
elsif pos > 0 | |
file.seek(pos-1, IO::SEEK_SET) | |
if file.read(1) == "\n" | |
buffer = file.gets | |
else | |
skip = file.gets | |
if skip | |
start_offset = pos + skip.bytesize # skip 1 line | |
buffer = file.gets | |
else | |
buffer = nil | |
end | |
end | |
else | |
raise ArgumentError, "pos argument must be positive" | |
end | |
if buffer | |
[start_offset, start_offset+buffer.bytesize, buffer] | |
else | |
[nil, nil, nil] | |
end | |
end | |
def binary_search_line_in_path(path, **args, &block) | |
return binary_search_line(path, **args&block) if path.is_a?(IO) | |
File.open(path, 'rb') { |file| binary_search_line(file, **args) } | |
end | |
# The block acts as "needle", i.e. your block must return -1,0,1 or :next_line to indicate whether | |
# the needle was found. -1 means we're left of the needle, 0 means we found it, 1 means we're right | |
# of the needle. :next_line is to be used if the current line did not contain anything to compare | |
# to the needle (e.g. if you're looking for timestamps, and they're not on every line). | |
# | |
# @return [nil, Array<Integer>] | |
# Returns [from_offset, to_offset] of the first occurrence which is bigger than the needle | |
# Returns nil if there's no line which is bigger than the needle | |
def binary_search_line(file, start_at: 0, stop_at: nil) | |
raise ArgumentError, "file argument must be an IO" unless file.is_a?(IO) | |
previous_pos = -1 | |
cur_left = 0 | |
cur_right = 0 | |
left_boundary = start_at | |
right_boundary = stop_at || file.size-1 | |
position = (left_boundary+right_boundary) >> 1 | |
rv = nil | |
until left_boundary >= right_boundary || position == previous_pos | |
previous_pos = position | |
previous_bounds = [cur_left, cur_right] | |
cur_left, cur_right, line = read_line_at(file, position) | |
case rv = yield(line) | |
when -1 then left_boundary = cur_right; position = (left_boundary+right_boundary) >> 1 | |
when 1 then right_boundary = cur_left; position = (left_boundary+right_boundary) >> 1 | |
when 0 then return [cur_left, cur_right] | |
when :next_line then position = cur_right # do not move boundaries, only position (boundaries only move when something to compare was found) | |
else raise "your block must return -1, 0, 1 or :next_line - got #{rv.inspect}" | |
end | |
# p position: previous_pos, cur_left: cur_left, cur_right: cur_right, left_boundary: left_boundary, right_boundary: right_boundary | |
end | |
if rv == -1 | |
previous_bounds | |
else | |
[cur_left, cur_right] | |
end | |
end | |
def rails_log_within(path, start_time, end_time) | |
File.open(path, 'rb') { |file| | |
offset1, _, offset2, _ = [start_time, end_time].flat_map { |time| | |
binary_search_line(file) { |line| | |
time_string = line[/\[(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+)/, 1] | |
if time_string | |
cmp = Time.iso8601(time_string) | |
cmp <=> time | |
else | |
:next_line | |
end | |
} | |
} | |
file.seek(offset1, IO::SEEK_SET) | |
file.read(offset2-offset1+1) | |
} | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment