Created
January 28, 2022 07:06
-
-
Save oreoshake/51281b1247e3ac33d82ac7390af951e1 to your computer and use it in GitHub Desktop.
Large files and ruby
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "benchmark-memory" | |
require "benchmark" | |
require "benchmark/ips" | |
# Path of the text file to analyse, taken from the first command-line argument
# (nil when the script is started without arguments).
TEST_FILE = ARGV.first

# Pattern matching one "word": a run of word characters, apostrophes and hyphens.
# TODO: good enough as a start, but not ideal -- an apostrophe should not begin
# a word, a hyphen should not begin or end one, and there may be more
# punctuation to consider.
WORD_SPLIT = /[\w'-]+/
# Counts how often each word occurs in TEST_FILE, streaming the file lazily.
#
# grep only pre-filters the lines: it keeps those containing at least one
# WORD_SPLIT match. On its own it would hand whole lines to tally, so every
# surviving line is then scanned for its individual words.
#
# File.foreach is used instead of IO.foreach so the path is always opened as a
# plain file.
#
# @return [Hash{String => Integer}] word => number of occurrences
def lazy_grep
  File.foreach(TEST_FILE)
      .lazy
      .grep(WORD_SPLIT)
      .flat_map { |line| line.scan(WORD_SPLIT) }
      .tally
end
# Counts how often each word occurs in TEST_FILE; eager counterpart of the
# lazy grep pipeline.
#
# grep only pre-filters the lines: it keeps those containing at least one
# WORD_SPLIT match. On its own it would hand whole lines to tally, so every
# surviving line is then scanned for its individual words.
#
# File.foreach is used instead of IO.foreach so the path is always opened as a
# plain file.
#
# @return [Hash{String => Integer}] word => number of occurrences
def grep
  File.foreach(TEST_FILE)
      .grep(WORD_SPLIT)
      .flat_map { |line| line.scan(WORD_SPLIT) }
      .tally
end
# Counts how often each word occurs in TEST_FILE using a lazy pipeline:
# each line is scanned for WORD_SPLIT matches and the words are tallied.
#
# File.foreach is used instead of IO.foreach so the path is always opened as a
# plain file.
#
# @return [Hash{String => Integer}] word => number of occurrences
def lazy_hipster
  File.foreach(TEST_FILE)
      .lazy
      .flat_map { |line| line.scan(WORD_SPLIT) }
      .tally
end
# Counts how often each word occurs in TEST_FILE using an eager pipeline:
# each line is scanned for WORD_SPLIT matches, the words are collected into
# one array, and that array is tallied.
#
# File.foreach is used instead of IO.foreach so the path is always opened as a
# plain file.
#
# @return [Hash{String => Integer}] word => number of occurrences
def hipster
  File.foreach(TEST_FILE)
      .flat_map { |line| line.scan(WORD_SPLIT) }
      .tally
end
# Counts how often each word occurs in TEST_FILE with an explicit counting
# hash: every WORD_SPLIT match on every line increments that word's counter.
#
# File.foreach is used instead of IO.foreach so the path is always opened as a
# plain file.
#
# @return [Hash{String => Integer}] word => number of occurrences; the hash
#   has a default value of 0 for missing keys
def basic
  File.foreach(TEST_FILE).each_with_object(Hash.new(0)) do |line, counts|
    line.scan(WORD_SPLIT).each do |word|
      counts[word] += 1
    end
  end
end
# Counts how often each word occurs in TEST_FILE by reading the whole file
# into memory first and then walking it line by line.
#
# Words are extracted with WORD_SPLIT, like in the other suites, so the
# resulting keys carry no attached punctuation and the result is comparable
# with theirs.
#
# @return [Hash{String => Integer}] word => number of occurrences; the hash
#   has a default value of 0 for missing keys
def each_line
  counts = Hash.new(0)
  File.read(TEST_FILE).each_line do |line|
    line.scan(WORD_SPLIT).each do |word|
      counts[word] += 1
    end
  end
  counts
end
# Names of the top-level methods to benchmark; each name doubles as the
# report label.
SUITES = %w[lazy_grep grep lazy_hipster hipster basic each_line].freeze

# Registers every suite on the given benchmark job. `send` is required here
# (not `public_send`) because methods defined at the top level are private.
report_suites = lambda do |job|
  SUITES.each do |suite|
    job.report(suite) { send(suite) }
  end
end

# Memory allocated by each suite, followed by a comparison.
Benchmark.memory do |x|
  report_suites.call(x)
  x.compare!
end

# Timing of a single run of each suite; the label width keeps the columns
# aligned with the longest suite name.
Benchmark.bm(SUITES.map(&:length).max) do |x|
  report_suites.call(x)
end

# Iterations per second for each suite, followed by a comparison.
Benchmark.ips do |x|
  report_suites.call(x)
  x.compare!
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Calculating ------------------------------------- | |
lazy_grep 157.265M memsize ( 0.000 retained) | |
2.000M objects ( 0.000 retained) | |
50.000 strings ( 0.000 retained) | |
non_lazy_grep 168.901M memsize ( 0.000 retained) | |
2.000M objects ( 0.000 retained) | |
50.000 strings ( 0.000 retained) | |
lazy_hipster 678.973M memsize ( 0.000 retained) | |
8.472M objects ( 0.000 retained) | |
50.000 strings ( 0.000 retained) | |
hipster 727.592M memsize ( 0.000 retained) | |
8.472M objects ( 0.000 retained) | |
50.000 strings ( 0.000 retained) | |
basic 678.972M memsize ( 0.000 retained) | |
8.472M objects ( 0.000 retained) | |
50.000 strings ( 0.000 retained) | |
each_line 513.663M memsize ( 0.000 retained) | |
6.472M objects ( 0.000 retained) | |
50.000 strings ( 0.000 retained) | |
Comparison: | |
lazy_grep: 157265432 allocated | |
non_lazy_grep: 168900728 allocated - 1.07x more | |
each_line: 513663391 allocated - 3.27x more | |
basic: 678972450 allocated - 4.32x more | |
lazy_hipster: 678973490 allocated - 4.32x more | |
hipster: 727592162 allocated - 4.63x more | |
user system total real | |
lazy_grep 2.159524 0.032121 2.191645 ( 2.194424) | |
non_lazy_grep 3.372937 0.059949 3.432886 ( 3.438654) | |
lazy_hipster 5.949340 0.072011 6.021351 ( 6.022157) | |
hipster 6.561818 0.048044 6.609862 ( 6.612127) | |
basic 5.530344 0.027989 5.558333 ( 5.559264) | |
each_line 4.229015 0.039961 4.268976 ( 4.269845) | |
Warming up -------------------------------------- | |
lazy_grep 1.000 i/100ms | |
non_lazy_grep 1.000 i/100ms | |
lazy_hipster 1.000 i/100ms | |
hipster 1.000 i/100ms | |
basic 1.000 i/100ms | |
each_line 1.000 i/100ms | |
Calculating ------------------------------------- | |
lazy_grep 0.508 (± 0.0%) i/s - 3.000 in 5.930276s | |
non_lazy_grep 0.648 (± 0.0%) i/s - 4.000 in 6.217387s | |
lazy_hipster 0.171 (± 0.0%) i/s - 1.000 in 5.838436s | |
hipster 0.162 (± 0.0%) i/s - 1.000 in 6.190860s | |
basic 0.183 (± 0.0%) i/s - 1.000 in 5.475410s | |
each_line 0.284 (± 0.0%) i/s - 2.000 in 7.041673s | |
Comparison: | |
non_lazy_grep: 0.6 i/s | |
lazy_grep: 0.5 i/s - 1.28x (± 0.00) slower | |
each_line: 0.3 i/s - 2.28x (± 0.00) slower | |
basic: 0.2 i/s - 3.55x (± 0.00) slower | |
lazy_hipster: 0.2 i/s - 3.78x (± 0.00) slower | |
hipster: 0.2 i/s - 4.01x (± 0.00) slower |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment