Created
February 7, 2015 14:59
-
-
Save ccocchi/f8d58626a9ac1b9536e5 to your computer and use it in GitHub Desktop.
Profanity filter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Profanity filter benchmark.
#
# Compares four ways of deciding whether a text contains any word from a
# list of "foul" words, each with a 10-entry and a 100-entry list:
#
#   regexp       - one case-insensitive regexp with all words as alternatives
#   regexp_loop  - one case-insensitive regexp per word, tried in turn
#   array#&      - intersect the word list with the text's downcased tokens
#   array_loop   - walk the text's downcased tokens, look each up in the list
#
# Every strategy matches whole words only and every report block evaluates
# to true when a foul word is present, so the strategies are comparable.
require 'benchmark/ips'

text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ullamcorper eleifend velit sed placerat. In libero leo, fringilla ut mauris nec, bibendum varius nisi. Nam rhoncus facilisis lacinia. Integer massa quam, vestibulum eget est sed, elementum vulputate nibh. Mauris fermentum tellus eu commodo blandit. Cras ornare, risus id semper lacinia, est urna maximus dolor, aliquam maximus metus sem a velit. Suspendisse vel libero eu leo volutpat eleifend accumsan nec massa. Integer eu vulputate quam, ac tristique arcu. Fusce et justo vitae nulla maximus lacinia. Integer dolor massa, finibus vel mattis a, porta eu erat. Suspendisse potenti. Sed ex magna, imperdiet sit amet augue ac, vestibulum vehicula mi. Praesent sapien neque, bibendum eget lorem fermentum, molestie semper leo.'

# Builds a single case-insensitive regexp matching any of +words+ as a
# whole word. Words are escaped so regexp metacharacters in a word are
# matched literally.
def blacklist_regexp(words)
  alternatives = words.map { |w| Regexp.escape(w) }.join('|')
  Regexp.new("\\b(?:#{alternatives})\\b", Regexp::IGNORECASE)
end

# Builds one case-insensitive whole-word regexp per entry of +words+.
def word_regexps(words)
  words.map { |w| /\b#{Regexp.escape(w)}\b/i }
end

# Prints +title+ and the text size, then benchmarks every strategy against
# +text+ using +words_10+ and a 100-entry list derived from it.
#
# NOTE(review): the 100-entry list is +words_10+ repeated ten times, so it
# holds 100 entries but only 10 distinct words.
def run_filter_benchmark(title, text, words_10)
  words_100   = words_10 * 10
  regexp_10   = blacklist_regexp(words_10)
  regexp_100  = blacklist_regexp(words_100)
  r_array_10  = word_regexps(words_10)
  r_array_100 = word_regexps(words_100)

  puts title
  puts "Text size: #{text.size}"

  Benchmark.ips do |x|
    x.report('regexp_10')  { !(regexp_10 =~ text).nil? }
    x.report('regexp_100') { !(regexp_100 =~ text).nil? }

    # any? stops at the first regexp that matches and yields the result.
    x.report('regexp_loop_10')  { r_array_10.any? { |foul| foul =~ text } }
    x.report('regexp_loop_100') { r_array_100.any? { |foul| foul =~ text } }

    # split returns fresh strings, so downcasing them in place is safe and
    # avoids allocating a second copy of every token.
    x.report('array#&_10')  { !(words_10 & text.split(/\W+/).each(&:downcase!)).empty? }
    x.report('array#&_100') { !(words_100 & text.split(/\W+/).each(&:downcase!)).empty? }

    x.report('array_loop_10') do
      text.split(/\W+/).any? do |word|
        word.downcase!
        words_10.include?(word)
      end
    end
    x.report('array_loop_100') do
      text.split(/\W+/).any? do |word|
        word.downcase!
        words_100.include?(word)
      end
    end
  end
end

run_filter_benchmark('Worst case: no element found', text,
                     %w[foo bar string amp baguette marine grenadine quezac bambou panda])
puts
run_filter_benchmark('Best case: first element match', text,
                     %w[integer bar string amp baguette marine grenadine quezac bambou panda])
Author
I changed
r_array_10 = words_10.map { |w| /\b#{w}\b/i }
r_array_100 = words_100.map { |w| /\b#{w}\b/i }
to
r_array_10 = words_10.map { |w| /#{w}/i }
r_array_100 = words_100.map { |w| /#{w}/i }
so that the regexp-loop variants match the same way as the combined regexps. Results:
Worst case: no element found
Text size: 789
Calculating -------------------------------------
regexp_10 1.608k i/100ms
regexp_100 216.000 i/100ms
regexp_loop_10 3.939k i/100ms
regexp_loop_100 400.000 i/100ms
array#&_10 869.000 i/100ms
array#&_100 785.000 i/100ms
array_loop_10 752.000 i/100ms
array_loop_100 163.000 i/100ms
-------------------------------------------------
regexp_10 16.428k (± 4.2%) i/s - 83.616k
regexp_100 2.149k (± 5.3%) i/s - 10.800k
regexp_loop_10 40.490k (± 4.3%) i/s - 204.828k
regexp_loop_100 4.011k (± 4.8%) i/s - 20.400k
array#&_10 8.826k (± 4.7%) i/s - 44.319k
array#&_100 8.102k (± 4.2%) i/s - 40.820k
array_loop_10 7.459k (± 4.2%) i/s - 37.600k
array_loop_100 1.633k (± 4.2%) i/s - 8.150k
Best case: first element match
Text size: 789
Calculating -------------------------------------
regexp_10 4.982k i/100ms
regexp_100 668.000 i/100ms
regexp_loop_10 63.389k i/100ms
regexp_loop_100 64.403k i/100ms
array#&_10 879.000 i/100ms
array#&_100 794.000 i/100ms
array_loop_10 1.282k i/100ms
array_loop_100 507.000 i/100ms
-------------------------------------------------
regexp_10 50.999k (± 3.9%) i/s - 259.064k
regexp_100 6.781k (± 5.1%) i/s - 34.068k
regexp_loop_10 1.139M (± 5.2%) i/s - 5.705M
regexp_loop_100 1.133M (± 6.3%) i/s - 5.667M
array#&_10 8.770k (± 4.0%) i/s - 43.950k
array#&_100 8.210k (± 4.4%) i/s - 41.288k
array_loop_10 13.040k (± 4.3%) i/s - 65.382k
array_loop_100 5.152k (± 5.5%) i/s - 25.857k
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Worst case: no element found Text size: 789 Calculating ------------------------------------- regexp_10 541 i/100ms regexp_100 75 i/100ms regexp_loop_10 1649 i/100ms regexp_loop_100 173 i/100ms array#&_10 325 i/100ms array#&_100 294 i/100ms array_loop_10 275 i/100ms array_loop_100 61 i/100ms ------------------------------------------------- regexp_10 5511.7 (±0.8%) i/s - 27591 in 5.006269s regexp_100 762.5 (±1.2%) i/s - 3825 in 5.017255s regexp_loop_10 15789.5 (±11.8%) i/s - 79152 in 5.092684s regexp_loop_100 1701.2 (±5.2%) i/s - 8650 in 5.099933s array#&_10 2941.1 (±10.1%) i/s - 14625 in 5.028964s array#&_100 2587.5 (±10.9%) i/s - 12936 in 5.071354s array_loop_10 2731.7 (±4.1%) i/s - 13750 in 5.042852s array_loop_100 589.2 (±5.6%) i/s - 2989 in 5.089925s Best case: first element match Text size: 789 Calculating ------------------------------------- regexp_10 1595 i/100ms regexp_100 236 i/100ms regexp_loop_10 18586 i/100ms regexp_loop_100 18615 i/100ms array#&_10 327 i/100ms array#&_100 304 i/100ms array_loop_10 479 i/100ms array_loop_100 194 i/100ms ------------------------------------------------- regexp_10 17059.1 (±0.7%) i/s - 86130 in 5.049162s regexp_100 2346.3 (±3.0%) i/s - 11800 in 5.034060s regexp_loop_10 400161.7 (±4.7%) i/s - 2007288 in 5.029350s regexp_loop_100 401668.4 (±2.9%) i/s - 2010420 in 5.009522s array#&_10 3286.2 (±3.6%) i/s - 16677 in 5.082299s array#&_100 3001.9 (±4.8%) i/s - 15200 in 5.076940s array_loop_10 4487.5 (±9.8%) i/s - 22513 in 5.069066s array_loop_100 1894.1 (±6.1%) i/s - 9506 in 5.040303s