Created
May 13, 2011 05:53
-
-
Save jamiew/970048 to your computer and use it in GitHub Desktop.
Noodling with different ways of expanding short URLs in bulk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'rubygems' | |
| require 'benchmark' | |
| require 'pp' | |
| require 'mechanize' | |
| require 'eventmachine' | |
| require 'em-http-request' | |
# Expands each URL one at a time using a single shared Mechanize agent.
#
# Sends a HEAD request per URL and records the URI of the response object
# Mechanize returns (presumably the final URI after Mechanize has followed
# any redirects -- confirm against the Mechanize docs).
#
# A URL whose request raises is reported on STDERR and contributes +nil+,
# so the result always has exactly one entry per input URL, in input order.
#
# @param urls [Array<String>] URLs to expand
# @return [Array<String, nil>] expanded URL per input, +nil+ where the fetch failed
def syncronous(urls)
  agent = Mechanize.new
  urls.map do |url|
    begin
      agent.head(url).uri.to_s
    rescue StandardError => e
      STDERR.puts "Error fetching #{url.inspect} => #{e.inspect}"
      # Explicit placeholder: callers strip failures with #compact.
      nil
    end
  end
end
# Expands URLs concurrently with EM::Iterator inside a temporary
# EventMachine reactor.
#
# Each URL gets a HEAD request (with :redirects => 1) and contributes the
# value of its 'LOCATION' response header. The reactor is stopped once
# EM::Iterator#map hands over the collected results.
#
# @param urls [Array<String>] URLs to expand
# @param concurrency [Integer] worker count handed to EM::Iterator
# @return [Array<String, nil>] 'LOCATION' header per URL; +nil+ when the
#   request failed or the header was absent
def iterator(urls, concurrency = 10)
  expanded_urls = nil

  EventMachine.run do
    expand_one = proc do |url, iter|
      http = EventMachine::HttpRequest.new(url).head(:redirects => 1)
      http.callback { iter.return(http.response_header['LOCATION']) }
      # A failed request must still report back to the iterator; otherwise
      # the completion proc below never runs and the reactor never stops.
      http.errback { iter.return(nil) }
    end

    finish = proc do |responses|
      expanded_urls = responses
      EventMachine.stop
    end

    EM::Iterator.new(urls, concurrency).map(expand_one, finish)
  end

  expanded_urls
end
# Expands URLs by firing every request at once through
# EventMachine::MultiRequest inside a temporary EventMachine reactor.
#
# Each URL gets a HEAD request (with :redirects => 1). When the batch
# callback fires, the 'LOCATION' response header of every request listed
# under responses[:succeeded] is collected and the reactor is stopped.
# Requests that did not succeed are simply absent from the result, so the
# output may be shorter than the input.
#
# @param urls [Array<String>] URLs to expand
# @return [Array<String, nil>] 'LOCATION' header of each succeeded request
#   (+nil+ where the header is absent); +[]+ for empty input
def multi(urls)
  # With no requests added there is nothing to complete, so the batch
  # callback would presumably never fire and the reactor would run forever
  # (NOTE(review): confirm against the em-http-request version in use).
  # Skip the reactor entirely in that case.
  return [] if urls.empty?

  expanded_urls = nil

  EventMachine.run do
    batch = EventMachine::MultiRequest.new
    urls.each do |url|
      batch.add(EventMachine::HttpRequest.new(url).head(:redirects => 1))
    end

    batch.callback do
      succeeded = batch.responses[:succeeded]
      expanded_urls = succeeded.map { |request| request.response_header['LOCATION'] }
      EventMachine.stop
    end
  end

  expanded_urls
end
| # urls = ["http://owl.li/4TAMu", "http://nyti.ms/lXobRU", "http://youtu.be/ydbOwOpyF-o", "http://joint.im/beta/r/khPZl9sn4gT", "http://www.aim.com/av/", "http://nyti.ms/kMalpD", "http://nyti.ms/kushxM", "http://4sq.com/ls4SMM", "http://bzfd.it/lCFVTs", "http://29.media.tumblr.com/tumblr_ll3cdksMCl1qz6f9yo1_500.jpg", "http://bit.ly/mdmt3N", "http://bit.ly/juekXw", "http://nyti.ms/j4FEdf", "http://kck.st/l1PnlT", "http://j.mp/5epoi", "http://j.mp/kmrpev", "http://ttk.me/t4Bm5", "http://bit.ly/lTPQFN", "http://4sq.com/lFdbD1", "http://bit.ly/l6MXft", "http://nyti.ms/lSWIPU", "http://www.psfk.com/2011/05/the-quarterlife-crisis-young-insecure-and-depressed.html?sms_ss=twitter&at_xt=4dcc927f32bf1691,0", "http://su.pr/A1eCd9", "http://instagr.am/p/EK-_5/", "http://twitpic.com/4wv5oc", "http://bit.ly/lyeHQf", "http://bit.ly/jkOi8o", "http://4sq.com/mzo07c", "http://nyti.ms/iFlKgo", "http://wp.me/p4-Hw", "http://bzfd.it/llbwwg", "http://bit.ly/mv4h50", "http://bit.ly/j7RkNE", "http://mikufes.com", "http://mikufes.com/", "http://www.anime-expo.org/?p=6818", "http://bit.ly/k6hbm4", "http://nyti.ms/iLtXjX", "http://bit.ly/kHTq9e", "http://soupsoup.net/lTB4ym", "http://ttk.me/t4Bm1", "http://knowledge.wharton.upenn.edu/article.cfm?articleid=2755", "http://nyti.ms/jjTgpd", "http://bit.ly/kEW119", "http://su.pr/A6nk05", "http://instagr.am/p/ELE55/", "http://nyti.ms/kOseS2", "http://is.gd/fn8Vyz", "http://tcrn.ch/kx2nx0", "http://bit.ly/lR4bIZ", "http://bzfd.it/ka6KT7", "http://soupsoup.net/miMftM", "http://bit.ly/kHBc9P", "http://is.gd/ztnK1C", "http://youtu.be/qwvdxV26q8I", "http://ping.fm/yDzgX", "http://bit.ly/jvmnNp", "http://chzb.gr/rollercoasterchess", "http://bit.ly/hb3UbD", "http://bit.ly/lQ6WHv", "http://www.storyful.com/stories/gjdjks", "http://vhx.tv/73589", "http://econ.st/mJjaDX", "http://strawberrymoth.blogspot.com/2011/01/inspiring-style-files-presents.html", "http://twitpic.com/4wu6ll", "http://nyr.kr/lZDT7n", "http://nyti.ms/m1CfjD", 
"http://amarpai.com/bikemap/bikemap.html", "http://bit.ly/bjyRwD", "http://nyti.ms/io6orz", "http://nyti.ms/mtZNEn", "http://bit.ly/k9KYwL", "http://yfrog.com/h0xq7vqj", "http://hb.ly/mqh5Y9", "http://drbl.in/bkDf", "http://fncy.it/lEy1EK", "http://4sq.com/iRoVnp", "http://www.justin.tv/hatperson", "http://www.htmlfivewow.com/", "http://aim.com/av", "http://instagr.am/p/EK8mh/", "http://4sq.com/lLQRrp", "http://storify.com/hrheingold/students-selforganize-their-own-syllabus", "http://sfy.co/8Wm", "http://on.fb.me/kayJ2u", "http://frc.vc/3eN", "http://nyti.ms/ljFiEw", "http://bit.ly/kCjzKp", "http://twitpic.com/4wtowc", "http://nyti.ms/jViJ3s", "http://nyti.ms/iB1o3b", "http://bzfd.it/kTfJdD", "http://lockerz.com/s/101093774", "http://thepulsenetwork.com/technology/stevegarfield-tv-on-tpn/05-12-11-this-week-in-steve/", "http://thepulsenetwork.com/technology/stevegarfield-tv-on-tpn/05-12-11-video-soup/", "http://thepulsenetwork.com/technology/stevegarfield-tv-on-tpn/05-12-11-the-killing-of-bin-laden/"] | |
# Benchmark driver: expands the same short URLs with each strategy in turn,
# printing the sorted results and wall-clock timing for every run, then
# compares the result sets of consecutive strategies.
urls = [
  "http://owl.li/4TAMu",
  "http://nyti.ms/lXobRU",
  "http://youtu.be/ydbOwOpyF-o",
  "http://joint.im/beta/r/khPZl9sn4gT",
  "http://www.aim.com/av/",
  "http://nyti.ms/kMalpD",
  "http://nyti.ms/kushxM",
  "http://4sq.com/ls4SMM",
  "http://bzfd.it/lCFVTs",
  "http://bit.ly/juekXw"
]

puts "Processing #{urls.length} URLs..."

# Heading printed before each run, paired with the call that performs it.
strategies = [
  ["Syncronously...",     lambda { syncronous(urls) }],
  ["EM::Iterator(2)...",  lambda { iterator(urls, 2) }],
  ["EM::Iterator(10)...", lambda { iterator(urls, 10) }],
  ["Em::MultiRequest...", lambda { multi(urls) }]
]

outputs = strategies.map do |heading, expand|
  puts "\n#{heading}"
  expanded = []
  # Failed lookups (nil) are dropped and the rest sorted inside the timed
  # section, so the result sets can be compared with == below.
  time = Benchmark.realtime { expanded = expand.call.compact.sort }
  pp expanded
  puts "Took #{time}s (#{(time * 1000.0 / urls.length.to_f).to_i}ms/url)"
  # Pause between runs (presumably to keep one strategy's traffic from
  # overlapping the next -- the original gives no reason).
  sleep 2
  expanded
end

output0, output1, output2, output3 = outputs

puts "\nDone!"
puts "output0.length=#{output0.length} output1.length=#{output1.length} output2.length=#{output2.length} output3.length=#{output3.length}"
puts "0==1?#{output0 == output1} 1==2?#{output1 == output2} 2==3?#{output2 == output3}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment