Created
April 14, 2010 17:58
-
-
Save francisoud/366112 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configuration:
#   - change 'username' to your delicious user name
#   - change 'max_pages' to the number of pages of bookmarks you have
#     (you need to go to delicious and find the exact number manually)
#   - see Watir::Browser.xxx for extra config parameters
#
# This code:
# 1- opens a browser on your delicious page
# 2- fetches each link
# 3- only keeps bookmarks
# 4- checks each bookmark's HTTP status code
# 5- if the status code is wrong: displays the link in error and opens a browser window on it
# 6- fetches the next page and goes on
# ...
# 999- you need to manually delete the failing links
#
# Watir installation: http://wiki.openqa.org/display/WTR/Installation
require 'net/http'
require 'uri'
require 'watir'
# delicious account whose bookmarks are checked; also used to filter out
# links that point back at the account itself.
username = 'xxx'
# Number of bookmark pages to walk (1..max_pages); find the real value
# manually on delicious.
max_pages = 3
# Print a one-line report for a failing link on standard output.
#
# link - the URL that failed.
# msg  - description of the failure; it is interpolated into the line, so
#        a String or an exception object both work.
def display_error(link, msg)
  puts "*** #{link} *** \t\t => \t #{msg}"
end
# Report a failing link and open a browser on it so that the page can be
# checked visually.
#
# link - the URL that failed.
# msg  - description of the failure, passed through to display_error.
def error_and_browser(link, msg)
  display_error(link, msg)
  Watir::Browser.start(link)
end
# Watir::Browser.default = "ie" # ie , firefox , safari
# Watir::Browser.speed = 'zippy' # "fast", "slow", "zippy"
# Watir::Browser.visible = true
# Main loop: walk every bookmarks page in a browser, collect the bookmarked
# URLs and check the HTTP status of each one.
b = Watir::Browser.new

# Failures that mean "this bookmark could not be fetched". They are reported
# for the link concerned and must never abort the whole run; a dead domain
# (SocketError) or a refused connection is exactly what this script looks for.
FETCH_ERRORS = [
  Errno::ETIMEDOUT,
  Errno::ECONNRESET,
  Errno::ECONNREFUSED,
  Errno::EHOSTUNREACH,
  Net::HTTPBadResponse,
  EOFError,
  Timeout::Error,
  SocketError,
  URI::InvalidURIError
].freeze

# look in each page
(1..max_pages).each do |page|
  puts "\n\t\t\t=== page: #{page} of #{max_pages} ==="
  b.goto("http://delicious.com/#{username}?page=#{page}")

  # only keep http:// and https:// links; the regexp is on the left so that
  # a nil href is simply rejected
  hrefs = b.links.collect { |link| link.href }.select { |href| %r{\Ahttps?://} =~ href }
  # remove username links
  hrefs = hrefs.reject { |href| href.include?(username) }
  # remove all delicious and yahoo links (dots escaped so they match literally)
  links = hrefs.reject { |href| href =~ /delicious\.com|info\.yahoo\.com/ }

  links.each do |link|
    begin
      response = Net::HTTP.get_response(URI.parse(link))
      case response
      when Net::HTTPSuccess then puts "\tok: #{link}"
      when Net::HTTPFound then puts "\tok (302): #{link}"
      else
        # any other status: report it and open the page for a visual check
        error_and_browser(link, "#{response.code} #{response.message}")
      end
    rescue *FETCH_ERRORS => detail
      # the request itself failed: report only, there is no page to show
      display_error(link, detail)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment