# Fetches the body of +address+ over HTTP(S), probing with a cheap HEAD first.
#
# A HEAD request is sent first; only when it answers "200" is a full GET
# issued (reusing the same connection) and its body returned. The status
# of the GET itself is not inspected.
#
# @param address [String] absolute http:// or https:// URL
# @return [String] the GET response body, or "" when the HEAD probe did not
#   answer 200
def fetch_url(address)
  url = URI.parse(address)
  # request_uri (unlike path) keeps the query string and yields "/" for a
  # bare host, where an empty path would make Net::HTTP raise ArgumentError.
  target = url.request_uri

  Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    probe = http.request(Net::HTTP::Head.new(target))
    next '' unless probe.code == '200' # 200 = HTTP OK

    puts "#{probe.code} - #{address}"
    # Exists, so go ahead and do a full GET; to_s guards against a nil body.
    http.request(Net::HTTP::Get.new(target)).body.to_s
  end
end
') body = body.gsub(/<\/o:p>/i, '
') body = body.gsub(/style=".+?"/im, '') #get rid of any pesky non-ascii characters body = body.gsub(/[^\x20-\x7F]/, ' ') #body = body.gsub(/