require 'rubygems' require 'uri' require 'open-uri' require 'net/http' require 'json/pure' require 'cgi' require 'hpricot' def getGuardianData(da) api_key = "foo" count = 100 u ="http://api.guardianapis.com/content/search?format=json&after=#{da}&filter=/books&filter=/global/reviews&count=#{count}&api_key=#{api_key}" url = URI.parse u puts "getting #{url} of #{da}" req = Net::HTTP::Get.new(url.request_uri) begin res = Net::HTTP.new(url.host, url.port).start {|http|http.request(req) } end j = nil begin j = JSON.parse(res.body) rescue OpenURI::HTTPError=>e case e.to_s when /^404/ raise 'Not Found' when /^304/ raise 'No Info' end end return j end def getAMZData(name,author) id = "bar" amzURL = "http://webservices.amazon.co.uk/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=#{id}&Operation=ItemSearch&SearchIndex=Books" amzURL = amzURL + "&Title=#{CGI.escape(name)}" if author != nil amzURL = amzURL + "&Author=#{CGI.escape(author)}" end url = URI.parse amzURL puts "getting #{url}" req = Net::HTTP::Get.new(url.request_uri) begin res = Net::HTTP.new(url.host, url.port).start {|http|http.request(req) } end doc = nil begin doc = Hpricot.XML(res.body.to_s) end return doc end begin # make the right date for the guardian request t = DateTime.now #this is GMT apparantly t1 = DateTime.now - 2 d = t.strftime("%Y%m%d") d1 = t1.strftime("%Y%m%d") j = getGuardianData(d1)["search"]["results"] # text value for printing txt = "
\nsee blog post for more information
" now = 0 while now < j.length arr = {} z = j[now]["id"] x = j[now]["linkText"] y = j[now]["publication"] a = j[now]["webUrl"] b = j[now]["trailImage"] c = j[now]["typeSpecific"]["body"] byline = j[now]["byline"] trailText = j[now]["trailText"] trailText.gsub!(x,"")#remove dupe text tags = j[now]["tags"] puts "#{z} #{x} #{y}" now = now+1 searchText = x author="" multi=false if searchText!=nil && searchText!="" txt << "#{trailText}
\n" end #process it item = doc.search("//Item")[0] if item!=nil puts (item/:ItemAttributes/:Author).inner_html puts (item/:ItemAttributes/:Title).inner_html amzU = (item/:DetailPageURL).inner_html amzASIN = (item/:ASIN).inner_html txt << "Amazon Link\n" txt << "| Google search for ASIN\n" #wait a little sleep(5) else txt << "Nothing found on Amazon\n" end txt << "| Full review\n" end txt << "\n