# Dogs of the FTSE
# Copyright Doug Bromley
# Released under Creative Commons Attribution-Noncommercial-Share Alike 3.0
#
# Scrapes the FTSE 100 constituent list from Yahoo Finance UK, looks up each
# stock code, collects the header/value rows from the resulting page, and
# prints every row whose header mentions "Yield".

require 'rubygems'
require 'scrubyt'

# Maps stock code => scraped rows for that stock.
final_data = {}

# First extractor: pull the FTSE 100 listing from Yahoo.
ftse_list = Scrubyt::Extractor.define do
  fetch 'http://uk.finance.yahoo.com/q/cp?s=%5EFTSE'

  ftse_listing("/html/body/div/div/table/tr/td/table/tr/td/table/tr/td/b", { :generalize => true }) do
    stock_code("/a[1]")
  end.ensure_presence_of_pattern("stock_code")

  # The listing is split across two pages, so follow the "Next" link and repeat.
  next_page("Next", { :limit => 2 })
end

# All of the scraped listing data; iterated below as a collection of records
# that each expose a :stock_code entry.
ftse_100 = ftse_list.to_hash

# For each FTSE listing...
ftse_100.each do |listing|
  # ... get the stock code ...
  sc = listing[:stock_code]

  # Skip rows without a usable code: submitting an empty search would only
  # scrape an unrelated page.
  next if sc.nil? || sc.to_s.strip.empty?

  # A code that was already collected does not need a second round trip.
  next if final_data.key?(sc)

  # Second extractor: search for the stock code and retrieve all relevant data
  # (dividend yield, etc.). Both the field header (such as "Div Yield") and the
  # field data (such as "2.3%") are captured.
  co_data = Scrubyt::Extractor.define do
    fetch 'http://uk.finance.yahoo.com/'
    fill_textfield 's', sc
    submit

    co_fields("/html/body/div/div/table/tr/td/table/tr/td/table/tr", { :generalize => true }) do
      fieldheader("/td[1]")
      fielddata("/td[2]")
    end.ensure_presence_of_ancestor_node(:td, {"class"=>"yfnc_tabledata1"})
  end

  # Store the stock data in a hash keyed by the stock code.
  final_data[sc] = co_data.to_hash
end

# Here you can do what you want with the final set of data gathered above.
# This example just outputs the "Dividend Yield" style rows for each stock.
# Note the case-sensitive check for the string "Yield".
final_data.each do |key, entry|
  puts "\n\n#{key}"

  entry.each do |dataset|
    # to_s guards against rows that were scraped without a header cell (nil).
    header = dataset[:fieldheader].to_s
    next unless header.include?("Yield")

    puts "#{dataset[:fieldheader]} #{dataset[:fielddata]}"
  end
end