#!/usr/bin/ruby1.8 -w
#
# Fetches one story page from fr.allafrica.com, extracts its title and a
# subset of its <p> elements, re-encodes the result from ISO-8859-1 to
# UTF-8, records the fetch in the rbgan_news table and prints the extracted
# text on stdout.
#
# Arguments:
#   ARGV[0]  story id, interpolated into the story URL (required)
#   ARGV[1]  value stored in the `site` column (held in `ip`);
#            defaults to "0.0.0.0" when omitted
#
# NOTE(review): several string literals below look as if markup was stripped
# from them at some point (title written twice, bare "HOME") -- confirm
# against the deployed copy before relying on the exact output format.

raise "URL ID ??" if ARGV[0].nil?

require 'http-access2'
require 'html/xmltree'
require 'iconv'
require 'dbi'

url = "http://fr.allafrica.com/stories/#{ARGV[ 0 ]}.html"

client = HTTPAccess2::Client.new(
  nil,
  "Mozilla/5.0 (X11; U; Linux ppc; en-US; rv:1.8.0.1) Gecko/20060324 Ubuntu/dapper Firefox/1.5.0.1",
  "admin@cybertigi.com"
)

parser = HTMLTree::XMLParser.new(false, false)
parser.feed(client.get(url).body.content)
xml = parser.document

title = ""
data = ""

# Take the page title and drop the "allafrica.com:" marker (case-insensitive).
xml.elements.each('//title[1]') do |n|
  title = n.text.to_s.gsub(/allafrica\.com\:/i, '')
end

data << "#{title}\n\n#{title}\n\n"

# Append every matched paragraph except those whose zero-based position in
# the match list is 1, 2 or 3.
# NOTE(review): the XPath predicate is kept verbatim; it looks truncated
# (possibly meant position()>=5) -- confirm the intended expression.
i = 0
bad_indexes = [1, 2, 3]
xml.elements.each('//p[>=5]') do |node|
  data << node.to_s unless bad_indexes.include?(i)
  i += 1
end

data.gsub!("\n\nHOME\n\n", '')

# Iconv.iconv returns an Array of converted strings, so from here on `data`
# is an Array: `<<` pushes an element and `puts` prints one element per line.
data = Iconv.iconv('utf-8', 'ISO-8859-1', data)
data << "\n"

ip = ARGV[1] || "0.0.0.0"

begin
  dbh = DBI.connect("dbi:Mysql:******:buildafrica.org", "******", "******")
  # Bind parameters instead of interpolating: the title comes from a remote
  # page and ip/url derive from the command line, so none of them can be
  # trusted inside the SQL text.
  dbh.do(
    "INSERT INTO rbgan_news(titre,site,url,date) VALUES (?, ?, ?, NOW())",
    title, ip, url
  )
rescue StandardError => e
  # Logging stays best-effort (the text is still printed below), but the
  # failure is no longer silent.
  $stderr.puts "rbgan_news insert failed: #{e.class}: #{e.message}"
ensure
  dbh.disconnect if dbh
end

puts data