#
# $Id: runner.rb,v 1.1 2004/01/10 07:37:28 hn Exp $
# Copyright (c) Narushima Hironori.
#

require 'uri'
require 'linkchecker/checkers'

module LinkChecker

#
# Scans files for link targets and prints every link for which the
# checker registered for the link's URI scheme returns a message.
#
# Output goes to $stdout: one line per scanned file, followed by one
# tab-indented "link,line-number,message" line per reported link.
#
class Runner

	def initialize
		# File extensions (without the leading dot) picked up by #run_dir.
		@check_file_extensions = %w!html htm!
		# Patterns whose FIRST capture group is the link target.
		@link_regexps = [/href="(.+?)"/, /src="(.+?)"/]
		# Links matching any of these (compared with ===) are skipped.
		@ignore_link_regexp = []
		
		# URI scheme => checker. The nil key serves links without a scheme.
		@checkers = {
			nil => FileChecker.new,
			'http' => HTTPChecker.new,
			'ftp' => FTPChecker.new,
		}
		
		# Check results already obtained during the current #run.
		@checked_outernal_url = {}
	end

	attr_reader :check_file_extensions,
		:link_regexps,
		:ignore_link_regexp,
		:checkers

	#
	# Checks the given resources and prints "done" when finished.
	#
	# #run(resources_array) or #run(resource)
	#
	# Each resource is a path: directories are handed to #run_dir, regular
	# files to #run_file, anything else is silently skipped. The result
	# cache is emptied first, so every call starts from scratch.
	#
	def run(rs)
		@checked_outernal_url.clear
	
		rs = [rs] if rs.is_a?(String)
		rs.each { |r|
			if File.directory?(r)
				run_dir(r)
			elsif File.file?(r)
				run_file(r)
			end
		}
		$stdout << "done\n"
	end

	#
	# Runs #run_file on every regular file below +dir+ (recursively) whose
	# extension is listed in #check_file_extensions.
	#
	def run_dir(dir)
		# Sorted so that the report order does not depend on the order in
		# which the file system happens to enumerate entries.
		Dir.glob( File.join(dir, '**/*') ).sort.select { |f|
			File.file?(f) && @check_file_extensions.include?( File.extname(f).sub(/\A\./, '') )
		}.each { |f|
			run_file(f)
		}
	end

	#
	# Prints the name of +file+, then one "\tlink,line,message" line for
	# every link in it whose check yields a message. An exception raised
	# while parsing or checking a link is reported as that link's message
	# instead of aborting the run.
	#
	def run_file(file)
		$stdout << "#{file}\n"
		$stdout.flush
		choose_links(file) { |url, i|
			msg = begin
				check_link(url, file)
			rescue => e
				e.message
			end
			next unless msg

			$stdout << "\t#{url},#{i},#{msg}\n"
			$stdout.flush
		}
	end

	#
	# Yields (link, line_number) for every match of #link_regexps in +file+
	# that lies inside a <...> tag and is not matched by #ignore_link_regexp.
	# Line numbers start at 1. Within a line, all matches of the first
	# regexp are yielded before those of the next one.
	#
	def choose_links(file)
		content = IO.read(file)
		line_no = 0
		line_start = 0	# offset of the current line within content
		content.each_line { |line|
			line_no += 1
			@link_regexps.each { |reg|
				line.scan(reg) {
					# Read the match data right away: the === calls below
					# overwrite it.
					match = Regexp.last_match
					link = match[1]
					next if link.nil?	# regexp without a (matching) first group
					
					# Use the offset of this very match rather than searching
					# for the link text, which could hit an earlier, unrelated
					# occurrence of the same text on the line.
					pos = line_start + match.begin(1)
					next unless inside_tag?(content, pos)
					next if @ignore_link_regexp.any? { |r| r === link }
					
					yield link, line_no
				}
			}
			line_start += line.size
		}
	end

	private

	#
	# Returns the message produced by the checker registered for the scheme
	# of +url+, or nil when no checker is registered for it.
	#
	# A checker whose #check takes exactly one argument is called with the
	# URI only and its result is reused for that URI for the rest of the
	# run. Any other checker is also given +file+; since its answer may
	# depend on the referencing file, its result is cached per (URI, file)
	# pair. Results are not cached when the check raises.
	#
	def check_link(url, file)
		uri = URI.parse(url)
		checker = @checkers[uri.scheme]
		return nil unless checker

		uri_only = checker.method(:check).arity == 1
		key = uri_only ? uri : [uri, file]
		unless @checked_outernal_url.key?(key)
			@checked_outernal_url[key] =
				uri_only ? checker.check(uri) : checker.check(uri, file)
		end
		@checked_outernal_url[key]
	end

	#
	# True when offset +pos+ of +content+ lies between a '<' and its
	# matching '>', i.e. the nearest bracket before it is '<' and the
	# nearest bracket after it is '>'.
	#
	def inside_tag?(content, pos)
		opened = content.rindex('<', pos)
		return false if opened.nil?	# no tag has been opened at all
		closed = content.rindex('>', pos)
		# A missing '>' before pos is fine: this is the first tag of the file.
		return false if closed && closed > opened

		tag_end = content.index('>', pos)
		return false if tag_end.nil?	# the tag is never closed
		next_open = content.index('<', pos)
		# A missing '<' after pos is fine: this is the last tag of the file.
		next_open.nil? || tag_end < next_open
	end

end

end