#!/opt/local/bin/ruby

require 'net/http'
require 'thread'
require 'optparse'
require 'rexml/document'
require 'rexml/streamlistener'
require 'set'
require 'rubygems'
require 'rubyful_soup'
require 'logger'
require 'yaml'

# Command-line argument parsing.
#
# CatArguments is a Hash pre-populated with the crawler defaults; the
# constructor then overrides them from the switches found in +args+.
# Recognised switches are removed from +args+ (OptionParser#parse!), so only
# the positional arguments (the URLs) are left in it afterwards.
#
# NOTE: the :excuded_ext key is misspelled ("excluded"), but it is the key the
# rest of the script reads and the one stored in dumped defaults files, so it
# is kept unchanged.
class CatArguments < Hash
	# Builds the defaults, then applies the switches found in +args+.
	#
	# args:: array of command-line words (normally ARGV), modified in place.
	#
	# OptionParser errors (unknown switch, ...) propagate to the caller.
	# -h and -D terminate the process with status 0; -I terminates it with
	# status 1 when the defaults file yields nothing.
	def initialize(args)
		super()
		self[:debug] = false
		self[:verbose] = false
		self[:pools] = 1
		self[:depth] = 999999
		self[:redirections] = 10
		self[:excuded_ext] = { "php" => "application/php", "asp"  => "application/asp", "jsp"  => "application/jsp", "rb"  => "application/ruby", "rbx"  => "application/ruby" }
		self[:title] = "Untitled"
		self[:output_format] = "OPML"
		# colors should be kept ordered inside same MIME category (see text after text/XXX : pattern looking stops on first match !)
		self[:colors] = [
			["INVALID" , "red"],
			["EXTERNAL" , "crimson"],
			["DEFAULT" , "black"],
			["HOME" , "lightgray"],
			["HOMEREF" , "violetred"],
			["UNKNOWN" , "salmon"],
			["text/html" , "steelblue2"],
			["text/css" , "cornflowerblue"],
			["text" , "aliceblue"],
			["application/x-stuffit" , "brown"],
			["application/zip" , "brown"],
			["application/pdf" , "brown"],
			["application/x-gzip" , "brown"],
			["application/php" , "beige"],
			["application/asp" , "beige"],
			["application/jsp" , "beige"],
			["application/ruby" , "beige"],
			["application" , "sienna"],
			["image" , "yellowgreen"]
		]
		self[:output_file] = nil
		self[:input_file] = nil
		self[:one_perpage] = false

		parser = OptionParser.new do |opts|
			opts.banner = "Usage: #$0 [options] (url ...  | -i file)"

			opts.on('-d', '--debug', 'turn on debugging') do
				self[:debug] = true
			end

			opts.on('-p', '--pools [INTEGER]', 'number of thread to use') do |string|
				# to_i yields 0 (never nil) for a missing or non-numeric value, and
				# a pool without any worker could not run anything: use at least 1.
				self[:pools] = [string.to_i, 1].max
			end

			opts.on('-v', '--verbose', 'turn on verbose mode') do
				self[:verbose] = true
			end

			opts.on('-t', '--title [STRING]', 'document title') do |string|
				self[:title] = string
			end

			opts.on('-n', '--ndeep [INTEGER]', 'number of levels to walk thru URLs (default: 999999)') do |string|
				# zero, negative or non-numeric means "no practical limit"
				depth = string.to_i
				self[:depth] = depth > 0 ? depth : 999999
			end

			opts.on('-r', '--nredirection [INTEGER]', 'max number of redirection to follow when fetching URLs (default: 10)') do |string|
				# zero, negative or non-numeric means "no practical limit"
				redirections = string.to_i
				self[:redirections] = redirections > 0 ? redirections : 999999
			end

			opts.on('-f', '--format [STRING]', 'output format [OPML | XOXO | DOT] (default OPML)') do |string|
				# work on a copy: the argument may be nil (no value given) or frozen
				format = string.to_s.upcase

				self[:output_format] = "OPML" if ["OPML", "OPM", "OP", "O"].include?(format)
				self[:output_format] = "XOXO" if ["XOXO", "XOX", "XO", "X"].include?(format)
				self[:output_format] = "DOT" if ["DOT", "DO", "D", "DIGRAPH", "DI"].include?(format)
			end

			opts.on('-o', '--output [STRING]', 'output data structure to YAML format') do |string|
				self[:output_file] = string
			end

			opts.on('-i', '--input [STRING]', 'input file in YAML format from previous execution with -o option') do |string|
				self[:input_file] = string
			end

			opts.on('-c', '--colors [STRING]', 'string is "mime=color;..." or @filename (YAML format)') do |string|
				next if string.nil? # switch given without a value: keep current colors

				if string[0,1] == "@"
					# Only the leading "@" is a marker; the rest is the file path.
					# NOTE(review): depending on the YAML library version, YAML.load
					# may instantiate arbitrary objects - use trusted files only.
					colors = File.open(string[1..-1]) { |f| YAML.load(f) }
				else
					colors = string.split(";").map { |color_def| color_def.split("=", 2) }
				end
				self[:colors] = colors
			end

			opts.on('-C', '--dumpcolors [STRING]', 'dump colors to filename (YAML format)') do |string|
				File.open(string, 'w') do |f|
					YAML.dump(self[:colors], f)
				end
			end

			opts.on('-e', '--excluded [STRING]', 'string is "extension=mime;..." or @filename (YAML format)') do |string|
				next if string.nil? # switch given without a value: keep current list

				if string[0,1] == "@"
					# NOTE(review): same YAML.load caveat as for -c.
					excluded_ext = File.open(string[1..-1]) { |f| YAML.load(f) }
				else
					# Build an extension => MIME type Hash, the same shape as the default.
					excluded_ext = Hash.new
					string.split(";").each do |ext_def|
						extension, mime = ext_def.split("=", 2)
						excluded_ext[extension] = mime
					end
				end
				self[:excuded_ext] = excluded_ext
			end

			opts.on('-E', '--dumpextenstions [STRING]', 'dump excluded extensions to filename (YAML format)') do |string|
				File.open(string, 'w') do |f|
					YAML.dump(self[:excuded_ext], f)
				end
			end

			opts.on('-S', '--separate', 'outputs one DOT layer per depth level found (usefull only if option -o DOT is also selected)') do
				self[:one_perpage] = true
			end

			opts.on('-D', '--dumpdefaults [STRING]', 'dump defaults do filename and quits (any option before -D is taken into account)') do |string|
				File.open(string, 'w') do |f|
					YAML.dump(self,f)
				end
				exit 0
			end

			opts.on('-I', '--usedefaults [STRING]', 'read defaults from file') do |string|
				# NOTE(review): same YAML.load caveat as for -c.
				defaults = File.open(string) { |f| YAML.load(f) }
				if defaults
					defaults.each do |k,v|
						self[k] = v
					end
				else
					$LOG.error "Can't read defaults from #{string}"
					exit 1
				end
			end

			opts.on_tail('-h', '--help', 'Show help and quits') do
				puts opts
				exit 0
			end
		end
		parser.parse!(args)
	end
end

# Gives every object a Java-like "synchronized" section.
#
# Each object lazily gets its own Mutex, stored in the @mutex instance
# variable the first time #mutex is called on it.
class Object
		# Returns the Mutex attached to this object, creating it on first use.
		def mutex
			@mutex = Mutex.new unless @mutex
			@mutex
		end

		# Runs the given block while holding this object's mutex.
		# The object itself is passed to the block; the block's value is returned.
		def synchronize
			mutex.synchronize do
				yield self
			end
		end
end

# Lets an Array be used as a FIFO:
#    queue   is another name for push  (append at the end)
#    dequeue is another name for shift (remove from the front)
class Array
	alias_method :queue, :push
	alias_method :dequeue, :shift
end

# To support multiple connections using threads.
#
# At most max_size dispatched blocks run at the same time; further dispatched
# blocks wait (each in its own thread) until a slot is free.
class ThreadPool
	# max_size:: maximum number of blocks running concurrently; values below 1
	#            are raised to 1, since a pool without slots could never run anything.
	def	initialize(max_size)
		@pool = []
		# number of dispatched blocks that have not finished yet (waiting + running)
		@pending = 0
		@max_size = max_size < 1 ? 1 : max_size
		@pool_mutex = Mutex.new
		@pool_cv = ConditionVariable.new
	end

	# Runs the given block, with +args+ as its arguments, in a new thread as
	# soon as a slot is available. Returns the Thread immediately.
	# StandardErrors raised by the block are passed to #exception.
	def dispatch(*args)
		# Count the job before the thread exists, so that #empty? and #shutdown
		# cannot report an idle pool while the new thread is still starting.
		@pool_mutex.synchronize { @pending += 1 }

		Thread.new do
			begin
				@pool_mutex.synchronize do
					while @pool.size >= @max_size
						print "Pool full, waiting... \n" if $DEBUG
						@pool_cv.wait(@pool_mutex)
					end
					# Register while still holding the lock: otherwise several
					# waiters could pass the size check at the same time.
					@pool << Thread.current
				end

				yield(*args)
			rescue => e
				exception(Thread.current, e, *args)
			ensure
				@pool_mutex.synchronize do
					@pool.delete(Thread.current)
					@pending -= 1
					# Waiting workers and #shutdown share the condition variable:
					# wake them all and let each one re-check its own condition.
					@pool_cv.broadcast
				end
			end
		end
	end

	# Called when a dispatched block raises; logs the error.
	# Falls back to stderr when the global logger is not set.
	def exception(thread, exception, *original_args)
		message = "Exception in thread #{thread}; #{exception}"
		if $LOG
			$LOG.error(message)
		else
			$stderr.puts(message)
		end
	end

	# Blocks until every dispatched block has finished.
	def shutdown
		@pool_mutex.synchronize { @pool_cv.wait(@pool_mutex) until @pending == 0 }
	end

	# True when no dispatched block is waiting or running.
	def empty?
		@pool_mutex.synchronize { @pending == 0 }
	end

end

# Collects the links found in an HTML text.
#    Adapt #parse to your taste to follow more or fewer kinds of links.
class LinkParser
	# Set of every link target collected so far by #parse.
	attr_reader :links

	def initialize
		@links = Set.new
	end

	# Parses +html+ and adds to #links the targets of <a href>, <img src>,
	# <link href> and <script src> elements (in that order).
	def parse(html)
		soup = BeautifulStoneSoup.new(html)
		collect(soup, 'a', 'href')
		collect(soup, 'img', 'src')
		collect(soup, 'link', 'href')
		collect(soup, 'script', 'src')
	end

	private

	# Adds the +attribute+ value of every +tag+ element that has one.
	def collect(soup, tag, attribute)
		soup.find_all(tag).each do |mark|
			target = mark[attribute]
			@links << target if target
		end
	end

end

# Extends URI with a simplify method that normalizes URL paths
#    needed to avoid fetch loops (the same page reached through '/..' detours)
module URI
	# Returns +urlString+ with its path normalized:
	#   - '..' segments remove the preceding segment (never climbing above the root),
	#   - '.' segments and empty segments (repeated slashes) are dropped,
	#   - the path always starts with '/',
	#   - a trailing slash is preserved, because 'dir/' and 'dir' are different URLs.
	# Everything else (scheme, host, query, ...) is left as URI::parse read it.
	# URIs without a hierarchical path (e.g. mailto:) are returned unchanged.
	# Raises whatever URI::parse raises for an invalid URL.
	def self.simplify(urlString)
		uri = URI::parse(urlString)
		path = uri.path
		return uri.to_s if path.nil?

		raw_segments = path.split("/")
		# The result designates a directory when the path ends with a slash or
		# with a '.' / '..' segment.
		trailing_slash = (path[-1, 1] == "/") || [".", ".."].include?(raw_segments.last)

		segments = Array.new
		raw_segments.each do |segment|
			case segment
			when "", "."
				next
			when ".."
				segments.pop
			else
				segments << segment
			end
		end

		newpath = "/" + segments.join("/")
		newpath << "/" if trailing_slash && ! segments.empty?
		uri.path = newpath
		return uri.to_s
	end
end

# Adds a class-level request_head to Net::HTTP
#    (one-shot HEAD request: open the connection, send HEAD, close)
module Net
	class HTTP

		# Sends a HEAD request and returns the response.
		#
		# Two call forms:
		#    request_head(uri)                 - uri responds to host, port and request_uri
		#    request_head(host, path, port)    - port defaults to HTTP.default_port
		# An optional block is forwarded to the instance-level request_head.
		def self.request_head(uri_or_host, path = nil, port = nil, &block)
			if path
				host = uri_or_host
				target = path
				port ||= HTTP.default_port
			else
				host = uri_or_host.host
				port = uri_or_host.port
				target = uri_or_host.request_uri
			end

			new(host, port).start { |http| http.request_head(target, nil, &block) }
		end
	end
end

# Tells whether the file extension of a URL allows the URL to be followed
#    needed to avoid following dynamic pages
#    adapt CatArguments[:excuded_ext] to your needs
#
# The extension is whatever follows the last '.' in the last '/'-separated
# part of url_str. It is matched against each [pattern, mime] pair of
# $dontfollow_extensions, in order.
#
# Returns [false, mime] for the first pair whose pattern matches,
# [true, nil] when none matches or when there is no extension.
def is_allowed_ext(url_str)
	last_part = url_str.split("/").last
	found = /(.*)\.(.*)/.match(last_part)
	return [true, nil] unless found

	extension = found[2]
	rule = $dontfollow_extensions.find { |candidate| candidate[0].match(extension) }
	rule ? [false, rule[1]] : [true, nil]
end

# Main processing of one URL. The "recursion" goes through $url_stack: links
# that must be visited are queued there for the main loop to dispatch.
#
# url::      URL to check
# from_url:: URL of the page where url was found (nil for the start URL)
# level::    depth of url, counted from the start URL
#
# Steps: HEAD request on url (following redirections, at most
# $redirection_limit of them), then, for a successful text/html response,
# GET of the page and examination of every link found in it.
# Results are recorded in $url_processed (per-URL facts) and $url_tree
# (page => { link => facts }); $max_level is raised as deeper levels are met.
#
# Returns true when the final response is a Net::HTTPSuccess, false otherwise.
# Network and URI parsing errors outside the per-link loop propagate to the caller.
def process_url(url , from_url = nil, level = 1)
	$max_level = level if level > $max_level
	
	followed_url = url
	uri = URI::parse(url)

	followed = $redirection_limit
	done = false
	response = nil
	
	while ! done
		response = Net::HTTP.request_head( uri )	
		case response
			when Net::HTTPRedirection
				location = response['location']
				if followed <= 0 || location.nil?
					# Redirection budget used up (or no target given): stop here.
					# The 3xx response is then recorded below as a failed URL,
					# instead of looping forever on a redirection cycle.
					done = true
				else
					followed -= 1
					# Location may be relative: resolve it against the URL just requested.
					followed_url = URI::join(followed_url, location).to_s
					uri = URI::parse(followed_url)
				end
		else
			done = true
		end
 	end
	uri_parts = URI::split(followed_url)
	
	# => Making the base_url from where we got...
	if (uri_parts[5] != nil)
		split_path = uri_parts[5].split("/")
		split_path.pop
		base_url = "#{uri_parts[0]}://#{uri_parts[2]}/" + split_path.join("/") + "/"
	else
		base_url = "#{uri_parts[0]}://#{uri_parts[2]}" + "/"
	end

	# collapse the doubled slashes produced above, then restore the one after the scheme
	base_url.gsub!("//","/")
	base_url.gsub!("http:/","http://")
		
	case response
	when Net::HTTPSuccess
		# => Net::HTTPSuccess
		$LOG.info("response['Content-type'] #{response['Content-type']}") if $VERBOSE
		# a response without Content-type header is treated as of unknown type
		content_type = response['Content-type'] || "unknown"
		# drop the parameters (e.g. "; charset=...")
		if /(.*);(.*)/.match(content_type)
			content_type = $1
		end

		$url_processed.synchronize { |h| h[url] = { "HTTPResponse" => response.code,"Content-type" => content_type, "level" => level } }
		$url_processed.synchronize { |h| h[followed_url] = { "HTTPResponse" => response.code,"Content-type" => content_type, "level" => level } } if (followed_url != url)

		if content_type.include?("text/html")
			response = Net::HTTP.get_response( uri )

			parser = LinkParser.new
			parser.parse(response.body)
			links = parser.links

			# NOTE(review): content_type is not reset between links, so a link
			# keeps the value left by the page or by the previous link unless one
			# of the assignments below replaces it - confirm this is intended.
			links.each do |lk|
				begin
					new_uri = lk.clone
					new_uri_parts = URI::split(new_uri) rescue [nil, nil, nil, nil, nil, new_uri]
		
					# only http(s) links and scheme-less (relative) links are considered
					if (/^http/.match(new_uri_parts[0])) || (new_uri_parts[0] == nil)
						new_uri = base_url + new_uri_parts[5] if ! new_uri_parts[0] 
						new_uri = URI::simplify(new_uri)
					
						valid = nil
					
						if ! $url_processed.has_key?(new_uri)
							r = is_allowed_ext(new_uri)
							allowed = r[0]
							content_type = r[1] if r[1]
						
							if allowed
								# follow only links on the same scheme and host as the start URL
								allowed = false
								new_uri_parts = URI::split(new_uri)
								new_uri_base = "#{new_uri_parts[0]}://#{new_uri_parts[2]}"

								base_url_parts = URI::split($base_url)
								base_url_base = "#{base_url_parts[0]}://#{base_url_parts[2]}"

								allowed = (new_uri_base.casecmp(base_url_base) == 0)
							end
						
							content_type = "unknown" if (level >= $levels)

							if allowed && (level < $levels)
								$url_stack.synchronize do |s| 
									s.queue([new_uri, url, level+1])
								end  
							else
								$url_processed.synchronize do |h| 
									if h.has_key?(new_uri)
										h1 = h[new_uri]
										if h1.has_key?("level")
											h1["level"] = level+1	if level+1 < h1["level"] 
										else
											h1["level"] = level+1
										end
									else
										h[new_uri] = { "level" => level+1 }
									end
								end
								# we need to increase max_level here because this level will not be processed later...
								$max_level += 1 if level == $max_level
							end 
						
						else
							# already known URL: keep the smallest level it was seen at
							processed_rec = $url_processed[new_uri]
							processed_rec.synchronize do |processed_rec| 
								if processed_rec.has_key?("level")
									processed_rec["level"] = level	if level < processed_rec["level"] 
								else
									processed_rec["level"] = level
								end
							end
							allowed = processed_rec["allowed"]
							valid = processed_rec["valid"]
							content_type = processed_rec["Content-type"]
						end

						$url_tree.synchronize do |h|
							if from_url
								# the page itself was fetched successfully: mark it valid in its parent's record
								fathertree = $url_tree[from_url]
								cur_record = fathertree[url]
								cur_record["valid"] = true
							end
						
							if (new_uri == url)
								$url_processed.synchronize do |h| 
									if h.has_key?(url)
										h1 = h[url]
										h1["Self-Reference"] = true 
									else
										h[url] = { "Self-Reference" => true }
									end
								end
							else
								if new_uri == $base_url
									$url_processed.synchronize do |h| 
										if h.has_key?(url)
											h1 = h[url]
											h1["Home-Reference"] = true 
										else
											h[url] = { "Home-Reference" => true }
										end
									end
								else
									if $url_tree.has_key?(url)
										subtree = $url_tree[url]
									else
										subtree = Hash.new
										$url_tree[url] = subtree
									end

									if subtree.has_key?(new_uri)
										new_record = subtree[new_uri]
									else
										new_record = Hash.new
										subtree[new_uri] = new_record
									end

									new_record["allowed"] = allowed if (allowed != nil)
									new_record["valid"] = valid if (valid != nil)
									if /(.*);(.*)/.match(content_type)
										content_type = $1
									end
									new_record["Content-type"] = content_type if (content_type != nil)
								end
							end
						end
					end
				rescue => e
					# a failure on one link must not stop the processing of the others
					if $DEBUG
						$LOG.error "EXCEPTION #{e.backtrace.join("\n ")}"
						$LOG.error e.to_s
					end
				end
			end
		else
			# response is not text/html
			if $url_processed.has_key?(url)
				$url_processed.synchronize do |url_processed| 
					h = url_processed[url] 
					h["level"] = level if level < h["level"]
				end
			else
				$url_processed.synchronize { |url_processed| url_processed[url] = { "HTTPResponse" => response.code,"Content-type" => content_type, "level" => level } }
			end
			$url_tree.synchronize do |h|
				if $url_tree.has_key?(from_url)
					subtree = $url_tree[from_url]
				else
					subtree = Hash.new
					$url_tree[from_url] = subtree
				end
				if subtree.has_key?(url)
					new_record = subtree[url]
				else
					new_record = Hash.new
					subtree[url] = new_record
				end			
				new_record["allowed"] = true
				new_record["valid"] = true
				new_record["Content-type"] = content_type
			end
		end
	else
		# not HTTPSuccess
		$url_processed.synchronize { |h| h[url] = { "HTTPResponse" => response.code, "level" => level } }
		$url_tree.synchronize do |h|
			if $url_tree.has_key?(from_url)
				subtree = $url_tree[from_url]
			else
				subtree = Hash.new
				$url_tree[from_url] = subtree
			end
			if subtree.has_key?(url)
				new_record = subtree[url]
			else
				new_record = Hash.new
				subtree[url] = new_record
			end			
			new_record["allowed"] = nil
			new_record["valid"] = false
			new_record["httperror"] = response.code
		end
	end
	
	case response
	when Net::HTTPSuccess
		true
	else
		false
	end
end

# OPML output (recursive)
#
# Adds under xml_node an "outline" element for key, carrying the status
# entries as attributes, then handles every url => status_rec pair of values:
#    - a URL already present in processed_stack becomes a "link" element
#      (no recursion),
#    - otherwise the URL is dumped recursively, with $url_tree[url] as its own
#      links, but only when its scheme or host differs from key's, or when its
#      path is not contained in key's path.
# key is appended to processed_stack.
def opml_dump_node(xml_node, key, status, values, processed_stack)
	$LOG.info( "> #{key}") if $VERBOSE
	parent_uri = URI::parse(key)
	outline = xml_node.add_element("outline")
	outline.add_attribute("text", key)
	outline.add_attributes(status)

	processed_stack << key
	
	if values
		values.each do |url,status_rec|
			if processed_stack.include?(url)
				# already dumped somewhere: reference only
				reference = outline.add_element("link") 
				reference.add_attribute("text", url)
				reference.add_attributes(status_rec)
				next
			end

			child_uri = URI::parse(url)
			descend = (parent_uri.scheme != child_uri.scheme) ||
				(parent_uri.host != child_uri.host) ||
				! parent_uri.path.include?( child_uri.path )

			opml_dump_node( outline, url, status_rec, $url_tree[url], processed_stack) if descend
		end
	end
	$LOG.info("< #{key}") if $VERBOSE
end

# Adds attributes to a XOXO node
#
# Appends to node a "dl" element holding, for each key/value pair of values,
# a "dt" element (the key as text) followed by a "dd" element (the value as text).
def xoxo_add_attribute(node, values)
	definition_list = node.add_element("dl")
	
	values.each do |name, content|
		term = definition_list.add_element("dt")
		term.add_text(name.to_s)
		description = definition_list.add_element("dd")
		description.add_text(content.to_s)
	end
end

# XOXO output (recursive)
#
# Adds under xml_node a "li" element for key: an "a" element linking to key,
# the status entries as a definition list (see xoxo_add_attribute) and, when
# values is given, an "ol" element handling every url => status_rec pair:
#    - a URL already present in processed_stack becomes a "li" reference
#      (link + attributes, no recursion),
#    - otherwise the URL is dumped recursively, with $url_tree[url] as its own
#      links, but only when its scheme or host differs from key's, or when its
#      path is not contained in key's path.
# key is appended to processed_stack.
def xoxo_dump_node(xml_node, key, status, values, processed_stack)
	$LOG.info("> #{key}") if $VERBOSE

	uri_key = URI::parse(key)
	new_node = xml_node.add_element("li")
	a_node = new_node.add_element("a")
	a_node.add_attribute("href", key)
	a_node.add_text(key)
	xoxo_add_attribute(new_node, status)
	
	processed_stack << key
	
	if values
		ol_node = new_node.add_element("ol") 
		values.each do |url,status_rec|
			if ! processed_stack.include?(url)
				uri_url = URI::parse(url)

				dump = (uri_key.scheme != uri_url.scheme)
				dump ||= (uri_key.host != uri_url.host)			
				dump ||= ! uri_key.path.include?( uri_url.path )
			
				if dump
					xoxo_dump_node( ol_node, url, status_rec, $url_tree[url], processed_stack)
				end
			else
				sub_node = ol_node.add_element("li") 
				a_node = sub_node.add_element("a") 
				a_node.add_attribute("href", url)
				# the helper is named xoxo_add_attribute (singular)
				xoxo_add_attribute(sub_node,status_rec)
			end
		end
	end

	$LOG.info("< #{key}") if $VERBOSE
end

# Short node name for DOT output
#    avoids repeating the 'http://domain' in front of each local node
#
# my_host:: host name of the crawled site
# uri::     parsed URI of the node
#
# Returns the full URL for a node on another host; for a node on my_host,
# its path, or the host name itself when the path is empty.
def simplified_name(my_host, uri)
	return uri.to_s unless uri.host == my_host

	uri.path == "" ? uri.host : uri.path
end

# Color of a DOT edge, chosen from the status record of its target node
#
# Checked in this order:
#    1. "allowed" key present with a false/nil value -> $colors_hash["EXTERNAL"]
#    2. "valid" key present with a false/nil value   -> $colors_hash["INVALID"]
#    3. "Content-type" equal to "unknown"            -> $colors_hash["UNKNOWN"]
#    4. first $colors entry whose name is a prefix of the content type
#    5. otherwise                                    -> $colors_hash["DEFAULT"]
def color_for(status)
	return $colors_hash["EXTERNAL"] if status.has_key?("allowed") && ! status["allowed"]
	return $colors_hash["INVALID"] if status.has_key?("valid") && ! status["valid"]

	if status.has_key?("Content-type")
		mime = status['Content-type']
		return $colors_hash["UNKNOWN"] if mime == "unknown"

		entry = $colors.find { |candidate| status['Content-type'].index(candidate[0]) == 0 }
		return entry[1] if entry
	end

	$colors_hash["DEFAULT"]
end

# DOT output (recursive)
#
# Writes to $stdout one edge statement per url => status_rec pair of values,
# going from the node of key to the node of url, colored after the target
# (see color_for); node names come from simplified_name.
# With recursive set, a URL not yet present in processed_stack is then dumped
# in turn, with $url_tree[url] as its own links, but only when its scheme or
# host differs from key's, or when its path is not contained in key's path.
# key is appended to processed_stack.
def dot_dump_node(my_host, key, status, values, processed_stack, recursive = true)
	$LOG.info("> #{key}") if $VERBOSE

	source_uri = URI::parse(key)	
	processed_stack << key
	
	source_name = simplified_name(my_host, source_uri)
	# not used in the output so far; the lookup is kept as it was
	_source_color = color_for(status)

	# node [color=red, label="\N"];
	
	if values
		values.each do |url,status_rec|
			target_uri = URI::parse(url)
			target_name = simplified_name(my_host, target_uri)
			edge_color = color_for(status_rec)
			$stdout.puts "\"#{source_name}\" -> \"#{target_name}\" [color=#{edge_color}]"

			next unless recursive
			next if processed_stack.include?(url)

			descend = (source_uri.scheme != target_uri.scheme) ||
				(source_uri.host != target_uri.host) ||
				! source_uri.path.include?( target_uri.path )

			dot_dump_node( my_host, url, status_rec, $url_tree[url], processed_stack, true) if descend
		end
	end

	$LOG.info("< #{key}") if $VERBOSE
end

=begin
                                               
    []      []    [][]    [][][]  []      []   
   [][]  [][]  []    []    []    [][]    []    
  []  []  []  [][][][]    []    []  []  []     
 []      []  []    []    []    []    [][]      
[]      []  []    []  [][][]  []      []       
                                               
=end

# Logger shared by the whole script; messages go to stderr
$LOG = Logger.new($stderr)

# Options are removed from ARGV by the parser: only the URLs are left in it
arguments = CatArguments.new(ARGV)
pool = ThreadPool.new(arguments[:pools])

# Ruby's own $DEBUG and $VERBOSE globals are used as the script's flags
$DEBUG = arguments[:debug]
$VERBOSE = arguments[:verbose]

# [pattern, mime] pairs read by is_allowed_ext
$dontfollow_extensions = arguments[:excuded_ext].map do |pattern, mime|
	[Regexp.new(pattern), mime]
end

$levels = arguments[:depth]

# $colors keeps the ordered list (first match wins), $colors_hash gives access by name
$colors = arguments[:colors]
$colors_hash = Hash.new
$colors.each { |entry| $colors_hash[entry[0]] = entry[1] }

$redirection_limit = arguments[:redirections]

$get_headers = { 'User-Agent' => 'RWB 1.0' }

# Crawl state
$url_stack = Array.new       # [url, from_url, level] entries waiting to be processed
$url_processed = Hash.new    # url => facts about this URL
$url_tree = Hash.new         # url => { linked url => facts about the link }
$max_level = 0

# Either crawl the site starting from the first URL given on the command
# line, or reload the result of a previous crawl saved with -o.
if arguments[:input_file].nil?
	if ARGV.empty?
		$LOG.error "no URL provided !"
		exit 1 
	end
	
	start_url = ARGV[0].downcase
	$base_url = start_url
	$url_stack << [start_url, nil, 0] 

	# Done only when nothing is queued AND no worker is running:
	# a running worker may still queue new URLs.
	until $url_stack.empty? && pool.empty?
		# nothing to dispatch for now: wait for the workers
		if $url_stack.empty?
			sleep 1 until pool.empty?
		end
		next if $url_stack.empty?

		pool.dispatch( $url_stack.dequeue ) do |job|
			$LOG.info(">> processing #{job[0]} from #{job[1]}") if $VERBOSE
			url_ok = false

			begin 
				url_ok = process_url(job[0], job[1], job[2])
			rescue Exception => e
				$LOG.error "EXCEPTION #{e.backtrace.join("\n ")}"
				$LOG.error e.to_s
			end		

			$LOG.info("<< processed #{job[0]} #{url_ok}") if $VERBOSE
		end
	end

	pool.shutdown
else
	$LOG.info("loading configuration #{arguments[:input_file]}") if $VERBOSE
	saved = open(arguments[:input_file]) { |f| YAML.load(f) }
	if saved
		$base_url = saved['base_url']
		$url_tree = saved['url_tree'] 
		$url_processed = saved['url_processed'] 
	end
end

# Save the crawl result (YAML) when -o was given; it can be reloaded with -i
unless arguments[:output_file].nil?
	snapshot = { 'base_url' => $base_url, 'url_tree' => $url_tree, 'url_processed' => $url_processed }
	open(arguments[:output_file], 'w') { |f| YAML.dump(snapshot, f) }
end

# Returns the DOT statement declaring the node of one processed URL.
#
# host::  host name of the crawled site (for simplified_name)
# key::   the URL
# value:: its $url_processed record
#
# The start URL ($base_url) gets a fixed "house" shape. For the other URLs
# the fill color follows the content type (first matching $colors entry),
# or marks a failed HTTP response; a "Home-Reference" sets the border color
# and a "Self-Reference" doubles the border.
# The color lookup is traced on stderr in debug mode only.
def dot_node_statement(host, key, value)
	name = simplified_name(host , URI::parse(key))
	if $base_url == key
		return "\"#{name}\" [shape=house,style=filled,color=black,fillcolor=lightgray];"
	end

	shape = "box"
	style = ", style=filled"
	color, fillcolor, peripheries = nil, nil, nil

	color = $colors_hash["HOMEREF"] if value.has_key?("Home-Reference")
	peripheries = ", peripheries=2" if value.has_key?("Self-Reference")

	if value.has_key?("Content-type")
		if value['Content-type'] == "unknown"
			fillcolor = $colors_hash["UNKNOWN"]
		else
			$stderr.puts "> DEBUG COLORS #{value['Content-type']}" if $DEBUG
			$colors.each do |a|
				k, v = a[0], a[1]
				$stderr.puts "\t#{k}" if $DEBUG
				fillcolor = v if value['Content-type'].index(k) == 0
				break if fillcolor != nil
			end
			$stderr.puts "< DEBUG COLORS #{fillcolor}" if $DEBUG
		end
	elsif value.has_key?("HTTPResponse")
		fillcolor = $colors_hash["INVALID"] if value['HTTPResponse'] != "200"
	else
		# URL seen in a page but never requested
		shape = "polygon, sides=4, skew=0.025"
		style = "style=filled"
	end

	fillcolor = $colors_hash["UNKNOWN"] if fillcolor == nil
	color = fillcolor if color == nil
	"\"#{name}\" [shape=#{shape} #{style} #{peripheries},fillcolor=#{fillcolor},color=#{color}];"
end

# Writes a DOT rank group: the given opening line, one line per URL of urls
# (as node names), then the closing brace.
def dot_rank_group(host, opening, urls)
	$stdout.puts opening
	urls.each do |item|
		item = simplified_name(host , URI::parse(item))
		$stdout.puts "\"#{item}\" ; "
	end
	$stdout.puts "}"
end

# Output of the site map, in the requested format, on stdout
case arguments[:output_format]
when "OPML"
	doc = REXML::Document.new

	opml = doc.add_element("opml")
	opml.add_attribute("version", "1.0")

	head = opml.add_element("head")
	title = head.add_element("title")
	title.add_text(arguments[:title])

	body = opml.add_element("body")

	# the tree is dumped from the start URL
	opml_dump_node( body, $base_url, { "valid" => true, "allowed" => true }, $url_tree[$base_url] , [])

	doc.write($stdout,1)

when "XOXO"
	doc = REXML::Document.new

	doc_type = REXML::DocType.new('html', '"-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"')

	doc.add(doc_type)

	html = doc.add_element("html")
	html.add_attribute("xmlns", "http://www.w3.org/1999/xhtml")

	head = html.add_element("head")
	meta = head.add_element("meta")
	meta.add_attribute("http-equiv", "content-type")
	meta.add_attribute("content", "text/html; charset=utf-8")

	# the title given with -t (not the literal text "arguments[:title]")
	head.add_element("title").add_text(arguments[:title])

	body = html.add_element("body")
	ol = body.add_element("ol")
	ol.add_attribute("class", "xoxo")
	
	xoxo_dump_node( ol, $base_url, { "valid" => true, "allowed" => true }, $url_tree[$base_url] , [])

	doc.write($stdout,1)	

when "DOT"
	host = URI::parse($base_url).host
	
	# URLs grouped by level; keys are the levels as strings
	urls_by_level = Hash.new
	(0..$max_level).step(1) do |i|
		urls_by_level[i.to_s] = Array.new
	end

	$url_processed.each do |key,value|
		level = value["level"]
		# A record without a usable level cannot be ranked; skipping it also
		# avoids growing $max_level without end while looking for its group.
		next unless level.is_a?(Integer)

		# $max_level may be too small (e.g. data loaded with -i): extend it,
		# creating every intermediate group on the way.
		while $max_level < level
			$max_level += 1
			urls_by_level[$max_level.to_s] = Array.new
		end
		urls_by_level[level.to_s] << key
	end

	# records ordered by level, those without level last
	url_processed_array = $url_processed.sort_by { |item| item[1]["level"] || ($max_level + 1) }

	if arguments[:one_perpage]	
		# one graph per pair of consecutive levels
		(0..$max_level-1).step(1) do |level|
			$stdout.puts "digraph \"#{host}-#{level}\" {"
			$stdout.puts "graph [rankdir=LR];"
			$stdout.puts "ranksep=2.0;"
			$stdout.puts "node [color=black, shape=box];"

			url_processed_array.each do |item|
				key = item[0]
				value = item[1]
				
				key_level = value["level"]
				
				if (key_level == level) || (key_level == level+1)
					$stdout.puts dot_node_statement(host, key, value)
				end
			end

			$stdout.puts "{ node [shape=plaintext]; \"#{level}\" -> \"#{level+1}\" ; }"
		
			dot_rank_group(host, "{ rank = source ; \"#{level}\" ;", urls_by_level[level.to_s])
			dot_rank_group(host, "{ rank = sink ; \"#{level+1}\" ;", urls_by_level[(level+1).to_s])

			# edges leaving the nodes of this level only (no recursion)
			urls_by_level[level.to_s].each do |item|
				dot_dump_node( host, item, $url_processed[item], $url_tree[item] , [], false)	
			end
		
			$stdout.puts "}"
			$stdout.puts 
			$stdout.puts "/* EOF */"
			$stdout.puts 

		end
	else
		# a single graph for the whole site
		$stdout.puts "digraph \"#{host}\" {"
		$stdout.puts "graph [rankdir=LR];"
		$stdout.puts "ranksep=2.0;"
		$stdout.puts "node [color=black, shape=box];"

		url_processed_array.each do |item|
			$stdout.puts dot_node_statement(host, item[0], item[1])
		end

		# the level scale: "0" -> "1" -> ... -> "max"
		$stdout.puts "{ node [shape=plaintext];"
		(0..$max_level-1).step(1) do |i|
			$stdout.print "\"#{i}\" -> "
		end
		$stdout.puts "\"#{$max_level}\" ; }"

		b = simplified_name(host , URI::parse($base_url))
		$stdout.puts "{ rank = source ; \"0\" ; \"#{b}\" }"
		(1..$max_level-1).step(1) do |i|
			dot_rank_group(host, "{ rank = same ; \"#{i.to_s}\" ; ", urls_by_level[i.to_s])
		end
		dot_rank_group(host, "{ rank = sink ; \"#{$max_level.to_s}\" ; ", urls_by_level[$max_level.to_s])
	
		dot_dump_node( host, $base_url, { "valid" => true, "allowed" => true }, $url_tree[$base_url] , [], true)	
	
		$stdout.puts "}"
	end
end
