/*
** Copyright (C) 2001,2002 Sacha Faust <sacha@severus.org>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/*
 *  Version : 1.3
 */
package faust.sacha.web.util;

import java.util.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
import java.io.*;

/**
 *  For DEBUG : main()
 *
import java.net.MalformedURLException;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
import faust.sacha.web.data.*;
import faust.sacha.web.util.*;
*/

public class HTMLFile extends HTMLParser {

    private ArrayList m_URLLinks;
    private ArrayList m_emailLinks;
    private ArrayList m_miscLinks;
    private ArrayList m_comments;
    private String m_miscProtocols[] = {    "telnet://", 
                                            "ldap://",
                                            "ftp://"};       //Todo: handle this
    
    /**
     * Creates a collector whose URL, e-mail, misc-link and comment lists start out empty.
     */
    public HTMLFile(){
        // The parent parser is handed a writer wrapped around standard output.
        super( new OutputStreamWriter(System.out) );

        // All four result lists start empty, with zero initial capacity.
        this.m_comments = new ArrayList(0);
        this.m_miscLinks = new ArrayList(0);
        this.m_emailLinks = new ArrayList(0);
        this.m_URLLinks = new ArrayList(0);
    }
//------------------------------------------------------------------------------
    /**
     * Collects the link targets carried by the attributes of an opening tag.
     *
     * Tags inspected and the attributes read from each:
     * A (href), SCRIPT (src), OBJECT (data, codebase, classid),
     * APPLET (codebase, archive) and BODY (background). Other tags are ignored.
     *
     * Each value is passed through cleanupValue(); empty results are skipped.
     * Values starting with "mailto:" go to the e-mail list, values starting with
     * one of the m_miscProtocols prefixes go to the misc list, and everything
     * else goes to the URL list. Prefixes are matched without regard to case.
     * Attribute values that are not Strings are skipped.
     *
     * @param tag        the tag that was opened
     * @param attributes the attributes found on the tag
     * @param position   not used by this method
     */
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position){

        //System.err.println( "handleStartTag : " + tag );
        if( tag == HTML.Tag.A ){
            collectStartTagLinks( "A", attributes, new String[]{ "href" } );
        }
        else if( tag == HTML.Tag.SCRIPT ){
            collectStartTagLinks( "SCRIPT", attributes, new String[]{ "src" } );
        }
        else if( tag == HTML.Tag.OBJECT ){
            collectStartTagLinks( "OBJECT", attributes, new String[]{ "data", "codebase", "classid" } );
        }
        else if( tag == HTML.Tag.APPLET ){
            collectAppletLinks( attributes );
        }
        else if( tag == HTML.Tag.BODY ){
            collectStartTagLinks( "BODY", attributes, new String[]{ "background" } );
        }
    }
//------------------------------------------------------------------------------
    /**
     * Scans the attributes of one start tag and records the cleaned-up value of
     * every attribute whose name matches (ignoring case) one of linkAttributes.
     */
    private void collectStartTagLinks( String tagName, MutableAttributeSet attributes, String[] linkAttributes ){
        for( Enumeration attEnum = attributes.getAttributeNames(); attEnum.hasMoreElements(); ){
            try{
                Object attrib = attEnum.nextElement();
                Object rawValue = attributes.getAttribute(attrib);

                // getAttribute() returns Object: null and non-String values cannot be links
                if( !(rawValue instanceof String) )
                    continue;

                boolean wanted = false;
                for( int i = 0; (i < linkAttributes.length) && !wanted; i++ )
                    wanted = ( attrib.toString().compareToIgnoreCase(linkAttributes[i]) == 0 );
                if( !wanted )
                    continue;

                //quick cleanup
                String value = cleanupValue( (String)rawValue );

                //check if it's a empty link
                if( (value == null) || value.equals("") )
                    continue;

                if( !storeStartTagSpecialLink(tagName, attrib, value) )
                    storeStartTagUrl(tagName, value);
            }
            catch( NoSuchElementException e ){
                System.err.println("HTMLFile::handleStartTag() : " + tagName + " : " + e);
            }
        }
    }
//------------------------------------------------------------------------------
    /**
     * Handles APPLET: records the codebase, then records every entry of the
     * comma separated archive list, resolved against the codebase when one
     * was found and could be parsed.
     */
    private void collectAppletLinks( MutableAttributeSet attributes ){
        URLInfo appletCodeBase = null;
        String archiveList = null;

        for( Enumeration attEnum = attributes.getAttributeNames(); attEnum.hasMoreElements(); ){
            try{
                Object attrib = attEnum.nextElement();
                Object rawValue = attributes.getAttribute(attrib);

                if( !(rawValue instanceof String) )
                    continue;

                boolean isCodeBase = ( attrib.toString().compareToIgnoreCase("codebase") == 0 );
                boolean isArchive = ( attrib.toString().compareToIgnoreCase("archive") == 0 );
                if( !isCodeBase && !isArchive )
                    continue;

                //quick cleanup
                String value = cleanupValue( (String)rawValue );

                //check if it's a empty link
                if( (value == null) || value.equals("") )
                    continue;

                String mailPrefix = "mailto:";
                boolean isMail = value.regionMatches(true, 0, mailPrefix, 0, mailPrefix.length());

                // a mailto: value is never used as codebase
                if( isCodeBase && !isMail ){
                    try{
                        appletCodeBase = new URLInfo(value);
                    }
                    catch( java.net.MalformedURLException urlEx ){
                        System.err.println("HTMLFile::handleStartTag() : APPLET : codebase : " + urlEx );
                    }
                }

                if( storeStartTagSpecialLink("APPLET", attrib, value) )
                    continue;

                if( isArchive ){
                    /*
                     *  We store the archive list so we can analyze it after the loop
                     *  and build good links using the value found in codebase,
                     *  whatever order the two attributes are enumerated in.
                     */
                    archiveList = value;
                }
                else{
                    storeStartTagUrl("APPLET", value);
                }
            }
            catch( NoSuchElementException e ){
                System.err.println("HTMLFile::handleStartTag() : APPLET : " + e);
            }
        }

        // No usable archive attribute: StringTokenizer would throw on null.
        if( archiveList == null )
            return;

        /*
         *  Looking if there is more than 1 archive separated by ,
         */
        StringTokenizer archiveTokens = new StringTokenizer(archiveList, ",");

        while( archiveTokens.hasMoreTokens() ){
            // entries are commonly written "a.jar, b.jar"; drop the padding
            String tmpArchive = archiveTokens.nextToken().trim();
            if( tmpArchive.equals("") )
                continue;

            if( appletCodeBase != null ){
                try{
                    tmpArchive = new URLInfo(tmpArchive, appletCodeBase).getURL();
                }
                catch( java.net.MalformedURLException urlEx ){
                    // keep the unresolved entry, exactly as when no codebase is present
                    System.err.println("HTMLFile::handleStartTag() : APPLET : appletArchive : " + urlEx );
                }
            }

            storeStartTagUrl("APPLET", tmpArchive);
        }
    }
//------------------------------------------------------------------------------
    /**
     * Files a "mailto:" value in the e-mail list or a misc-protocol value in the
     * misc list.
     *
     * @return true when the value was stored, false when it is an ordinary link
     */
    private boolean storeStartTagSpecialLink( String tagName, Object attrib, String value ){
        String mailPrefix = "mailto:";

        // regionMatches(true, ...) ignores case without depending on the default locale
        if( value.regionMatches(true, 0, mailPrefix, 0, mailPrefix.length()) ){
            if( WebGlobal.DEBUG )
                System.err.println("HTMLFile::handleStartTag() : " + tagName + " : adding e-mail : " + value);

            m_emailLinks.add(value);
            return true;
        }

        for( int protoIndex = 0; protoIndex < m_miscProtocols.length; protoIndex++ ){
            String protocol = m_miscProtocols[protoIndex];
            if( value.regionMatches(true, 0, protocol, 0, protocol.length()) ){
                if( WebGlobal.DEBUG )
                    System.err.println("HTMLFile::handleStartTag() : " + tagName + " : adding misc link Attribute=" + attrib + " | value= " + value);

                m_miscLinks.add(value);
                return true;
            }
        }
        return false;
    }
//------------------------------------------------------------------------------
    /**
     * Appends an ordinary link to the URL list.
     */
    private void storeStartTagUrl( String tagName, String value ){
        if( WebGlobal.DEBUG )
            System.err.println("HTMLFile::handleStartTag() : " + tagName + " : adding link : " + value);

        m_URLLinks.add(value);
    }
//------------------------------------------------------------------------------
    /**
     * Called for a closing tag. Nothing is collected here.
     *
     * @param tag      the tag being closed; not used
     * @param position not used
     */
    public void handleEndTag(HTML.Tag tag, int position){
        // Intentionally empty.
    }
//------------------------------------------------------------------------------
    /**
     * Collects the link targets carried by the attributes of a simple tag.
     *
     * Tags inspected and the attribute read from each:
     * FRAME (src), LINK (href), AREA (href), BASE (href) and META (content,
     * from which the part after "url=" is extracted). Other tags are ignored.
     *
     * Values starting with "mailto:" go to the e-mail list, values starting with
     * one of the m_miscProtocols prefixes go to the misc list, and everything
     * else goes to the URL list. Prefixes are matched without regard to case.
     * For FRAME the "mailto:" check is applied to every attribute, not only src.
     * Attribute values that are not Strings are skipped.
     *
     * @param tag        the tag that was found
     * @param attributes the attributes found on the tag
     * @param position   not used by this method
     */
    public void handleSimpleTag( HTML.Tag tag, MutableAttributeSet attributes, int position){

        //System.err.println( "handleSimpleTag : " + tag );
        if( tag == HTML.Tag.FRAME ){
            collectSimpleTagLinks( "FRAME", attributes, "src", true );
        }
        else if( tag == HTML.Tag.LINK ){
            collectSimpleTagLinks( "LINK", attributes, "href", false );
        }
        else if( tag == HTML.Tag.AREA ){
            collectSimpleTagLinks( "AREA", attributes, "href", false );
        }
        else if( tag == HTML.Tag.BASE ){
            collectSimpleTagLinks( "BASE", attributes, "href", false );
        }
        else if( tag == HTML.Tag.META ){
            collectMetaRefreshLinks( attributes );
        }
    }
//------------------------------------------------------------------------------
    /**
     * Scans the attributes of one simple tag and records the cleaned-up value of
     * the attribute named linkAttribute (name compared ignoring case). When
     * mailFromAnyAttribute is true, "mailto:" values of the other attributes
     * are recorded as e-mails too.
     */
    private void collectSimpleTagLinks( String tagName, MutableAttributeSet attributes,
                                        String linkAttribute, boolean mailFromAnyAttribute ){
        for( Enumeration attEnum = attributes.getAttributeNames(); attEnum.hasMoreElements(); ){
            try{
                Object attrib = attEnum.nextElement();
                Object rawValue = attributes.getAttribute(attrib);

                // getAttribute() returns Object: null and non-String values cannot be links
                if( !(rawValue instanceof String) )
                    continue;

                boolean isLinkAttribute = ( attrib.toString().compareToIgnoreCase(linkAttribute) == 0 );
                if( !isLinkAttribute && !mailFromAnyAttribute )
                    continue;

                //quick cleanup
                String value = cleanupValue( (String)rawValue );

                //check if it's a empty link
                if( (value == null) || value.equals("") )
                    continue;

                if( storeSimpleTagEmail(tagName, value) )
                    continue;

                if( isLinkAttribute )
                    storeSimpleTagOther( tagName, value, "Attribute=" + attrib + " | value= " + value );
            }
            catch( NoSuchElementException e ){
                System.err.println("HTMLFile::handleSimpleTag() : " + tagName + " : " + e);
            }
        }
    }
//------------------------------------------------------------------------------
    /**
     * Handles META: extracts the text following "url" and "=" from the content
     * attribute, strips one surrounding quote on each side and records the result.
     */
    private void collectMetaRefreshLinks( MutableAttributeSet attributes ){
        for( Enumeration attEnum = attributes.getAttributeNames(); attEnum.hasMoreElements(); ){
            try{
                Object attrib = attEnum.nextElement();
                Object rawValue = attributes.getAttribute(attrib);

                if( !(rawValue instanceof String) )
                    continue;

                String value = (String)rawValue;

                //check if it's a empty link
                if( value.equals("") )
                    continue;

                // <meta http-equiv="REFRESH" CONTENT="0; URL=http://url">
                if( attrib.toString().compareToIgnoreCase("content") != 0 )
                    continue;

                /*
                 *  We have to split the value to get the URL
                 */
                int elementIndex = value.toLowerCase().indexOf("url");
                if( elementIndex < 0 )
                    continue;
                String strURL = value.substring(elementIndex + 3);

                elementIndex = strURL.indexOf("=");
                if( elementIndex < 0 )
                    continue;

                // Clean the string
                strURL = strURL.substring(elementIndex + 1).trim();

                if( strURL.startsWith("\"") || strURL.startsWith("\'") )
                    strURL = strURL.substring(1);
                // drop the closing quote: the end index must be length - 1
                if( strURL.endsWith("\"") || strURL.endsWith("\'") )
                    strURL = strURL.substring( 0, strURL.length() - 1 );

                // nothing left after "url=": skip it, as empty links are skipped for every other tag
                if( strURL.equals("") )
                    continue;

                if( WebGlobal.DEBUG )
                    System.err.println("HTMLFile::handleSimpleTag() : META : found new url : " + strURL + " : in " + value + " of attribute : " + attrib.toString() );

                if( !storeSimpleTagEmail("META", strURL) )
                    storeSimpleTagOther( "META", strURL, "strURL=" + strURL + " | value= " + value );
            }
            catch( NoSuchElementException e ){
                System.err.println("HTMLFile::handleSimpleTag() : META : " + e);
            }
        }
    }
//------------------------------------------------------------------------------
    /**
     * Files a "mailto:" link in the e-mail list.
     *
     * @return true when the link was stored, false when it is not a mailto: link
     */
    private boolean storeSimpleTagEmail( String tagName, String link ){
        String mailPrefix = "mailto:";

        // regionMatches(true, ...) ignores case without depending on the default locale
        if( !link.regionMatches(true, 0, mailPrefix, 0, mailPrefix.length()) )
            return false;

        if( WebGlobal.DEBUG )
            System.err.println("HTMLFile::handleSimpleTag() : " + tagName + " : adding e-mail : " + link);

        m_emailLinks.add(link);
        return true;
    }
//------------------------------------------------------------------------------
    /**
     * Files a non-mail link in the misc list when it starts with one of the
     * m_miscProtocols prefixes, otherwise in the URL list. miscDetail is only
     * used for the debug trace of a misc link.
     */
    private void storeSimpleTagOther( String tagName, String link, String miscDetail ){
        for( int protoIndex = 0; protoIndex < m_miscProtocols.length; protoIndex++ ){
            String protocol = m_miscProtocols[protoIndex];
            if( link.regionMatches(true, 0, protocol, 0, protocol.length()) ){
                if( WebGlobal.DEBUG )
                    System.err.println("HTMLFile::handleSimpleTag() : " + tagName + " : adding misc link " + miscDetail);

                m_miscLinks.add(link);
                return;
            }
        }

        //ok link
        if( WebGlobal.DEBUG )
            System.err.println("HTMLFile::handleSimpleTag() : " + tagName + " : adding link : " + link);

        m_URLLinks.add(link);
    }
//------------------------------------------------------------------------------
    /**
     * Stores the text of a comment in the comment list.
     *
     * @param text     the characters of the comment
     * @param position not used
     */
    public void handleComment( char[] text, int position ){
        String commentText = String.valueOf(text);
        this.m_comments.add(commentText);
    }
//------------------------------------------------------------------------------    
    /**
     * Gives access to the links collected as plain URLs, i.e. those that
     * were not classified as e-mail or misc protocol links.
     *
     * @return the internal list of URLs (the list itself, not a copy).
     */
    public ArrayList getURLList(){
        return this.m_URLLinks;
    }
//------------------------------------------------------------------------------
    /**
     * Gives access to the e-mail links collected so far (links that start
     * with "mailto:").
     *
     * @return the internal list of e-mail links (the list itself, not a copy).
     */
    public ArrayList getEmailFound(){
        return this.m_emailLinks;
    }
//------------------------------------------------------------------------------
    /**
     * Gives access to the links collected for the misc (non http) protocols
     * such as telnet://, ldap:// and ftp://.
     *
     * @return the internal list of misc links (the list itself, not a copy).
     */
    public ArrayList getMiscFound(){
        return this.m_miscLinks;
    }
//------------------------------------------------------------------------------
    /**
     * Gives access to the comment texts collected by handleComment().
     *
     * @return the internal list of comments (the list itself, not a copy).
     */
    public ArrayList getComments(){
        return this.m_comments;
    }
//------------------------------------------------------------------------------
    /**
     * Reduces a raw attribute value to the part that looks like a single link.
     *
     * The value is trimmed, cut at the first '&lt;' or '&gt;' (HTML that does
     * not close its tags) and then cut again where a protocol listed in
     * WebGlobal.PROTOCOL_LIST shows up in an unexpected place, for example
     * proto://link/proto://other.
     *
     * @param data raw value to clean; may be null.
     * @return null when data is null or empty; otherwise the cleaned value,
     *         which can be an empty string (for instance when the trimmed
     *         value starts with '&lt;' or '&gt;').
     */
    private String cleanupValue( String data ){
        if( (data == null) || data.equals("") )
            return null;

        String clean = data.trim();

        /*
         *  Check for bad HTML that doesn't close tags: keep only the text
         *  in front of the first '<' or '>'. When that character is at
         *  position 0 the result is an empty string (not null).
         */
        int posL = clean.indexOf("<");
        int posR = clean.indexOf(">");

        if( (posL != -1) || (posR != -1) ){
            int cut;

            if( posR == -1 )
                cut = posL;
            else if( posL == -1 )
                cut = posR;
            else
                cut = Math.min(posL, posR);

            clean = clean.substring(0, cut);
        }

        /*
         *  Looking for bad HTML where the url is in proto://link/proto:
         *  or the protocol is specified but not starting the link.
         *  All protocol searches are done on a lower-cased copy so that
         *  they are case-insensitive.
         */
        String lowCaseData = clean.toLowerCase();

        main_loop:
        for( int i = 0; i < WebGlobal.PROTOCOL_LIST.length; i++ ){
            int posProto = lowCaseData.indexOf(WebGlobal.PROTOCOL_LIST[i]);

            if( posProto > 0 ){
                /*
                 *  The protocol is present but does not start the value.
                 *  As soon as one other listed protocol is either absent
                 *  or starting the value, keep only the text in front of
                 *  this protocol.
                 */
                for( int j = 0; j < WebGlobal.PROTOCOL_LIST.length; j++ ){
                    if( WebGlobal.PROTOCOL_LIST[i].equals(WebGlobal.PROTOCOL_LIST[j]) )
                        continue;

                    if( lowCaseData.indexOf(WebGlobal.PROTOCOL_LIST[j]) <= 0 ){
                        clean = clean.substring(0, posProto);
                        break main_loop;
                    }
                }
            }
            else if( posProto == 0 ){
                /*
                 *  The value starts with this protocol: look for another
                 *  protocol embedded further in the value. The tail is taken
                 *  from the lower-cased copy (the original-case tail would
                 *  miss e.g. HTTP://a/HTTP://b) and the earliest embedded
                 *  protocol wins, whatever its rank in PROTOCOL_LIST.
                 */
                int protoLen = WebGlobal.PROTOCOL_LIST[i].length();
                String lowCaseRest = lowCaseData.substring(protoLen);
                int posSecondProto = -1;

                for( int j = 0; j < WebGlobal.PROTOCOL_LIST.length; j++ ){
                    int pos = lowCaseRest.indexOf(WebGlobal.PROTOCOL_LIST[j]);

                    if( (pos != -1) && ((posSecondProto == -1) || (pos < posSecondProto)) )
                        posSecondProto = pos;
                }

                if( posSecondProto != -1 ){
                    // the index comes from the lower-cased copy; clamp it so a
                    // length difference between the two strings cannot overrun clean
                    int cut = Math.min(posSecondProto + protoLen, clean.length());
                    clean = clean.substring(0, cut);
                    break main_loop;
                }
            }
        }

        return clean;
    }
//------------------------------------------------------------------------------
    /*
    public void prepareRequestMethod( URLData url , HttpMethod requestMethod, boolean redirect ){

        requestMethod.setPath( url.getPath() );
        if( url.getQuery() != "" )
            requestMethod.setQueryString( url.getQuery() );
        
        requestMethod.addRequestHeader("Accept", "text/html");
        requestMethod.addRequestHeader("User-Agent", WebGlobal.USER_AGENT);
        
        requestMethod.setFollowRedirects( redirect );
    }    
//------------------------------------------------------------------------------  
    public static void main( String args[] ){
        HTMLFile fileInfo = null;
        URLCont urlData = null;
        HTMLEditorKit.Parser parser = null;
        HTMLEditorKit.ParserCallback callback = null;
        GetMethod requestMethod = null;
        boolean allGood = true;
        int requestCode;
        URLData url = null;
        String urlLocation = null;
        
        fileInfo = new HTMLFile();

        WebGlobal.setDebug(true);
        
        try{
            url = new URLData("http://192.168.1.101/index.html");
        }
        catch( MalformedURLException urlEx ){
            System.err.println(urlEx);
        }
        
        try{
            ParserGetter kit = new ParserGetter();
            parser = kit.getParser();
            callback = fileInfo;
            
            try{
                urlData = new URLCont( (URLInfo)url );
                requestMethod = new GetMethod("/index.html");//getRequestMethodClass();
                requestMethod.addRequestHeader("Accept", "text/html");
                
                urlData.setMethod(requestMethod);
                
                // 1 Mb should be enough
                ((GetMethod)requestMethod).setMaxBodySize(-1);
                
                // get rid of expired cookies
                urlData.purgeCookies();
                
                try{
                    urlData.open();
                }
                catch( IOException openIoEx ){
                    System.err.println("Webspider::getFileData() : " + openIoEx);
                    //return false;
                }                
                try{
                    requestCode = requestMethod.execute(urlData.getState(), urlData);

                    // Checking return code
                    switch( requestCode ){
                        // section of error codes that indicate that the page is present but we can't see them
                        case HttpStatus.SC_UNAUTHORIZED:            //  401 Unauthorized (HTTP/1.0 - RFC 1945)

                            //setInfoToSite(url);
                            allGood = false;
                            break;                        
                        // ERRORS
                        case HttpStatus.SC_BAD_REQUEST:             //  400 Bad Request (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_NOT_FOUND:               //  404 Not Found (HTTP/1.0 - RFC 1945)
                        case HttpStatus.SC_GONE:                    //  410 Gone (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_NOT_ACCEPTABLE:  //  406 Not Acceptable (HTTP/1.1 - RFC 2616)
                            
                        // Interesting errors
                        case HttpStatus.SC_INTERNAL_SERVER_ERROR:   //  500 Server Error (HTTP/1.0 - RFC 1945)
                        case HttpStatus.SC_SERVICE_UNAVAILABLE:    //  503 Service Unavailable (HTTP/1.0 - RFC 1945)

                        // Section of error codes that I want to see if I get them
                        case HttpStatus.SC_PAYMENT_REQUIRED:        //  402 Payment Required (HTTP/1.1 - RFC 2616) : This code is reserved for future use.           
                        case HttpStatus.SC_FORBIDDEN:               //  403 Forbidden (HTTP/1.0 - RFC 1945)
                        case HttpStatus.SC_METHOD_NOT_ALLOWED:      //  405 Method Not Allowed (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_REQUEST_TIMEOUT:         //  408 Request Timeout (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_CONFLICT:                //  409 Conflict (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_LENGTH_REQUIRED:         //  411 Length Required (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_PRECONDITION_FAILED:     //  412 Precondition Failed (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_REQUEST_TOO_LONG:        //  413 Request Entity Too Large (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_REQUEST_URI_TOO_LONG:    //  414 Request-URI Too Long (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_UNSUPPORTED_MEDIA_TYPE:  //  415 Unsupported Media Type (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_REQUESTED_RANGE_NOT_SATISFIABLE:     //  416 Requested Range Not Satisfiable (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_EXPECTATION_FAILED:      //  417 Expectation Failed (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_INSUFFICIENT_SPACE_ON_RESOURCE:      //  419 Insufficient Space on Resource (WebDAV - draft-ietf-webdav-protocol-05?) or 419 Proxy Reauthentication Required (HTTP/1.1 drafts?)
                        case HttpStatus.SC_METHOD_FAILURE:          //  420 Method Failure (WebDAV - draft-ietf-webdav-protocol-05?)
                        case HttpStatus.SC_UNPROCESSABLE_ENTITY:    //  422 Unprocessable Entity (WebDAV - RFC 2518)
                        case HttpStatus.SC_LOCKED:                  //  423 Locked (WebDAV - RFC 2518)
                        case HttpStatus.SC_FAILED_DEPENDENCY:       //  424 Failed Dependency (WebDAV - RFC 2518)
                        case HttpStatus.SC_NOT_IMPLEMENTED:         //  501 Not Implemented (HTTP/1.0 - RFC 1945)
                        case HttpStatus.SC_HTTP_VERSION_NOT_SUPPORTED:      //  505 HTTP Version Not Supported (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_INSUFFICIENT_STORAGE:    //  507 Insufficient Storage (WebDAV - RFC 2518)

                        // section indicating proxy error
                        case HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED:   // 407 Proxy Authentication Required (HTTP/1.1 - RFC 2616)
                        case HttpStatus.SC_BAD_GATEWAY:             //  502 Bad Gateway (HTTP/1.0 - RFC 1945)
                        case HttpStatus.SC_GATEWAY_TIMEOUT:         //  504 Gateway Timeout (HTTP/1.1 - RFC 2616)
                            allGood = false;
                            break;
                    }
                }
                catch( HttpException httpEx ){
                    System.err.println("Webspider::getFileData() : " + httpEx);
                    allGood = false;
                }
                catch( IOException ioExRequest ){
                    System.err.println("Webspider::getFileData() : " + ioExRequest.toString());
                    allGood = false;
                }
                
                //set the request Information
                url.setResponseInfo( requestMethod.getResponseHeaders() );

                InputStream in = requestMethod.getResponseBodyAsStream();
                InputStreamReader r = new InputStreamReader(in);                                
                parser.parse(r, callback,true);
            }
            catch( IOException ioEx ){
                System.err.println("WebSpider::getFileData() : IOException : " + ioEx );
                allGood = false;
            }
        }
        catch( Exception e ){
            System.err.println("WebSpider::getFileData() : exception : " + e );
            e.printStackTrace();
            allGood = false;     
        }
        finally{
            try{
                urlData.close();
            }
            catch( IOException closeIoEx ){
                System.err.println("WebSpider::getFileData() : " + closeIoEx );
            }
        }
            
        // page 251 :Java Network Programming
        try{
            callback.flush();
        }
        catch( BadLocationException badLocE ){
            System.err.println("WebSpider::getFileData() : " + badLocE );
        }
    }
     */
}
