/*
** Copyright (C) 2001,2002 Sacha Faust <sacha@severus.org>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/*
 *  Version : 1.3
 */
package faust.sacha.web.bot.spider.data;

import faust.sacha.web.data.*;
import faust.sacha.web.util.WebGlobal;

import org.apache.commons.httpclient.HttpMethod;

import faust.sacha.web.bot.spider.event.ThreadEventManager;
import faust.sacha.web.bot.spider.event.ThreadEvent;
import faust.sacha.web.bot.spider.event.ProcessBroker;
import faust.sacha.web.bot.spider.util.EnginGlobal;

import java.net.MalformedURLException;

/**
 * Abstract base for spider workers that operate on a single {@link URLData}
 * and report back to a {@link ProcessBroker} through thread events.
 *
 * <p>Provides shared helpers for subclasses: configuring an outgoing
 * {@link HttpMethod}, guessing whether a URL is likely to be an HTML
 * document, and handing URLs found in the <code>Location</code> /
 * <code>Content-Location</code> response headers to the broker.
 */
public abstract class FileGetter extends ThreadEventManager {

    /** URL this getter was created for. */
    protected URLData m_url;
    /** Current state; initialised to {@link ThreadEvent#EVENT_NEW} by the constructor. */
    protected int m_state;
    /** Broker that receives the events emitted by this getter. */
    protected ProcessBroker m_broker;
    
    /**
     * Creates a getter for the given URL and registers the broker as a listener.
     *
     * @param name   name passed to the superclass constructor
     * @param url    URL this getter works on
     * @param broker broker registered as listener and used as event target
     */
    public FileGetter( String name, URLData url , ProcessBroker broker ) {
        super( name );
        m_url = url;
        m_state = ThreadEvent.EVENT_NEW;
        m_broker = broker;
        
        addToListeners(m_broker);
    }
//------------------------------------------------------------------------------
    /**
     * Runs the superclass logic and then notifies the global thread manager
     * through <code>EnginGlobal.THREAD_MANAGER.addThread()</code>.
     */
    public void run(){
        super.run();
        EnginGlobal.THREAD_MANAGER.addThread();
    }
//------------------------------------------------------------------------------    
    /**
     * Fills in path, query string, standard headers and redirect policy on
     * an outgoing request.
     *
     * @param url           source of the path and query string
     * @param requestMethod request to configure
     * @param redirect      whether the request should follow redirects
     */
    protected void prepareRequestMethod( URLData url , HttpMethod requestMethod, boolean redirect ){

        requestMethod.setPath( url.getPath() );

        /*
         *  Compare by content, not by reference: "!=" on strings only
         *  checks object identity, so an empty (non-interned) query would
         *  otherwise be set on the request.
         */
        String query = url.getQuery();
        if( query != null && query.length() > 0 )
            requestMethod.setQueryString( query );
        
        requestMethod.addRequestHeader("Accept", "text/html");
        requestMethod.addRequestHeader("User-Agent", WebGlobal.USER_AGENT);
        
        requestMethod.setFollowRedirects( redirect );
    }    
//------------------------------------------------------------------------------    
    /*
    public void dumpInfo(){
        System.out.println("Thread info : " + getName() );
        System.out.println("URL : " + m_url.getURL() );
        System.out.println("Server : " + m_url.getServerInfo() );
        System.out.println("Content-Length : " + m_url.getContentLengthInfo());
        System.out.println("Content-Type : " + m_url.getContentTypeInfo() );
        System.out.println("Cookie : " + m_url.getCookieInfo());
        System.out.println("Last modified : " + m_url.getLastModifiedInfo());
        System.out.println("Location : " + m_url.getContentLocationInfo());
        System.out.println("-------------------------------------------------------------");
    }
     */
//------------------------------------------------------------------------------
    /**
     * Guesses from the URL alone whether it is likely to point to an HTML
     * document.
     *
     * @param url URL to inspect
     * @return <code>true</code> when the URL has an empty file part (a folder)
     *         or its extension matches, ignoring case, one of
     *         <code>WebGlobal.POTENTIAL_HTML_FILE_EXT</code>;
     *         <code>false</code> otherwise, including when the URL cannot be
     *         turned into a {@link FileURL}
     */
    protected boolean isPotentialHTMLFile( URLData url ){
        FileURL fileToCheck = null;
        
        try{
            fileToCheck = new FileURL(url);
        }
        catch( MalformedURLException urlEx ){
            return false;
        }
        
        /*
         *  It's a folder, good chance it links to an html file.
         *  Content comparison: "==" would only match the interned literal.
         */
        if( "".equals( fileToCheck.getFile() ) )
            return true;
        
        // Loop-invariant, so fetched once. The comparison below is invoked
        // on the table entry so that a null extension simply does not match.
        String extension = fileToCheck.getExtension();
        
        for( int i = 0; i < WebGlobal.POTENTIAL_HTML_FILE_EXT.length; i++ ){
            if( WebGlobal.POTENTIAL_HTML_FILE_EXT[i].equalsIgnoreCase(extension) ){
                if( WebGlobal.DEBUG )
                    System.err.println( getName() + "::isPotentialHTMLFile() : found html file for : " + url.getURL() );
                
                return true;
            }
        }
        return false;
    }
//------------------------------------------------------------------------------
    /**
     * Reads the <code>location</code> header of a redirected response and
     * sends the target URL to the broker as an
     * {@link ThreadEvent#EVENT_TO_GET_INFO} event.
     *
     * @param urlWithRedirect URL whose response carried the redirect
     */
    protected void processRedirect( URLData urlWithRedirect ){
        forwardHeaderURL( urlWithRedirect, "location", "processRedirect", "urlWithRedirect" );
    }    
//------------------------------------------------------------------------------
    /**
     * Reads the <code>content-location</code> header of a response and sends
     * the URL it names to the broker as an
     * {@link ThreadEvent#EVENT_TO_GET_INFO} event.
     *
     * @param url URL whose response carried the header
     */
    protected void processContentLocation( URLData url ){
        forwardHeaderURL( url, "content-location", "processContentLocation", "url" );
    }
//------------------------------------------------------------------------------
    /**
     * Shared implementation of {@link #processRedirect(URLData)} and
     * {@link #processContentLocation(URLData)}: looks up a header that holds
     * a URL and forwards that URL to the broker.
     *
     * @param source      URL whose stored headers are consulted
     * @param headerName  name of the header holding the new URL
     * @param caller      calling method's name, used as the log prefix
     * @param sourceLabel label for <code>source</code> in the debug trace
     */
    private void forwardHeaderURL( URLData source, String headerName, String caller, String sourceLabel ){
        String logPrefix = getName() + "::" + caller + "() : ";
        
        // NOTE(review): the second argument is presumably the index of the
        // header occurrence to read - confirm against URLData.
        String urlStr = source.getHeaderContent(headerName, 0);
        
        if( urlStr == null ){
            if( WebGlobal.DEBUG )
                System.err.println( logPrefix + "urlStr is null on : " + source.getURL() );
            return;
        }
        
        if( WebGlobal.DEBUG )
            System.err.println( logPrefix + sourceLabel + " = " + source.getURL() + " urlStr = " + urlStr );
        
        try{
            /*
             *  Send the url to the process broker so it can place it
             *  in the todo queue. The process broker will deal with the url
             *  to see if it is considered an outside url or if it has to
             *  be fetched.
             */
            URLData target = new URLData(urlStr);
            // Explicit Object casts kept as in the original call site so the
            // same ThreadEvent constructor is selected.
            ThreadEvent event = new ThreadEvent( (Object)this, (Object)target, ThreadEvent.EVENT_TO_GET_INFO );
            sendEvent(event, m_broker);
        }
        catch( MalformedURLException urlEx ){
            // Reported even when DEBUG is off; the URL is dropped.
            System.err.println( logPrefix + urlEx );
        }
    }
}
