/*
** Copyright (C) 2001,2002 Sacha Faust <sacha@severus.org>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/*
 *  Version : 1.3
 */
package faust.sacha.web.data;

import faust.sacha.web.data.*;
import faust.sacha.web.util.*;
import java.net.*;
import java.util.*;
import faust.sacha.web.bot.spider.*;
import faust.sacha.web.bot.spider.event.*;
import faust.sacha.web.bot.spider.data.*;

public class Site extends ThreadEventManager{

    private Hashtable m_siteFolders;
    private URLInfo m_urlInfo;
    private ProcessBroker m_broker;
    private ArrayList m_urlsDone;
    private WaitUrlQ m_todoURLS;
    private UrlQ m_toGetDataURLS;
    private ArrayList m_emails;
    private ArrayList m_outsideURLs;
    private GetInfoManager m_getInfoManager;    
    private GetDataManager m_getDataManager;
    private SiteURLProcessor m_urlProcessor;
    private SpiderManager m_spiderManager;
    
    public Site( URLInfo url, SpiderManager manager ) throws MalformedURLException{
        super( "Site->" + url.getHost() );

        m_urlInfo = url;
        
        m_siteFolders = new Hashtable();
        m_urlsDone = new ArrayList(0);
        m_emails = new ArrayList(0);
        m_outsideURLs = new ArrayList(0);
        m_toGetDataURLS = new UrlQ();
        m_todoURLS = new WaitUrlQ( "WaitUrlQ->" + getDomain(), this, m_toGetDataURLS );
        m_urlProcessor = new SiteURLProcessor("SiteURLProcessor->" + getDomain(), this, m_todoURLS);
        
        m_broker = new ProcessBroker( "ProcessBroker->" + getDomain(), m_toGetDataURLS, m_todoURLS, m_urlProcessor, this );
        m_getInfoManager = new GetInfoManager( "GetInfoManager->" + getDomain(), m_todoURLS, m_broker );
        m_getDataManager = new GetDataManager( "GetDataManager->" + getDomain(), m_toGetDataURLS, m_broker );
        
        m_spiderManager = manager;
        
        /*
         *  Add the first url todo in the queue
         */
        m_todoURLS.add( new URLData(m_urlInfo) );

        
    }
    
    /**
     * Convenience constructor: wraps the url string in a URLInfo and
     * delegates to the (URLInfo, SpiderManager) constructor.
     *
     * @param url     start URL of the site, as a string
     * @param manager spider manager passed through to the delegated constructor
     * @throws MalformedURLException propagated from the delegated constructor
     *         (presumably also from URLInfo parsing -- not visible here)
     */
    public Site( String url, SpiderManager manager ) throws MalformedURLException{
        this( new URLInfo(url), manager );
    }
//------------------------------------------------------------------------------
    /**
     * Main loop of the site.
     *
     * Starts the broker, both managers, the URL processor and the todo queue,
     * then loops on the current state:
     *   - EVENT_STOP or EVENT_SITE_FINISH ends the loop;
     *   - EVENT_IS_SITE_FINISH triggers a check whether any work is left; if
     *     so the state goes back to EVENT_NEW, otherwise it becomes
     *     EVENT_SITE_FINISH and the loop ends;
     *   - any other state makes the method wait() on this object (releasing
     *     its monitor) until it is notified.
     *
     * After the loop, the listening threads are either stopped (EVENT_STOP) or
     * sent EVENT_SITE_FINISH, and the SpiderManager is always sent
     * EVENT_FINISH.
     */
    private synchronized void process(){
        boolean firstFinishCheck = true;

        m_broker.start();
        m_getInfoManager.start();
        m_getDataManager.start();
        m_urlProcessor.start();
        m_todoURLS.start();

        while( true ){
            int state = getState();

            if( (state == ThreadEvent.EVENT_STOP) || (state == ThreadEvent.EVENT_SITE_FINISH) )
                break;

            if( state != ThreadEvent.EVENT_IS_SITE_FINISH ){
                try{
                    /*
                     *  We release the lock and wait to be notified
                     */
                    wait();
                }
                catch( InterruptedException intEx ){
                    System.err.println( getName() + "::run() : " + intEx);
                }
                continue;
            }

            if( WebGlobal.DEBUG )
                System.err.println( getName() + "::run() entering EVENT_IS_SITE_FINISH" );

            /*
             *  This is a protection against sites that return useless info
             *  when a basic HEAD request is sent on the /.
             *  On the first finish check, if exactly one url was done by the
             *  spider, such an event might have happened, so we force the
             *  root url of the site to be processed with a GET request.
             *
             *  Example of such site:
             *      www.sharp-usa.com [206.65.179.140] 80 (http) open
             *      HEAD / HTTP/1.0
             *
             *      HTTP/1.1 302 Object moved
             *      Server: Microsoft-IIS/5.0
             *      Date: Sun, 31 Mar 2002 02:01:22 GMT
             *      Location: http://www.sharp-usa.com
             *      Connection: Keep-Alive
             *      Content-Length: 145
             *      Content-Type: text/html
             *      Set-Cookie: ASPSESSIONIDGQQGQCBG=IHJGOOABMHKDIIEILLLAEPGA; path=/
             *      Cache-control: private
             */
            if( firstFinishCheck ){
                firstFinishCheck = false;
                requeueRootUrlIfOnlyUrlDone();
            }

            /*
             *  Wait for both m_broker and m_urlProcessor to finish
             *  their processing. This will enforce the other queues
             *  to get filled with data if some is available after they
             *  finish processing
             */
            m_broker.waitForProcessing();
            m_urlProcessor.waitForProcessing();

            if( WebGlobal.DEBUG )
                System.err.println( getName() + "::run() entering EVENT_IS_SITE_FINISH-2" );

            if( siteHasPendingWork() ){
                setState(ThreadEvent.EVENT_NEW);
            }
            else{
                setState(ThreadEvent.EVENT_SITE_FINISH);
                break;
            }
        }

        int finalState = getState();
        if( finalState == ThreadEvent.EVENT_STOP ){
            stopAllListenningThreads();
        }
        else if( finalState == ThreadEvent.EVENT_SITE_FINISH ){
            sendEventAll( new ThreadEvent(this, null, ThreadEvent.EVENT_SITE_FINISH) );
        }

        /*
         *  Tell the SpiderManager that we are done
         */
        sendEvent( new ThreadEvent(this, null, ThreadEvent.EVENT_FINISH), m_spiderManager );
    }
//------------------------------------------------------------------------------
    /*
     *  Puts the root url of the site on the get-data queue when exactly one
     *  url has been done so far. A failure to build the URLData is reported
     *  on System.err instead of being silently dropped.
     */
    private void requeueRootUrlIfOnlyUrlDone(){
        int nbUrlsDone;

        // m_urlsDone is guarded by its own monitor everywhere else it is touched
        synchronized( m_urlsDone ){
            nbUrlsDone = m_urlsDone.size();
        }

        if( nbUrlsDone != 1 )
            return;

        try{
            m_toGetDataURLS.add( new URLData(m_urlInfo) );
        }
        catch( MalformedURLException urlEx ){
            System.err.println( getName() + "::process() : unable to requeue the root url : " + urlEx );
        }
    }
//------------------------------------------------------------------------------
    /*
     *  Returns true when a queue still holds urls or a manager still reports
     *  running threads; prints the reason when WebGlobal.DEBUG is set.
     */
    private boolean siteHasPendingWork(){
        if( !m_todoURLS.isEmpty() ){
            if( WebGlobal.DEBUG )
                System.err.println(getName() + "::process : m_todoURLS is not empty");
            return true;
        }

        if( !m_toGetDataURLS.isEmpty() ){
            if( WebGlobal.DEBUG )
                System.err.println(getName() + "::process : m_toGetDataURLS is not empty");
            return true;
        }

        if( m_getInfoManager.getNbThreads() != 0 ){
            if( WebGlobal.DEBUG )
                System.err.println(getName() + "::process : still threads in m_getInfoManager : " + m_getInfoManager.getNbThreads());
            return true;
        }

        if( m_getDataManager.getNbThreads() != 0 ){
            if( WebGlobal.DEBUG )
                System.err.println(getName() + "::process : still threads in m_getDataManager : " + m_getDataManager.getNbThreads());
            return true;
        }

        return false;
    }
//------------------------------------------------------------------------------    
    /**
     * Runs the site: calls the parent's run(), registers the broker, both
     * managers, the URL processor and the todo queue as listeners, then hands
     * control to process() until it returns.
     */
    public void run(){
        super.run();

        // all workers must be listeners before process() starts them
        addToListeners(m_broker);
        addToListeners(m_getDataManager);
        addToListeners(m_getInfoManager);
        addToListeners(m_urlProcessor);
        addToListeners(m_todoURLS);

        process();

        if( !WebGlobal.DEBUG )
            return;

        System.err.println(getName() + "->After process()");
        System.err.println(getName() + "->END RUN()");
    }
//------------------------------------------------------------------------------
    /**
     * Debug accessor for the queue of urls still to do.
     *
     * @return the site's own WaitUrlQ instance (not a copy)
     */
    public WaitUrlQ DEBUG_getTODO(){
        return m_todoURLS;
    }
//------------------------------------------------------------------------------    
    /**
     * Tells whether an email equivalent to the given one was already recorded.
     * The scan runs while holding the lock on the email list.
     *
     * @param emailToCheck email to look for
     * @return true as soon as a stored email reports sameAs(emailToCheck),
     *         false when none does
     */
    protected boolean isEmailAlreadyFound( EmailURL emailToCheck ){
        synchronized( m_emails ){
            for( Iterator known = m_emails.iterator(); known.hasNext(); ){
                EmailURL candidate = (EmailURL)known.next();

                if( candidate.sameAs(emailToCheck) )
                    return true;
            }
        }

        return false;
    }
//------------------------------------------------------------------------------
    /**
     * Tells whether the list already holds a file with the given name,
     * ignoring case. The list is locked for the whole scan and each file is
     * locked while its name is read.
     *
     * @param elementList list of FileURL entries to search
     * @param fileName    file name to look for
     * @return true if an entry's file name matches, false otherwise
     */
    protected boolean isFileAlreadyPresent( ArrayList elementList, String fileName ){
        synchronized( elementList ){
            Iterator files = elementList.iterator();

            while( files.hasNext() ){
                FileURL candidate = (FileURL)files.next();

                synchronized( candidate ){
                    String candidateName = candidate.getFileName();
                    if( candidateName.compareToIgnoreCase(fileName) == 0 )
                        return true;
                }
            }
        }

        return false;
    }
//------------------------------------------------------------------------------    
    /**
     * Merges the query data of fileData into every file of the folder that
     * has the same file name (case-insensitive).
     *
     * Nothing is done when the folder is unknown or when the query of
     * fileData is the empty string.
     *
     * @param folder   name of the folder whose files are inspected
     * @param fileData file carrying the query data to merge
     */
    private void updateFileQueryData( String folder, FileURL fileData ){
        synchronized( fileData ){
            ArrayList fileList = getFolderElements(folder);

            // getFolderElements() returns null for an unknown folder; locking
            // on null would throw a NullPointerException
            if( fileList == null )
                return;

            synchronized( fileList ){
                String queryToAdd = fileData.getQuery();

                // compare content, not references: == only matches when both
                // sides happen to be the same interned String object
                if( "".equals(queryToAdd) )
                    return;

                /*
                 *  we go over each file in the folder and compare only
                 *  the filename to see if there is a match.
                 *  if we find a match, we add the query data to the file
                 */
                for( int i = 0; i < fileList.size(); i++ ){
                    FileURL fileToCheck = (FileURL)fileList.get(i);

                    synchronized( fileToCheck ){
                        // it's safe to only compare filenames
                        if( fileToCheck.getFileName().compareToIgnoreCase(fileData.getFileName()) == 0 ){
                            fileToCheck.addQueryData( fileData );
                        }
                    }
                }
            }
        }
    }
//------------------------------------------------------------------------------
    /**
     * Looks up the list of elements stored for a folder.
     *
     * @param folderName key of the folder
     * @return the list registered under folderName (the stored instance, not
     *         a copy), or null when the folder table has no such key
     */
    public ArrayList getFolderElements( String folderName ){
        ArrayList elements;

        synchronized( m_siteFolders ){
            elements = (ArrayList)m_siteFolders.get(folderName);
        }

        return elements;
    }
//------------------------------------------------------------------------------
    /**
     * @return the base URL reported by this site's URLInfo
     */
    public String getSiteURL(){
        return m_urlInfo.getBaseURL();
    }
//------------------------------------------------------------------------------
    /**
     * @return the port reported by this site's URLInfo
     */
    public int getPort(){
        return m_urlInfo.getPort();
    }
//------------------------------------------------------------------------------
    /**
     * @return the secure flag reported by this site's URLInfo
     *         (presumably true for https -- confirm in URLInfo)
     */
    public boolean isSecure(){
        return m_urlInfo.isSecure();
    }
//------------------------------------------------------------------------------
    /**
     * @return the host reported by this site's URLInfo; the constructor uses
     *         it to name the site's workers
     */
    public String getDomain(){
        return m_urlInfo.getHost();
    }
//------------------------------------------------------------------------------    
    /**
     * Registers a folder with an empty element list unless a folder with the
     * same name is already known. The outcome is traced on System.err when
     * WebGlobal.DEBUG is set.
     *
     * @param folderName key of the folder to register
     */
    public void addFolder( String folderName ){
        synchronized( m_siteFolders ){
            if( m_siteFolders.containsKey(folderName) ){
                // already registered: keep the existing list untouched
                if( WebGlobal.DEBUG )
                    System.err.println(getName() + "::addFolder() : duplicate key " + folderName);
                return;
            }

            m_siteFolders.put( folderName, new ArrayList(0) );

            if( WebGlobal.DEBUG )
                System.err.println( getName() + "::addFolder() : Adding folderKey " + folderName );
        }
    }
//------------------------------------------------------------------------------
    /**
     * Stores a file in a folder, creating the folder first when it does not
     * exist yet.
     *
     * When the folder already holds a file with the same name, the file is
     * not added again; its query data is merged into the existing entry
     * instead. The whole operation runs under the lock of the folder table.
     *
     * @param folderName key of the folder receiving the file
     * @param fileToAdd  file to store or to merge
     */
    public void addFileToFolder( String folderName, FileURL fileToAdd ){
        synchronized( m_siteFolders ){
            ArrayList contents = getFolderElements(folderName);

            if( contents == null ){
                // the folder doesn't exist yet: create it, then fetch its list
                if( WebGlobal.DEBUG )
                    System.err.println(getName() + "::addFileToFolder() : folder wasn't present and adding it for : " + folderName );

                addFolder(folderName);
                contents = getFolderElements(folderName);
            }

            if( isFileAlreadyPresent(contents, fileToAdd.getFileName()) ){
                // the file already exists: try to add some query data to it
                updateFileQueryData(folderName, fileToAdd);
            }
            else{
                contents.add( fileToAdd );
            }
        }
    }
//------------------------------------------------------------------------------
    /**
     * Records a url as done. No duplicate check is made here.
     *
     * @param urlToAdd url to append to the list of urls done
     */
    public void addURLDone( URLInfo urlToAdd ){
        synchronized( m_urlsDone ){
            m_urlsDone.add( urlToAdd );
        }

        if( !WebGlobal.DEBUG )
            return;

        System.err.println( getName() + "::addURLDone : added : " + urlToAdd.getURL() );
    }
//------------------------------------------------------------------------------
    /**
     * Records an email unless an equivalent one is already stored. The check
     * and the insertion happen under the same lock on the email list.
     *
     * @param emailToAdd email to record
     */
    public void addEmail( EmailURL emailToAdd ){
        synchronized( m_emails ){
            if( isEmailAlreadyFound(emailToAdd) )
                return;

            m_emails.add( emailToAdd );
        }
    }
//------------------------------------------------------------------------------
    /**
     * Returns a snapshot of the urls done so far.
     *
     * The copy is taken while holding the lock on the list, the same lock
     * addURLDone() and isURLAlreadyDone() use, so it cannot race with an
     * insertion. The copy is shallow: the URLInfo elements are shared.
     *
     * @return a new ArrayList holding the urls done at the time of the call
     */
    public ArrayList getUrlsDone(){
        synchronized( m_urlsDone ){
            return (ArrayList)m_urlsDone.clone();
        }
    }
//------------------------------------------------------------------------------
    /**
     * Returns a snapshot of the emails found so far.
     *
     * The copy is taken while holding the lock on the list, the same lock
     * addEmail() uses, so it cannot race with an insertion. The copy is
     * shallow: the EmailURL elements are shared.
     *
     * @return a new ArrayList holding the emails found at the time of the call
     */
    public ArrayList getEmailsFound(){
        synchronized( m_emails ){
            return (ArrayList)m_emails.clone();
        }
    }
//------------------------------------------------------------------------------
    /**
     * Tells whether a url was already recorded as done. Urls are compared
     * without their anchor and ignoring case. The list is locked for the
     * whole scan and each stored url is locked while it is compared.
     *
     * @param urlToCheck url to look for
     * @return true if a matching url is in the list of urls done
     */
    public boolean isURLAlreadyDone( URLInfo urlToCheck ){
        synchronized( m_urlsDone ){
            for( Iterator done = m_urlsDone.iterator(); done.hasNext(); ){
                URLInfo visited = (URLInfo)done.next();

                synchronized( visited ){
                    // the anchor, if any, is left out on both sides
                    String visitedUrl = visited.getURLWithoutAnchor();
                    String wantedUrl = urlToCheck.getURLWithoutAnchor();

                    if( visitedUrl.compareToIgnoreCase(wantedUrl) == 0 )
                        return true;
                }
            }
        }

        return false;
    }
//------------------------------------------------------------------------------    
    /**
     * @return the URLInfo this site was created with (the stored instance,
     *         not a copy)
     */
    public URLInfo getInfo(){
        return m_urlInfo;
    }
//------------------------------------------------------------------------------
    /**
     * Returns a copy of the folder table (folder name -> list of elements).
     * Hashtable.clone() is shallow, so the per-folder lists in the returned
     * table are the same instances this site keeps adding to.
     *
     * @return a cloned Hashtable of the site folders
     */
    public Hashtable getFolderHashTable(){
        return (Hashtable)m_siteFolders.clone();
    } 
//------------------------------------------------------------------------------
    /**
     * @return an enumeration over the keys of the folder table, i.e. the
     *         folder names registered through addFolder()
     */
    public Enumeration getFoldersNames() {
        return m_siteFolders.keys();
    }
//------------------------------------------------------------------------------
    /**
     * Records a url pointing outside of the site unless it is already known.
     * The check and the insertion happen under the same lock on the list.
     *
     * @param url outside url to record
     */
    public void addOutsideURL( URLInfo url ){
        synchronized( m_outsideURLs ){
            if( isOutsideURLPresent(url) )
                return;

            m_outsideURLs.add( url );
        }
    }
//------------------------------------------------------------------------------
    /**
     * Tells whether an outside url is already recorded; full urls are
     * compared ignoring case. The list is locked for the whole scan and each
     * stored url is locked while it is compared.
     *
     * @param urlToTest url to look for
     * @return true if a stored outside url matches, false otherwise
     */
    public boolean isOutsideURLPresent( URLInfo urlToTest ){
        synchronized( m_outsideURLs ){
            Iterator known = m_outsideURLs.iterator();

            while( known.hasNext() ){
                URLInfo candidate = (URLInfo)known.next();

                synchronized( candidate ){
                    String candidateUrl = candidate.getURL();
                    if( candidateUrl.equalsIgnoreCase(urlToTest.getURL()) )
                        return true;
                }
            }
        }

        return false;
    }
//------------------------------------------------------------------------------    
    /**
     * Returns a snapshot of the outside urls found so far.
     *
     * The copy is taken while holding the lock on the list, the same lock
     * addOutsideURL() and isOutsideURLPresent() use, so it cannot race with
     * an insertion. The copy is shallow: the URLInfo elements are shared.
     *
     * @return a new ArrayList holding the outside urls at the time of the call
     */
    public ArrayList getOutsideURLs(){
        synchronized( m_outsideURLs ){
            return (ArrayList)m_outsideURLs.clone();
        }
    }
//------------------------------------------------------------------------------    
}
