/*
** Copyright (C) 2001,2002 Sacha Faust <sacha@severus.org>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/*
 *  Version : 1.3
 */
package faust.sacha.web.util;

import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
import faust.sacha.web.data.*;
import faust.sacha.web.util.*;
import java.util.*;
import java.io.*;
import java.net.*;

/*
 * TODO:
 *  - add support for credentials embedded in the url (user:pass@host)
 */
/** Represents an HTTP or HTTPS url split into its components
 * (protocol, host, port, folder, file, query and anchor).
 */
public class URLInfo {
    // Complete url; rebuilt from the parsed pieces at the end of initData().
    private String m_fullURL;
    // Host name, filled in by setPortAndProtocolAndHost().
    private String m_host;
    // Initialised to "" by the default constructor; not read or written anywhere else in the visible code.
    private String m_arguments;
    // Text found before the first ":" of the base url (e.g. "http").
    private String m_protocol;
    // Leading part of the url that precedes the path, always a substring of the url given by the caller.
    private String m_baseURL;
    // Folder part of the path, after it went through FolderCleaner.
    private String m_folder;
    // File part of the path; "" when the url designates a folder.
    private String m_file;
    // m_folder + m_file once setFolderAndFile() has run.
    private String m_path;
    // Query string including its leading "?"; "" when there is none.
    private String m_query;
    // Anchor including its leading "#"; only recorded when the "#" is not located after a "?".
    private String m_anchor;
    // Port written in the url, otherwise a default taken from WebGlobal.
    private int m_port;
    // Set to true by initData() when the protocol is "https" (case-insensitive).
    private boolean m_secure;
    
    /**
     * Puts every component in its neutral state: all string parts are the
     * empty string, the port is WebGlobal.DEFAULT_PORT and the url is flagged
     * as not secure. The public constructors call this before parsing.
     */
    protected URLInfo(){
        // every textual component starts out as the empty string
        m_fullURL = m_host = m_arguments = m_protocol = m_baseURL = "";
        m_folder = m_file = m_path = m_query = m_anchor = "";

        m_secure = false;
        m_port = WebGlobal.DEFAULT_PORT;
    }
    
    /**
     * Builds a URLInfo from a complete url string.
     * The string is stored as-is and then split into its components by
     * initData().
     * @param url The url that you want to define.
     * @throws MalformedURLException when initData() is unable to parse the url.
     */    
    public URLInfo( String url ) throws MalformedURLException{
        this();
      
        // initData() reads m_fullURL, so it has to be set before parsing
        m_fullURL = url;
        initData();
    }

    /**
     * Builds a URLInfo from a link found inside another document.
     * The link is always treated as relative to parentURL and is resolved as
     * follows:
     * <ul>
     * <li>"/path" : appended to the base url of the parent;</li>
     * <li>"?query" : appended to the parent url stripped of its own query
     *     and anchor (see getDirectURL());</li>
     * <li>anything else : appended to the base url and folder of the
     *     parent.</li>
     * </ul>
     * NOTE(review): a link that is already absolute ("http://...") is not
     * detected here and ends up appended to the parent folder; callers are
     * presumably expected to use the single argument constructor for those.
     *
     * @param url The url or piece of url that you want to define.
     * @param parentURL The parent url that serves as the base of the url that you want to define.
     * @throws MalformedURLException when the resolved url cannot be parsed.
     */    
    public URLInfo( String url, URLInfo parentURL ) throws MalformedURLException{
        this();
        
        if( url.startsWith("/") ){
            // parentURL = anything | url = /folder/file.ext
            m_fullURL = parentURL.getBaseURL() + url;
        }
        else if( url.startsWith("?") ){
            /*
             * parentURL = http://www.site.com/file.cgi | url = ?a=b
             * This test has to come before the generic relative case below,
             * otherwise it can never be reached and the file of the parent
             * is lost.
             */
            m_fullURL = parentURL.getDirectURL() + url;
        }
        else{
            /*
             * parentURL = http://www.site.com/folder/ or .../folder/file.ext
             * url = file.ext
             * Relies on getFolder() of the parent ending with a "/".
             */
            m_fullURL = parentURL.getBaseURL() + parentURL.getFolder() + url;
        }
        
        initData();
    }
//------------------------------------------------------------------------------
    /*
     * Splits m_fullURL into all of its components and then rebuilds
     * m_fullURL from the cleaned pieces.
     * The parsing steps run in a fixed order because each one consumes
     * fields produced by the previous ones; the first step that reports a
     * failure aborts the whole operation with a MalformedURLException.
     */
    private void initData() throws MalformedURLException{
        
        boolean parsed = setBaseURLAndPathdAnchor() && setPortAndProtocolAndHost();
        
        if( parsed ){
            // the query must be known before the folder/file split is attempted
            setQuery();
            parsed = setFolderAndFile();
        }
        
        if( !parsed ){
            throw new MalformedURLException( "Unable to parse url in : " + m_fullURL );
        }
        
        if( getProtocol().compareToIgnoreCase("https") == 0 ){
            m_secure = true;
        }
        
        // enforce a clean url when . or .. was put in the initial m_fullURL
        m_fullURL = m_baseURL + m_folder + m_file + m_anchor + m_query;
    }    
//------------------------------------------------------------------------------    
    /*
     * Asks the server whether the current path designates a folder.
     *
     * Logic:
     *  A trailing "/" is appended to the path and a HEAD request is sent for
     *  it. Any answer other than 404 is taken as proof that the path is a
     *  folder; a 404 or a failure while talking to the server means it is
     *  treated as a file. A url that carries a query is considered a file
     *  without contacting the server at all.
     *  The probe url always ends with "/", so parsing it takes the folder
     *  branch of setFolderAndFile() and does not come back here.
     *  The answer is kept in a local variable instead of being returned from
     *  inside the try/catch so that the connection is always closed by the
     *  finally block first.
     */
    private boolean isURLPointToFolder(){
        
        // if the url has a query, we know it's a file
        if( !m_query.equals("") )
            return false;
        
        String probePath = m_baseURL + m_path + "/";
        URLInfo probeURL;
        try{
            probeURL = new URLInfo( probePath );
        }
        catch( MalformedURLException urlEx ){
            System.err.println("URLInfo::isURLPointToFolder() : " + urlEx.toString() );
            return false;
        }
        
        // URLCont is a project class; presumably the connection to the host of the probe url
        URLCont connection = new URLCont( probeURL );
        HeadMethod headRequest = new HeadMethod( probeURL.getPath() );
        headRequest.setRequestHeader("User-Agent", WebGlobal.USER_AGENT);
        
        if( WebGlobal.DEBUG ){
            System.err.println("URLInfo::isUrlPointerToFolder() : m_path = " + m_path + " | testPath = " + probePath);
            
            // extra state dumped to chase a bug
            System.err.println("URLInfo::isURLPointerToFoldeR() : testURL = " + probeURL.getURL() );
            System.err.println("URLInfo::isURLPointToFolder() : m_host = " + m_host );
            System.err.println("URLInfo::isURLPointToFolder() : m_port = " + m_port );
            System.err.println("URLInfo::isURLPointToFolder() : m_secure = " + m_secure );
            System.err.println("URLInfo::isURLPointToFolder() : m_fullURL = " + m_fullURL );
        }
        
        boolean pointsToFolder = true;
        try{
            // the value returned by execute() is treated as the HTTP status code
            int status = headRequest.execute( connection.getState(), connection );
            
            if( status == 404 )
                pointsToFolder = false;
            
            if( WebGlobal.DEBUG )
                System.err.println("URLInfo::isURLPointerToFolder() : " + status);   
        }
        catch( IOException ioEx ){
            pointsToFolder = false;
            if( WebGlobal.DEBUG )
                System.err.println("URLInfo::isURLPointerToFolder() : " + ioEx );
        }
        catch ( HttpException httpEx ){
            pointsToFolder = false;
            if( WebGlobal.DEBUG )
                System.err.println("URlInfo::isURLPointerToFolder() : " + httpEx );
        }
        finally{
            try{
                connection.close();
            }
            catch( IOException closeEx ){
                System.err.println("URlInfo::isURLPointerToFolder() : " + closeEx);
            }
        }
        
        return pointsToFolder;
    }
//------------------------------------------------------------------------------
    /**
     * Extracts the base url, the path and the anchor out of m_fullURL.
     * <p>
     * The url must start with a non empty scheme followed by "://". The
     * base url is everything located before the first "/" that follows the
     * "://" (no trailing slash); the path runs from that "/" up to the query
     * or anchor, whichever comes first. When the url has no path at all
     * (http://site.com or http://site.com?a=b) the path is set to "/" and
     * the base url stops right before the query or anchor.
     * <p>
     * An anchor is only recorded when its "#" is not located after a "?";
     * a "#" found inside the query is left as part of the query.
     * <p>
     * Examples:
     * <pre>
     * http://site.com/folder/file.html?a=b  base = http://site.com  path = /folder/file.html
     * http://a/x                            base = http://a         path = /x
     * http://site.com?to=/home              base = http://site.com  path = /
     * http://site.com#top                   base = http://site.com  path = /  anchor = #top
     * </pre>
     * @return true when the url could be split, false when it does not start
     *         with a scheme followed by "://".
     */    
    protected boolean setBaseURLAndPathdAnchor(){
        final String schemeSeparator = "://";
        int schemeEnd = m_fullURL.indexOf(schemeSeparator);
        int argumentIndex = m_fullURL.indexOf("?");
        int anchorIndex = m_fullURL.indexOf("#");
        int pathStart;
        int pathEnd;

        /*
         * The "://" must be the first place where a "/", "?" or "#" shows
         * up, otherwise what was found belongs to a path or a query
         * (example : page?redirect=http://site.com) and not to the scheme.
         */
        if( schemeEnd <= 0 )
            return false;
        if( m_fullURL.indexOf("/") != schemeEnd + 1 )
            return false;
        if( (argumentIndex != -1) && (argumentIndex < schemeEnd) )
            return false;
        if( (anchorIndex != -1) && (anchorIndex < schemeEnd) )
            return false;
        
        if( anchorIndex != -1 ){
            if( argumentIndex == -1 ){
                m_anchor = m_fullURL.substring( anchorIndex );
            }
            else if( anchorIndex < argumentIndex ){
                m_anchor = m_fullURL.substring( anchorIndex, argumentIndex );
            }
            else{
                /*
                 *  the # is defined inside the ? argument sent to the file
                 *  so we don't consider it and we set anchorIndex to -1
                 */
                anchorIndex = -1;
            }
        }
        
        // the path stops at the query or at the anchor, whichever comes first
        pathEnd = m_fullURL.length();
        if( argumentIndex != -1 )
            pathEnd = argumentIndex;
        if( (anchorIndex != -1) && (anchorIndex < pathEnd) )
            pathEnd = anchorIndex;
        
        /*
         * Search for the start of the path right after the "://" so the two
         * slashes of the scheme are never looked at again. A "/" located
         * past pathEnd belongs to the query or the anchor, not to the path.
         */
        pathStart = m_fullURL.indexOf("/", schemeEnd + schemeSeparator.length());
        
        if( (pathStart == -1) || (pathStart > pathEnd) ){
            // there is no path specified. Example : http://site.com
            m_baseURL = m_fullURL.substring(0, pathEnd);
            m_path = "/";
        }
        else{
            m_baseURL = m_fullURL.substring(0, pathStart);
            m_path = m_fullURL.substring(pathStart, pathEnd);
        }
        
        return true;
    }
//------------------------------------------------------------------------------
    /**
     * Splits m_path into m_folder and m_file.
     * <p>
     * A path that ends with "/" is a folder. Otherwise the element located
     * after the last "/" is considered a file when, in this order:
     * <ol>
     * <li>it contains a "." (file name with an extension);</li>
     * <li>the url carries a query (arguments are sent to a file);</li>
     * <li>isURLPointToFolder() says the server does not know it as a
     *     folder.</li>
     * </ol>
     * The server is therefore only contacted when the first two checks are
     * not conclusive, which also avoids probing cgi scripts.
     * The folder is finally passed through FolderCleaner and m_path is
     * rebuilt from the cleaned folder and the file.
     * @return false when the path contains no "/" at all, true otherwise.
     */
    protected boolean setFolderAndFile(){
        
        if( m_path.endsWith("/") ){
            m_folder = m_path;
            m_file = "";
        }
        else{
            int lastSlash = m_path.lastIndexOf("/");
            if( lastSlash == -1 )
                return false;
            
            // index of the first character located after the last "/"
            int fileStart = lastSlash + 1;
            String lastElement = m_path.substring( fileStart );
            
            // strings are compared with equals(), == only compares references
            boolean isFile = ( lastElement.indexOf(".") != -1 )
                          || !m_query.equals("")
                          || !isURLPointToFolder();
            
            if( isFile ){
                m_folder = m_path.substring(0, fileStart);
                m_file = lastElement;
            }
            else{
                /*
                 * The server knows the path as a folder: give m_folder the
                 * same trailing "/" it has when the path already ends with
                 * one, so urls built from it (base + folder + file) stay
                 * valid whatever FolderCleaner does with a missing slash.
                 */
                m_folder = m_path + "/";
                m_file = "";
            }
        }
                
        // Cleanup the m_path and the m_folder
        FolderCleaner cleaner = new FolderCleaner(m_folder);
        m_folder = cleaner.toString();
        m_path = m_folder + m_file;
        
        return true;
    }
//------------------------------------------------------------------------------    
    /**
     * Extracts the protocol, the host and the port out of m_baseURL.
     * <p>
     * Logic:
     *  The base url is walked with a StringTokenizer whose delimiter is
     *  switched between ":" and "/" on every call (nextToken(delim) keeps
     *  the new delimiter for the following calls). For
     *  http://site.com:80 the chunks are, in order:
     *  "http", ":", "//site.com" and "80". Four chunks therefore mean that
     *  a port was specified; with any other count the default port of the
     *  protocol is used.
     * <p>
     * NOTE(review): credentials (user:pass@host) produce more than four
     * chunks and are not understood, see the TODO at the top of the file.
     * @return false when the host chunk does not start with "//" or when
     *         the port is not a number between 0 and 65535, true otherwise.
     */
    protected boolean setPortAndProtocolAndHost(){
        
        String strChunk = null;
        String token = ":";
        int i;
        
        StringTokenizer strToken = new StringTokenizer(m_baseURL, token);
        for( i = 0; strToken.hasMoreTokens(); i++){
            strChunk = strToken.nextToken(token);
            
            if( i == 0 ){
                m_protocol = strChunk;
                token = "//";
            }
            else{
                if( i == 2 ){
                    /*
                     * the chunk is expected to look like "//site.com";
                     * anything else cannot be stripped of its two slashes
                     * and means the url is malformed
                     */
                    if( !strChunk.startsWith("//") )
                        return false;
                    m_host = strChunk.substring(2);
                }
                token = ":";
            }
        }
        
        if( i == 4 ){
            int port;
            try{
                port = Integer.parseInt(strChunk);
            }
            catch( NumberFormatException numberEx ){
                System.err.println("URLInfo::setPortAndProtocolAndHost() : " + numberEx + "| strChunk = " + strChunk + " | m_fullURL = " + m_fullURL);
                return false;                
            }
            
            if( (port < 0) || (port > 65535) ){
                System.err.println("URLInfo::setPortAndProtocolAndHost() : port out of range | strChunk = " + strChunk + " | m_fullURL = " + m_fullURL);
                return false;
            }
            m_port = port;
        }
        else{
            if( m_protocol.compareToIgnoreCase("http") == 0 )
                m_port = WebGlobal.HTTP_DEFAULT_PORT;
            else if( m_protocol.compareToIgnoreCase("https") == 0 )
                m_port = WebGlobal.HTTPS_DEFAULT_PORT;
            else
                m_port = WebGlobal.DEFAULT_PORT;
        }
        
        return true;
    }
//------------------------------------------------------------------------------
    /**
     * Reads the query out of m_fullURL: everything from the first "?" up to
     * the end of the url, "?" included. The query is the empty string when
     * the url has no "?".
     */
    protected void setQuery(){
        final int questionMark = m_fullURL.indexOf("?");
        
        // the leading ? is kept as part of the query
        m_query = ( questionMark == -1 ) ? "" : m_fullURL.substring( questionMark );
    }
//------------------------------------------------------------------------------
    /**
     * Replaces the query of this url.
     * @param query the new query; a leading "?" is added when it is missing.
     */
    protected void setQuery( String query ){
        // m_query always starts with ?
        m_query = query.startsWith("?") ? query : ( "?" + query );
    }
//------------------------------------------------------------------------------    
    /** Returns the base url for this url.
     * This is the leading part of the url, up to but not including the
     * "/" that starts the path. It is taken verbatim from the url that was
     * parsed, so a port only shows up if it was written there, and there is
     * no trailing slash.
     * Example:
     * The base url of http://site.com/folder/file.html is http://site.com
     * @return Get the base url as a String of this url.
     *
     */    
    public String getBaseURL(){
        return m_baseURL;
    }
//------------------------------------------------------------------------------
    /** Returns the folder piece of the url, as produced by FolderCleaner
     * when the url was parsed.
     * @return The folder piece.
     */    
    public String getFolder(){
        return m_folder;
    }
//------------------------------------------------------------------------------
    /** Returns the file part of the url.
     * @return file part of this url; the empty string when the url points
     * to a folder.
     */    
    public String getFile(){
        return m_file;
    }
//------------------------------------------------------------------------------
    /** Returns the path part of this url: the cleaned folder followed by
     * the file, without query or anchor.
     * @return The path part of this url.
     */    
    public String getPath(){
        return m_path;
    }
//------------------------------------------------------------------------------    
    /** Returns the port used by this url.
     * @return The port written in the url or, when none was written, the
     * default port WebGlobal defines for the protocol.
     */    
    public int getPort(){
        return m_port;
    }
//------------------------------------------------------------------------------
    /** Returns the protocol used by this url, exactly as it was written
     * before the first ":" (example : http).
     * @return The protocol used by this url.
     */    
    public String getProtocol(){
        return m_protocol;
    }
//------------------------------------------------------------------------------
    /** Returns the query part of this url.
     * @return -   If this url carries arguments,
     *    returns the argument string, leading "?" included.
     * -   Else, returns the empty string (never null).
     */    
    public String getQuery(){
        return m_query;
    }
//------------------------------------------------------------------------------
    /** Returns the url rebuilt from its parsed pieces, in the following
     * format:
     * [base url][folder][file][anchor][query]
     * @return The full url.
     */    
    public String getURL(){
        return m_fullURL;
    }
//------------------------------------------------------------------------------
    /** Returns the address of the resource alone: base url, folder and
     * file, with the query and the anchor left out.
     * @return The url without the arguments and anchor.
     */    
    public String getDirectURL(){
        final String base = getBaseURL();
        
        return base + m_folder + m_file;
    }
//------------------------------------------------------------------------------    
    /** Tells whether the url refers to a HTTPS connection, which is the
     * case when its protocol is "https" (case-insensitive).
     * @return - true if the url references a https connection
     * - false if it doesn't
     */    
    public boolean isSecure(){
        return m_secure;
    }
//------------------------------------------------------------------------------
    /** Returns the host part of the base url, without protocol or port.
     * @return the host of this url.
     */    
    public String getHost(){
        return m_host;
    }
//------------------------------------------------------------------------------    
    /** Compares two urls to see if they refer to the same resource.
     * The full urls (see getURL()) are compared as text, ignoring case.
     * @param toCompare url to compare against.
     * @return - true if the url is the same.
     * - false if it's not.
     */    
    public boolean sameAs( URLInfo toCompare ){
        final String otherURL = toCompare.getURL();
        
        return m_fullURL.compareToIgnoreCase( otherURL ) == 0;
    }
//------------------------------------------------------------------------------
    /** Tells whether this url designates a folder or a file.
     * @return WebGlobal.URL_TYPE_FOLDER when the url has no file part,
     * WebGlobal.URL_TYPE_FILE otherwise.
     */    
    public int getURLType(){
        // compare the content of the string, == would only compare references
        if( "".equals(m_file) )
            return WebGlobal.URL_TYPE_FOLDER;
        else
            return WebGlobal.URL_TYPE_FILE;
    }
//------------------------------------------------------------------------------
    /** Returns the anchor of this url.
     * @return the anchor, leading "#" included, or the empty string when
     * no anchor was recorded for this url.
     */
    public String getAnchor(){
        return m_anchor;
    }
//------------------------------------------------------------------------------
    /** Returns the clean form of this url: base url, folder and file,
     * without the query and the anchor.
     * @return the url without query and anchor.
     */
    public String toString(){
        return m_baseURL + m_folder + m_file;
    }
//------------------------------------------------------------------------------
    /** Returns the url made of the base url, folder, file and query; the
     * recorded anchor is left out. A "#" that was found inside the query
     * is part of the query and is therefore still present.
     * @return the url without its anchor.
     */
    public String getURLWithoutAnchor(){
        return m_baseURL + m_folder + m_file + m_query;
    }
//------------------------------------------------------------------------------
    /*
    public void DEBUG_DUMP_INFO(){
        System.out.println("url : " + getURL() );
        System.out.println("toString : " + toString() );
        System.out.println("direct url : " + getDirectURL() );
        System.out.println("without anchor : " + getURLWithoutAnchor() );
        System.out.println("base url : " + getBaseURL() );
        System.out.println("full path: " + getPath());
        System.out.println("host : " + getHost());
        System.out.println("folder: " + getFolder());
        System.out.println("file: " + getFile());
        System.out.println("Port : " + getPort());
        System.out.println("Protocol : " + getProtocol());
        System.out.println("Query : " + getQuery());
        System.out.println("Anchor : " + getAnchor());
        System.out.println("toString() : " + toString() );
    }
//------------------------------------------------------------------------------
    
    public static void main( String args[] ){
        try{
            URLInfo u = new URLInfo("https://accesd.desjardins.com/file");
            u.DEBUG_DUMP_INFO();
        }
        catch( MalformedURLException urlEx ){
            System.err.println(urlEx);
        }
    }
     */
}
