Package org.jab.docsearch.utils

Source Code of org.jab.docsearch.utils.Utils

/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
package org.jab.docsearch.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Iterator;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.jab.docsearch.constants.OSType;
import org.jab.docsearch.spider.SpiderUrl;

/**
* This utility class primarily performs string manipulation functions.
*
* @version $Id: Utils.java 130 2009-07-21 10:26:00Z henschel $
*/
public final class Utils {
    /**
     * Log4J logger for this class.
     */
    private final static Logger logger = Logger.getLogger(Utils.class.getName());
    /** Path separator taken from {@link FileUtils#PATH_SEPARATOR}; used by the file name helpers below. */
    private final static String PATH_SEPARATOR = FileUtils.PATH_SEPARATOR;
    /** Value of the "line.separator" system property, read once when the class is loaded. */
    public final static String LINE_SEPARATOR = System.getProperty("line.separator");


    /**
     * Replace string
     *
     * @param search   search string
     * @param s        string
     * @param replace  replace string
     * @return         A string where "search" has been replaced with "replace" given a string "s"
     */
    public static String replaceAll(final String search, String s, final String replace) {
        if (search == null || s == null || replace == null) {
            return s;
        }

        // first pos of "search"
        int p = s.indexOf(search);

        // replace till all chars replaced
        while (p != -1) {

            if (p > 0) {
                s = s.substring(0, p) + replace  + s.substring(p + search.length());
            }
            else {
                s = replace  + s.substring(p + search.length());
            }

            // search after replace
            p = s.indexOf(search, p + replace.length());
        }

        return s;
    }


    public static double getAverageSearchScore(ArrayList<LogSearch> loggedSearches) {
        int numIs = loggedSearches.size();
        double returnD = 0.0;
        double totalD = 0.0;
        double temD = 0.0;
        LogSearch ls;
        if (numIs > 0) {
            Iterator<LogSearch> it = loggedSearches.iterator();
            while (it.hasNext()) {
                ls = it.next();
                temD = ls.score;
                totalD += temD;
            } // end while
            if (totalD > 0.0)
                returnD = totalD / numIs;
        }
        return returnD;
    } // end for getAverageSearchScore


    public static boolean hasUser(String toMatch, ArrayList<String> listing) {
        int numUs = listing.size();
        boolean returnB = false;
        if (numUs > 0) {
            Iterator<String> it = listing.iterator();
            while (it.hasNext()) {
               String temC = it.next();
                if (temC.equals(toMatch)) {
                    returnB = true;
                    break;
                }
            } // end while
        } // end has users
        return returnB;
    } // end foir hasUserBool


    /**
     * @return true if a URL ends with a slash
     */
    public static boolean endsWithSlash(String toCkeck) {
        boolean retB = false;
        if ((toCkeck.endsWith("/")) || (toCkeck.endsWith("\\")))
            retB = true;
        return retB;
    }


    /**
     * @return url given a filename and the text to find (match) and replace it
     *         with
     */
    public static String getURL(String fileName, String match, String replace) {
        String returnString = "";
        returnString = replace + fileName.substring(match.length(), fileName.length());
        returnString = Utils.replaceAll("\\", returnString, "/");

        // System.out.println("Converted file("+fileName+") to
        // "+returnString+"\nmatch:"+match+"\nreplace:"+replace);
        return returnString;
    }


    /**
     * @return number of slashes (/) in a URL
     */
    public static int countSlash(String urlToCount) {
        int returnInt = 0;
        int totalLen = urlToCount.length();
        int startSpot = urlToCount.indexOf(PATH_SEPARATOR);
        if (startSpot != -1) {
            returnInt++;
            do {
                startSpot++;
                if (startSpot > totalLen) {
                    break;
                }

                startSpot = urlToCount.indexOf(PATH_SEPARATOR, startSpot);
                if (startSpot == -1) {
                    break;
                }

                returnInt++;

                // System.out.println("Found slash");
            } while (startSpot != -1);
        }

        return returnInt;
    }


    /**
     * @return the number of rows in a html file containing an html table
     */
    public static int getNumObjectRows(String fileName) {
        int returnInt = 0;
        // load file and count <tr tags
        FileInputStream fi = null;
        try {
            File origFile = new File(fileName);
            if (origFile.exists()) {
                fi = new FileInputStream(origFile);
                int curI = 0; // reset i
                char curChar = ' ';
                char lastChar = ' ';
                char nextToLastChar = ' ';
                StringBuffer rowString;
                while (curI != -1) {
                    curI = fi.read();
                    if (curI != -1) {
                        //
                        lastChar = nextToLastChar;
                        nextToLastChar = curChar;
                        curChar = (char) curI;
                        rowString = new StringBuffer();
                        rowString.append(lastChar);
                        rowString.append(nextToLastChar);
                        rowString.append(curChar);
                        if (rowString.toString().toLowerCase().equals("<tr"))
                            returnInt++;
                    } // curI!=-1
                    else {
                        break;
                    }
                }
            } // end for file exists
            else {
                returnInt = -1;
            }
        } // end of trying
        catch (IOException ioe) {
            returnInt = 0;
        }
        finally {
          IOUtils.closeQuietly(fi);
        }
        return returnInt;
    }


    /**
     * Method returns the filename with out parent directories
     *
     * @param fileString  fileString
     * @return            a filename without its parent directory included in the name
     */
    public static String getNameOnly(final String fileString) {
        if (fileString == null) {
            return null;
        }

        // remove parent objects
        if (fileString.indexOf(PATH_SEPARATOR) != -1) {
            return fileString.substring(fileString.lastIndexOf(PATH_SEPARATOR) + 1, fileString.length());
        }
        else if (fileString.indexOf("\\") != -1) {
            return fileString.substring(fileString.lastIndexOf("\\") + 1, fileString.length());
        }
        else if (fileString.indexOf("/") != -1) {
            return fileString.substring(fileString.lastIndexOf("/") + 1, fileString.length());
        }
        else {
            return fileString;
        }
    }


    /**
     * @return parent folder for a file named in fileString
     */
    public static String getFolderOnly(String fileString) {
        if (fileString.indexOf(PATH_SEPARATOR) != -1)
            return fileString.substring(0, fileString.lastIndexOf(PATH_SEPARATOR));
        else if (fileString.indexOf("\\") != -1)
            return fileString.substring(0, fileString.lastIndexOf("\\"));
        else if (fileString.indexOf("/") != -1)
            return fileString.substring(0, fileString.lastIndexOf("/"));
        else
            return "";
    }


    /**
     * Gets OS type
     *
     * @see OSType
     * @return  OSType number
     */
    public static int getOSType() {
        String os = System.getProperty("os.name").toLowerCase();

        if (os.indexOf("linux") != -1) {
            return OSType.LINUX;
        }
        else if (os.indexOf("windows") != -1) {
            return OSType.WIN_32;
        }
        else if (os.indexOf("nix") != -1) {
            return OSType.UNIX;
        }
        else if (os.indexOf("mac") != -1) {
            return OSType.MAC;
        }
        else {
            return OSType.UNKNOWN;
        }
    }


    /**
     * Gets user home
     *
     * @param osNum
     * @param defaultOsUserHome
     * @return                   users home directory - varies by platform
     */
    public static String getUserHome(int osNum, String defaultOsUserHome) {
        String returnString = defaultOsUserHome;

        switch (osNum) {
            case OSType.WIN_32: {
                String tempHome = Messages.getString("win32.home");
                if (tempHome.equals("$HOME")) {
                    returnString = System.getProperty("user.home");
                }
                else if (tempHome.equals(".")) {
                    returnString = System.getProperty("user.dir");
                }
                break;
            }
            case OSType.LINUX: {
                String tempHome = Messages.getString("lin.home");
                if (tempHome.equals("$HOME")) {
                    returnString = System.getProperty("user.home");
                }
                else if (tempHome.equals(".")) {
                    returnString = System.getProperty("user.dir");
                }
                break;
            }
            case OSType.UNIX: {
                returnString = System.getProperty("user.dir");
                break;
            }
            case OSType.MAC: {
                returnString = System.getProperty("user.dir");
                break;
            }
            default: { // don't know what it is
                returnString = System.getProperty("user.dir");
                break;
            }
        }

        logger.log(NoticeLevel.NOTICE, "getUserHome() using HOME directory=" + returnString);

        return returnString;
    }


    /**
     * Gets CDROM directory
     *
     * @return path to a CD ROM - varies depending on the platform
     */
    public static String getCDROMDir(int osNum) {
        if (logger.isDebugEnabled()) {
            logger.debug("getCDROMDir('" + osNum + "')");
        }

        String tempCdDir = System.getProperty("user.dir");
        if (! tempCdDir.equals("")) {
            File testFile = new File(tempCdDir);
            if (testFile.exists()) {
                logger.info("getCDROMDir() CDROM Dir: " + tempCdDir);
                return tempCdDir;
            }
            else {
                logger.error("getCDROMDir() NO CDROM DIR FOUND... ");
                return "";
            }
        }
        else {
            logger.info("getCDROMDir() NO CDROM DIR FOUND... ");
            return "";
        }
    }


    /**
     * @return a concatenated string of to 37 chars in length- stips of the
     *         beginning
     */
    public static String concatStr(String toConcat) {
        if (toConcat.length() > 37)
            return "~" + toConcat.substring(toConcat.length() - 37, toConcat.length());
        else
            return toConcat;
    } // end for concat


    /**
     * @return a concatenated string of the specified length in chars - stips of
     *         the ending
     */
    public static String concatStrTo(String toConcat, int len) {
        if (toConcat.length() > len)
            return "~" + toConcat.substring(toConcat.length() - len, toConcat.length());
        else
            return toConcat;
    } // end for concat


    /**
     * @return a concatenated string of the specified length in chars - stips of
     *         the beginning
     */
    public static String concatStrToEnd(String toConcat, int len) {
        if (toConcat.length() > len)
            return toConcat.substring(0, len) + "...";
        else
            return toConcat;
    } // end for concat


    /**
     * Looks for tag attribute in tag
     *
     * @param toLookFor
     * @param toLookIn
     * @return attribute from the text of an html tag...
     */
    public static String getTagString(String toLookFor, String toLookIn) {
        String toLookInLower = toLookIn.toLowerCase();

        if (toLookInLower.indexOf(toLookFor) == -1) {
            return "";
        }
        else {
            boolean firstQFnd = true;
            StringBuffer tempS = new StringBuffer();
            int endPos = toLookIn.length();
            int startPos = toLookInLower.indexOf(toLookFor) + toLookFor.length();

            for (int i = startPos; i < endPos; i++) {
                char tC = toLookIn.charAt(i);
                if (toLookIn.charAt(i) == '"') {
                    if (! firstQFnd) {
                        break;
                    }
                    else {
                        firstQFnd = false;
                    }
                }
                else if (toLookIn.charAt(i) == '>') {
                    break;
                }
                else {
                    tempS.append(tC);
                }
            }

            if (tempS.toString().trim().equals("")) {
                return "";
            }
            else {
                return tempS.toString();
            }
        }
    }


    /**
     * @return text given some HTML --- strips out markup
     */
    public static String nonTagText(String removeStr) {
        StringBuffer retBuf = new StringBuffer();
        int strLen = removeStr.length();
        boolean inTag = false;
        char curChar = ' ';
        for (int i = 0; i < strLen; i++) {
            curChar = removeStr.charAt(i);
            if (curChar == '<')
                inTag = true;
            else if (curChar == '>') {
                retBuf.append(" ");
                inTag = false;
            }
            if ((!inTag) && (curChar != '>'))
                retBuf.append(curChar);
            else if ((!inTag) && (curChar == '>'))
                retBuf.append(" ");
        } // end for string
        return retBuf.toString();
    } // end for nonTagText


    /**
     * Gets domain from URL
     *
     * @param  url  URL
     * @return      website name of a web page given a url
     */
    public static String getDomainURL(final String url) {
        if (url != null) {

            int slashCount = 0;
            for (int i = 0; i < url.length(); i++) {
                if (url.charAt(i) == '/') {
                    slashCount++;
                    if (slashCount == 3) {
                        return url.substring(0, i +1);
                    }
                }
            }

            // here no third slash found
            if (slashCount == 2) {
                return url + '/';
            }
        }

        logger.warn("getDomainUrl() couldn't retrieve domain from: " + url);
        return "";
    }


    /**
     * @return fully qualified URL from a relative hyperlink in a web page
     *         specified by baseUrl
     */
    public static String getRealUrl(String link, String baseUrl) {
        String returnS = "";
        String domainUrl = Utils.getDomainURL(baseUrl);
        int anchorPos = link.indexOf("#");
        if (anchorPos != -1)
            link = link.substring(0, anchorPos);
        // DETERMINE THE BASE URL
        if (baseUrl.endsWith("/")) {
            baseUrl = baseUrl.substring(0, baseUrl.length() - 1);
        }
        if (link.startsWith("http")) {
            returnS = link;
        }
        else if (link.indexOf("/") == -1) {
            returnS = baseUrl + "/" + link;
        }
        else if (link.startsWith("/")) {
            returnS = domainUrl + link;
        }
        else if (link.indexOf("../") == -1) {
            returnS = baseUrl + "/" + link;
        }

        if (returnS.equals("")) {
            logger.error("getRealUrl() Failed to construct full URL from link:" + link + "\nFound on page:" + baseUrl);
        }
        return returnS;
    }


    /**
     * Convert HTML entities to their text equivalents
     *
     * @param link  link
     * @return      url with HTML entities converted to their text equivalents
     */
    public static String getNormalUrl(String link) {
        link = Utils.replaceAll("&amp;", link, "&");
        link = Utils.replaceAll("%2B", link, "+");
        link = Utils.replaceAll("%2F", link, "/");
        link = Utils.replaceAll("%2f", link, "/");
        link = Utils.replaceAll("%2C", link, ",");
        link = Utils.replaceAll("%22", link, "\"");
        link = Utils.replaceAll("%23", link, "#");
        link = Utils.replaceAll("%24", link, "$");
        link = Utils.replaceAll("%3A", link, ":");
        link = Utils.replaceAll("%25", link, "%");
        link = Utils.replaceAll("+", link, " ");
        link = Utils.replaceAll("%26", link, "&");
        link = Utils.replaceAll("%27", link, "'");
        link = Utils.replaceAll("%20", link, " ");
        link = Utils.replaceAll("%28", link, "(");
        link = Utils.replaceAll("%29", link, ")");
        link = Utils.replaceAll("%5E", link, "^");
        link = Utils.replaceAll("%3f", link, "?");
        link = Utils.replaceAll("|", link, "%7C");
        return link;
    }


    /**
     * @return text concatenated to a specified size
     */
    public static String concatEnd(String toShorten, int size) {
        String returnString = "";
        int comS = toShorten.length();
        if (comS > size) {
            returnString = "..." + toShorten.substring(comS - size, comS);
        } else
            returnString = toShorten;
        return returnString;
    } // end of concat


    /**
     * Gets the parent folder of thr URL.
     *
     * @param url URL
     * @return    parent folder for a URL
     */
    public static String getBaseURLFolder(final String url) {
        String baseURL = null;

        if (url != null && url.trim().length() != 0) {

            // Is URL without slash after toplevel domain?
            int doubleSlash = url.indexOf("//");
            if (doubleSlash != -1) {

                if (url.indexOf("/", doubleSlash + 2) != -1) {
                    // Removes all after last slash
                    int lastSlash = url.lastIndexOf("/");
                    if (lastSlash != -1) {
                        baseURL = url.substring(0, lastSlash + 1);
                    }
                }
                else {
                    baseURL = url + "/";
                }
            }
        }

        if (baseURL == null) {
            baseURL = url;
        }

        if (logger.isDebugEnabled()) {
            logger.debug("getBaseUrlFolder() Requiring BASE URL : " + baseURL);
        }

        return baseURL;
    }


    /**
     * Gets all SpiderLinks from file
     *
     * @param  filename
     * @return           an ArrayList of SpiderUrls for links in a downloaded file
     */
    public static ArrayList<SpiderUrl> getSpiderLinks(String filename) {
        ArrayList<SpiderUrl> spiderList = new ArrayList<SpiderUrl>();

        if (filename == null) {
            logger.warn("getSpiderLinks() filename is null");
            return spiderList;
        }

        BufferedReader reader = null;
        try {
            File loadFile = new File(filename);
            if (! loadFile.exists()) {
                logger.warn("getSpiderLinks() Spider URL file does not exist " + filename);
            }
            else {
                reader = new BufferedReader(new FileReader(loadFile));

                String line;
                while ((line = reader.readLine()) != null) {
                    SpiderUrl tempUrl = new SpiderUrl(line);
                    spiderList.add(tempUrl);
                }
            }
        }
        catch (IOException ioe) {
            logger.error("getSpiderLinks() Error loading spider url links from file " + filename, ioe);
        }
        finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            }
            catch (IOException ioe) {
                logger.fatal("getSpiderLinks() can't close BufferedReader");
            }
        }

        // return the arraylist of spider url objects
        return spiderList;
    }


    /**
     * @return text with html entities in place of the and sign and greater than
     *         and less than signs
     */
    public static String convertTextToHTML(String text) {
        String returnString = text;

        returnString = Utils.replaceAll("&", returnString, "&amp;"); // and symbol
        returnString = Utils.replaceAll("\n", returnString, "&nbsp;"); // spacer
        returnString = Utils.replaceAll("<", returnString, "&lt;"); // less than
        returnString = Utils.replaceAll(">", returnString, "&gt;"); // greater than
        returnString = Utils.replaceAll("\"", returnString, "&quot;"); // quot

        return returnString;
    }


    /**
     * Gets percent string from float with format xx.x %
     * TODO rewrite method, also with locale for use corrent . or ,
     *
     * @param var  float for converting
     * @return     PErcent string in format xx.x %
     */
    public static String getPercentStringFromScore(float var) {
        var *= 100.0f;

        return getStringFromFloat(var) + " %";
    }


    /**
     * Gets string from float with format xx.x
     * TODO rewrite method, also with locale for use corrent . or ,
     *
     * @param var  float for converting
     * @return     String in format xx.x
     */
    public static String getStringFromFloat(double var) {
        String temp = Double.toString(var);
        int length = temp.length();
        StringBuilder buf = new StringBuilder();

        int pastDot = 0;
        boolean foundDot = false;
        for (int i = 0; i < length; i++) {
            char curChar = temp.charAt(i);

            if (curChar == '.') {
                foundDot = true;
            }
            else if (foundDot) {
                pastDot++;
            }

            buf.append(curChar);

            if (pastDot >= 1) {
                break;
            }
        }

        return buf.toString();
    }


    /**
     * Get values in kilo format
     * TODO rewrite method, also with kilo byte and locale for Ko for France!! update also all references
     * TODO an also with locale for use corrent . or ,
     *
     * @param toGet
     * @return
     */
    public static String getKStyle(String toGet) {
        String returnString = null;

        try {
            double temp = Double.parseDouble(toGet) / 1024.0;
            returnString = getStringFromFloat(temp);
        }
        catch (Exception e) {
            returnString = toGet;
        }

        return returnString + " k";
    }

}
TOP

Related Classes of org.jab.docsearch.utils.Utils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.