Package com.google.code.ftspc.lector.indexers

Source Code of com.google.code.ftspc.lector.indexers.CommonFunctions

package com.google.code.ftspc.lector.indexers;

import com.google.code.ftspc.lector.ini_and_vars.Vars;
import com.google.code.ftspc.lector.parsers.Archives.ZIP.MoveDir;
import com.google.code.ftspc.lector.parsers.Parser;
import java.io.File;
import java.util.Date;
import java.util.Random;
import org.mozilla.universalchardet.UniversalDetector;

/**
* Class with common functions that use several other classes.
* @author Arthur Khusnutdinov
*/
public class CommonFunctions extends Thread {

    private File fileForIndexing;
    private Random random = new Random();

    /**
     * Constructor of the class of common functions    
     */
    public CommonFunctions() {
    }

    /**
     * Constructor of the class of common functions
     * @param fileForIndexing The path to the file or folder to be indexed
     */
    public CommonFunctions(File fileForIndexing) {
        this.fileForIndexing = fileForIndexing;
    }

    /**
     * Method to start indexing
     */
    public void indexDocs_main() {
        indexDocs_main(fileForIndexing);
        System.gc();
    }

    private void indexDocs_main(File fileForIndexing) {
        if (fileForIndexing.canRead()) {
            if (fileForIndexing.isDirectory()) {
                String[] files = fileForIndexing.list();
                if (files != null) {
                    for (int i = 0; i < files.length; i++) {
                        indexDocs_main(new File(fileForIndexing, files[i]));
                    }
                }
            } else {
                try {
                    while (Vars.current_run_indexes > Vars.max_threads) {
                        synchronized (this) {
                            wait(300);
                        }
                    }
                    indexDocs_extracting_and_adding(fileForIndexing.getAbsolutePath());
                } catch (Exception ex) {
                    Vars.logger.fatal("Error: ", ex);
                }
            }
        }
    }

    private void indexDocs_extracting_and_adding(String filePath) {
        Parser selectedParser;
        String fileName = filePath.substring(filePath.lastIndexOf(Vars.fileSeparator)+1);
        File someFileForTypeRecognizing = new File(filePath);
        String hash;
        String newFilePath;

        Vars.totalSizeOfProcessedFiles += someFileForTypeRecognizing.length();

        try {
            String type = Vars.tika.detect(someFileForTypeRecognizing);
            if (Vars.parsersFromXML.get(type) != null) {
                selectedParser = (Parser) (Class.forName(
                        Vars.parsersFromXML.get(type).
                        get("class").toString())).newInstance();
                Vars.current_run_indexes++;
                if (selectedParser.getClass().getName().indexOf("ZIP") < 1) {
                    hash = (new Date()).getTime() + Long.toHexString(random.nextLong())
                            + Long.toHexString(random.nextLong());
                    MoveDir moveDir = new MoveDir();
                    newFilePath = Vars.pathToDayDir + "/" + hash;
                    moveDir.copyFile(someFileForTypeRecognizing, new File(newFilePath));
                    someFileForTypeRecognizing.delete();
                    filePath = newFilePath;
                }

                selectedParser.start_th(filePath, fileName);
                type = null;
            } else {
                System.out.println("UNKNOWN TYPE " + type + " " + filePath);
            }
        } catch (Exception ex) {
            Vars.logger.fatal("Error: ", ex);
        }
        selectedParser = null;
        someFileForTypeRecognizing = null;
    }

    /**
     * Method to determine the text encoding of the new algorithm.
     * @param fileName File encoding is to be determined.
     * @return Returns the encoding of the text file.
     * @throws java.io.IOException
     */
    protected String detectEncoding(String fileName) throws java.io.IOException {
        byte[] buf = new byte[4096];
        java.io.FileInputStream fis = new java.io.FileInputStream(fileName);
        UniversalDetector detector = new UniversalDetector(null);
        String encoding;
        int nread;

        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        fis.close();

        encoding = detector.getDetectedCharset();
        detector.reset();
        if (encoding != null) {
            return encoding;
        } else {
            return "UTF-8";
        }
    }
}
TOP

Related Classes of com.google.code.ftspc.lector.indexers.CommonFunctions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.