Package dk.dren.hunspell

Source Code of dk.dren.hunspell.Hunspell$Dictionary

package dk.dren.hunspell;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

import com.sun.jna.Native;
import com.sun.jna.Pointer;
import com.sun.jna.ptr.PointerByReference;

/**
* The simple hunspell library frontend which takes care of creating
* and singleton'ing the library instance (no need to load it more than once
* per process) .
*
* The Hunspell java bindings are licensed under the same terms as Hunspell itself (GPL/LGPL/MPL tri-license),
* see the file COPYING.txt in the root of the distribution for the exact terms.
*
* @author Flemming Frandsen (flfr at stibo dot com)
*/

public class Hunspell {

    /**
     * The Singleton instance of Hunspell
     */
    private static Hunspell hunspell = null;

    /**
     * The native library instance, created by JNA.
     */
    private HunspellLibrary hsl = null;

  /**
   * The library file that was loaded.
   */
  private String libFile;

    /**
     * The instance of the HunspellManager, looks for the native lib in the
     * default directories
     */
    public static Hunspell getInstance() throws UnsatisfiedLinkError, UnsupportedOperationException {
    return getInstance(null);
    }

    /**
     * The instance of the HunspellManager, looks for the native lib in
     * the directory specified.
     *
     * @param libDir Optional absolute directory where the native lib can be found.
     */
    public static Hunspell getInstance(String libDir) throws UnsatisfiedLinkError, UnsupportedOperationException {
        if (hunspell != null) {
            return hunspell;
        }

        hunspell = new Hunspell(libDir);
        return hunspell;
    }

    protected void tryLoad(String libFile) throws UnsupportedOperationException {
    hsl = (HunspellLibrary)Native.loadLibrary(libFile, HunspellLibrary.class);
    }


    /**
     * Constructor for the library, loads the native lib.
     *
     * Loading is done in the first of the following three ways that works:
     * 1) Unmodified load in the provided directory.
     * 2) libFile stripped back to the base name (^lib(.*)\.so on unix)
     * 3) The library is searched for in the classpath, extracted to disk and loaded.
     *
     * @param libDir Optional absolute directory where the native lib can be found.
     * @throws UnsupportedOperationException if the OS or architecture is simply not supported.
     */
    protected Hunspell(String libDir) throws UnsatisfiedLinkError, UnsupportedOperationException {

    libFile = libDir != null ? libDir+"/"+libName() : libNameBare();
    try {
      hsl = (HunspellLibrary)Native.loadLibrary(libFile, HunspellLibrary.class);
    } catch (UnsatisfiedLinkError urgh) {

      // Oh dear, the library was not found in the file system, let's try the classpath
      libFile = libName();
      InputStream is = Hunspell.class.getResourceAsStream("/"+libFile);
      if (is == null) {
        throw new UnsatisfiedLinkError("Can't find "+libFile+
                         " in the filesystem nor in the classpath\n"+
                         urgh);
      }

      // Extract the library from the classpath into a temp file.
      File lib;
      FileOutputStream fos = null;
      try {
        lib = File.createTempFile("jna", "."+libFile);
        lib.deleteOnExit();
        fos = new FileOutputStream(lib);
        int count;
        byte[] buf = new byte[1024];
        while ((count = is.read(buf, 0, buf.length)) > 0) {
          fos.write(buf, 0, count);
        }

      } catch(IOException e) {
        throw new Error("Failed to create temporary file for "+libFile, e);

      } finally {
        try { is.close(); } catch(IOException e) { }
        if (fos != null) {
          try { fos.close(); } catch(IOException e) { }
        }
      }
      //System.out.println("Loading temp lib: "+lib.getAbsolutePath());
      hsl = (HunspellLibrary)Native.loadLibrary(lib.getAbsolutePath(), HunspellLibrary.class);
    }
    }

  public String getLibFile() {
    return libFile;
  }

    /**
     * Calculate the filename of the native hunspell lib.
     * The files have completely different names to allow them to live
     * in the same directory and avoid confusion.
     */
    public static String libName() throws UnsupportedOperationException {
    String os = System.getProperty("os.name").toLowerCase();
    if (os.startsWith("windows")) {
      return libNameBare()+".dll";

    } else if (os.startsWith("mac os x")) {
      //      return libNameBare()+".dylib";
      return "lib"+libNameBare()+".dylib";

    } else {
      return "lib"+libNameBare()+".so";
    }
    }

    public static String libNameBare() throws UnsupportedOperationException {
    String os = System.getProperty("os.name").toLowerCase();
    String arch = System.getProperty("os.arch").toLowerCase();

    // Annoying that Java doesn't have consistent names for the arch types:
    boolean x86  = arch.equals("x86")    || arch.equals("i386"|| arch.equals("i686");
    boolean amd64= arch.equals("x86_64") || arch.equals("amd64") || arch.equals("ia64n");

    if (os.startsWith("windows")) {
      if (x86) {
        return "hunspell-win-x86-32";
      }
      if (amd64) {
        return "hunspell-win-x86-64";
      }

    } else if (os.startsWith("mac os x")) {
      if (x86) {
        return "hunspell-darwin-x86-32";
      }
      if (amd64) {
        return "hunspell-darwin-x86-64";
      }
      if (arch.equals("ppc")) {
        return "hunspell-darwin-ppc-32";
      }

    } else if (os.startsWith("linux")) {
      if (x86) {
        return "hunspell-linux-x86-32";
      }
      if (amd64) {
        return "hunspell-linux-x86-64";
      }

    } else if (os.startsWith("sunos")) {
      //if (arch.equals("sparc")) {
      //  return "hunspell-sunos-sparc-64";
      //}
    }

    throw new UnsupportedOperationException("Unknown OS/arch: "+os+"/"+arch);
    }

    /**
     * This is the cache where we keep the already loaded dictionaries around
     */
    private HashMap<String, Dictionary> map = new HashMap<String, Dictionary>();

    /**
     * This is the where we keep the last modified date for dictionary files
     */
  private HashMap<String, Long> modMap = new HashMap<String, Long>();

    /**
     * Gets an instance of the dictionary.
     *
     * @param baseFileName the base name of the dictionary,
     * passing /dict/da_DK means that the files /dict/da_DK.dic
     * and /dict/da_DK.aff get loaded
     */
    public Dictionary getDictionary(String baseFileName)
    throws FileNotFoundException, UnsupportedEncodingException {

    // Check the last modified date to detect if the dictionary files have changed and reload if they have.
    File dicFile = new File(baseFileName + ".dic");
    File affFile = new File(baseFileName + ".aff");

    // TODO: Perhaps we should limit the frequency of stat'ing these files as we're spamming system calls this way:
    Long lastModified;

    try {
      lastModified = new Long(dicFile.lastModified() + affFile.lastModified());
    } catch (SecurityException e) {
      // Meh, there's nothing we can do about it, but it should never happen anyway.
      lastModified = new Long(0);
    }

    if (modMap.containsKey(baseFileName) && modMap.get(baseFileName) != lastModified) {
      destroyDictionary(baseFileName);
    }

    if (map.containsKey(baseFileName)) {
      return map.get(baseFileName);

    } else {
      Dictionary d = new Dictionary(baseFileName);
      map.put(baseFileName, d);
      modMap.put(baseFileName, lastModified);
      return d;
    }
    }

    /**
     * Removes a dictionary from the internal cache
     *
     * @param baseFileName the base name of the dictionary, as passed to
     * getDictionary()
     */
    public void destroyDictionary(String baseFileName) {
    if (map.containsKey(baseFileName)) {
      map.remove(baseFileName);
      modMap.remove(baseFileName);
    }
    }

    /**
     * Class representing a single dictionary.
     */
    public class Dictionary {
    /**
     * The pointer to the hunspell object as returned by the hunspell
     * constructor.
     */
    private Pointer hunspellDict = null;

    /**
     * The encoding used by this dictionary
     */
    private String encoding;


    /**
     * Creates an instance of the dictionary.
     * @param baseFileName the base name of the dictionary,
     */
    Dictionary(String baseFileName) throws FileNotFoundException,
                         UnsupportedEncodingException {
      File dic = new File(baseFileName + ".dic");
      File aff = new File(baseFileName + ".aff");

      if (!dic.canRead() || !aff.canRead()) {
        throw new FileNotFoundException("The dictionary files "+
                        baseFileName+
                        "(.aff|.dic) could not be read");
      }

      hunspellDict = hsl.Hunspell_create(aff.toString(), dic.toString());
      encoding = hsl.Hunspell_get_dic_encoding(hunspellDict);

      // This will blow up if the encoding doesn't exist
      stringToBytes("test");
    }

    /**
     * Deallocate the dictionary.
     */
    public void destroy() {
      if (hsl != null && hunspellDict != null) {
        hsl.Hunspell_destroy(hunspellDict);
        hunspellDict = null;
      }
    }

    /**
     * Check if a word is spelled correctly
     *
     * @param word The word to check.
     */
    public boolean misspelled(String word) {
      try {
        return hsl.Hunspell_spell(hunspellDict, stringToBytes(word)) == 0;
      } catch (UnsupportedEncodingException e) {
        return true; // this should probably never happen.
      }
    }

    /**
     * Convert a Java string to a zero terminated byte array, in the
     * encoding of the dictionary, as expected by the hunspell functions.
     */
    protected byte[] stringToBytes(String str)
      throws UnsupportedEncodingException {
      return (str+"\u0000").getBytes(encoding);
    }

    /**
     * Returns a list of suggestions
     *
     * @param word The word to check and offer suggestions for
     */
    public List<String> suggest(String word) {
      try {
        int suggestionsCount = 0;
        PointerByReference suggestions = new PointerByReference();
                suggestionsCount = hsl.Hunspell_suggest(
                            hunspellDict, suggestions, stringToBytes(word));

        return pointerToCStringsToList(suggestions, suggestionsCount);
      } catch (UnsupportedEncodingException ex) {
        // Shouldn't happen...
        return Collections.emptyList();
      }
    }

    /**
     * Returns a list of analyses
     *
     * @param word The word to analyze
     */
    public List<String> analyze(String word) {
      try {
        int analysesCount = 0;
        PointerByReference analyses = new PointerByReference();
                analysesCount = hsl.Hunspell_analyze(
                            hunspellDict, analyses, stringToBytes(word));

        return pointerToCStringsToList(analyses, analysesCount);
      } catch (UnsupportedEncodingException ex) {
        // Shouldn't happen...
        return Collections.emptyList();
      }
    }

    /**
     * Returns a list of stems
     *
     * @param word The word to find the stem for
     */
    public List<String> stem(String word) {
      try {
        int stemsCount = 0;
        PointerByReference stems = new PointerByReference();
                stemsCount = hsl.Hunspell_stem(
                            hunspellDict, stems, stringToBytes(word));

        return pointerToCStringsToList(stems, stemsCount);
      } catch (UnsupportedEncodingException ex) {
        // Shouldn't happen...
        return Collections.emptyList();
      }
    }

    private List<String> pointerToCStringsToList(PointerByReference slst, int n) {
      if ( n == 0 ) {
        return Collections.emptyList();
      }

      List<String> strings = new ArrayList<String>(n);

      try {
        // Get each of the suggestions out of the pointer array.
        Pointer[] pointerArray = slst.getValue().
          getPointerArray(0, n);

        for (int i=0; i<n; i++) {

          /* This only works for 8 bit chars, luckily hunspell uses either
             8 bit encodings or utf8, if someone implements support in
             hunspell for utf16 we are in trouble.
          */
          long len = pointerArray[i].indexOf(0, (byte)0);
          if (len != -1) {
            if (len > Integer.MAX_VALUE) {
              throw new RuntimeException(
                             "String improperly terminated: " + len);
            }
            byte[] data = pointerArray[i].getByteArray(0, (int)len);
            strings.add(new String(data, encoding));
          }
        }

      } catch (UnsupportedEncodingException e) {
        // Shouldn't happen...
      } finally {
        hsl.Hunspell_free_list(hunspellDict, slst, n);
      }

      return strings;
    }
    }
}
TOP

Related Classes of dk.dren.hunspell.Hunspell$Dictionary

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.