Package org.apache.tika.language

Examples of org.apache.tika.language.LanguageProfile$Counter


  private static void addProfile(Language language) {
    final String PROFILE_SUFFIX = ".ngp";
    final String PROFILE_ENCODING = "UTF-8";

    try {
      final LanguageProfile profile = new LanguageProfile();

      final String languageCode = language.getShortName();
      final String detectionFile = "/" + languageCode + "/" + languageCode + PROFILE_SUFFIX;
      if (!JLanguageTool.getDataBroker().resourceExists(detectionFile)) {
        // that's okay, not every language comes with its own detection file,
        // as Tika supports most languages out of the box.
        return;
      }
      final InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(detectionFile);
      try {
        final InputStreamReader in = new InputStreamReader(stream, PROFILE_ENCODING);
        final BufferedReader reader =
                new BufferedReader(in);
        String line = reader.readLine();
        while (line != null) {
          if (line.length() > 0 && !line.startsWith("#")) {
            final int space = line.indexOf(' ');
            profile.add(
                    line.substring(0, space),
                    Long.parseLong(line.substring(space + 1)));
          }
          line = reader.readLine();
        }
View Full Code Here


  }

  private void extractLanguage(JCas plainTextView) {
    try {
      LanguageIdentifier li = new LanguageIdentifier(new LanguageProfile(plainTextView.getDocumentText()));
      if (li.getLanguage() != null && !"".equals(li.getLanguage()))
        plainTextView.setDocumentLanguage(li.getLanguage());
    }
    catch (Exception e) {
      this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Could not extract language due to ")
View Full Code Here

    }
  }

  private static void addProfile(Language language) {
    try {
      final LanguageProfile profile = new LanguageProfile();

      final String languageCode = language.getShortName();
      final String detectionFile = "/" + languageCode + "/" + languageCode + PROFILE_SUFFIX;
      if (!JLanguageTool.getDataBroker().resourceExists(detectionFile)) {
        // that's okay, not every language comes with its own detection file,
        // as Tika supports most languages out of the box.
        return;
      }
      try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(detectionFile)) {
        final InputStreamReader in = new InputStreamReader(stream, PROFILE_ENCODING);
        final BufferedReader reader =
                new BufferedReader(in);
        String line = reader.readLine();
        while (line != null) {
          if (line.length() > 0 && !line.startsWith("#")) {
            final int space = line.indexOf(' ');
            profile.add(
                    line.substring(0, space),
                    Long.parseLong(line.substring(space + 1)));
          }
          line = reader.readLine();
        }
View Full Code Here

     * @throws Exception
     */
    @Override
    public String translate(String text, String targetLanguage) throws Exception {
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return translate(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

    }

    @Override
    public String translate(String text, String targetLanguage) throws Exception {
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return translate(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

     * @param targetLanguage The target language of translation.
     * @return true if the cache contains a translation of the text, false otherwise.
     */
    public boolean contains(String text, String targetLanguage) {
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return contains(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

  public String translate(String text, String targetLanguage)
      throws Exception {
    if (!this.isAvailable)
      return text;
    LanguageIdentifier language = new LanguageIdentifier(
        new LanguageProfile(text));
    String sourceLanguage = language.getLanguage();
    return translate(text, sourceLanguage, targetLanguage);
  }
View Full Code Here

    public String translate(String text, String targetLanguage)
            throws Exception {
        if (!this.isAvailable)
            return text;
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return translate(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.language.LanguageProfile$Counter

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.