Package org.apache.tika.language

Examples of org.apache.tika.language.LanguageIdentifier


     *
     * @throws IOException if there is an error when reading the text
     */
    @Test
    public void testLangId() throws IOException {
        LanguageIdentifier tc = new LanguageIdentifier(text);
        String language = tc.getLanguage();
        assertEquals("en", language);
    }
View Full Code Here


    /**
     * @return the identified language of the content
     */
    public String getLanguage() {
        return new LanguageIdentifier(content).getLanguage();
    }
View Full Code Here

  @Test
  public void testLanguageIndentifier() {
    try {
      long total = 0;
      LanguageIdentifier identifier;
      BufferedReader in = new BufferedReader(new InputStreamReader(this
          .getClass().getResourceAsStream("test-referencial.txt")));
      String line = null;
      while ((line = in.readLine()) != null) {
        String[] tokens = line.split(";");
        if (!tokens[0].equals("")) {
          StringBuilder content = new StringBuilder();
          // Test each line of the file...
          BufferedReader testFile = new BufferedReader(new InputStreamReader(
              this.getClass().getResourceAsStream(tokens[0]), "UTF-8"));
          String testLine = null, lang = null;
          while ((testLine = testFile.readLine()) != null) {
            content.append(testLine + "\n");
            testLine = testLine.trim();
            if (testLine.length() > 256) {
              identifier = new LanguageIdentifier(testLine);
              lang = identifier.getLanguage();
              assertEquals(tokens[1], lang);
            }
          }
          testFile.close();

          // Test the whole file
          long start = System.currentTimeMillis();
          System.out.println(content.toString());
          identifier = new LanguageIdentifier(content.toString());
          lang = identifier.getLanguage();
          System.out.println(lang);
          total += System.currentTimeMillis() - start;
          assertEquals(tokens[1], lang);
        }
      }
View Full Code Here

        InputStream in = this.getClass().getClassLoader().getResourceAsStream(
                testFileName);
        assertNotNull("failed to load resource " + testFileName, in);

        String text = IOUtils.toString(in);
        LanguageIdentifier tc = new LanguageIdentifier(text);
        String language = tc.getLanguage();
        assertEquals("en", language);
    }
View Full Code Here

        // truncate text to some piece from the middle if probeLength > 0
        int checkLength = probeLength;
        if (checkLength > 0 && text.length() > checkLength) {
            text = text.substring(text.length() / 2 - checkLength / 2, text.length() / 2 + checkLength / 2);
        }
        LanguageIdentifier languageIdentifier = new LanguageIdentifier(text);
        String language = languageIdentifier.getLanguage();
        log.info("language identified as " + language);

        // add language to metadata
        MGraph g = ci.getMetadata();
        ci.getLock().writeLock().lock();
View Full Code Here

     * @return The translated text.
     * @throws Exception
     */
    @Override
    public String translate(String text, String targetLanguage) throws Exception {
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return translate(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

        return translatedText;
    }

    @Override
    public String translate(String text, String targetLanguage) throws Exception {
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return translate(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

     * @param text What string to check for.
     * @param targetLanguage The target language of translation.
     * @return true if the cache contains a translation of the text, false otherwise.
     */
    public boolean contains(String text, String targetLanguage) {
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return contains(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

  @Override
  public String translate(String text, String targetLanguage)
      throws Exception {
    if (!this.isAvailable)
      return text;
    LanguageIdentifier language = new LanguageIdentifier(
        new LanguageProfile(text));
    String sourceLanguage = language.getLanguage();
    return translate(text, sourceLanguage, targetLanguage);
  }
View Full Code Here

    @Override
    public String translate(String text, String targetLanguage)
            throws Exception {
        if (!this.isAvailable)
            return text;
        LanguageIdentifier language = new LanguageIdentifier(
                new LanguageProfile(text));
        String sourceLanguage = language.getLanguage();
        return translate(text, sourceLanguage, targetLanguage);
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.language.LanguageIdentifier

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.