Examples of SrxDocument


Examples of net.sourceforge.segment.srx.SrxDocument

      srxReader = new BufferedReader(new InputStreamReader(
              JLanguageTool.getDataBroker().getFromResourceDirAsStream(RULES), "utf-8"));
      final Map<String, Object> parserParameters = new HashMap<>();
      parserParameters.put(Srx2SaxParser.VALIDATE_PARAMETER, true);
      final SrxParser srxParser = new Srx2SaxParser(parserParameters);
      final SrxDocument document = srxParser.parse(srxReader);
      return document;
    } catch (IOException e) {
      throw new RuntimeException("Could not load rules " + RULES + " from resource dir "
              + JLanguageTool.getDataBroker().getResourceDir(), e);
    } finally {
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

   * @return initialized document
   */
  public SrxDocument parse(Reader reader) {
    Srx srx = (Srx) bind.unmarshal(reader);

    SrxDocument document = new SrxDocument();
    document.setCascade("yes".equals(srx.getHeader().getCascade()));

    Body body = srx.getBody();

    Map<String, LanguageRule> languageRuleMap = new HashMap<String, LanguageRule>();
    for (Languagerule lr : body.getLanguagerules().getLanguagerule()) {
      LanguageRule languageRule = new LanguageRule(lr
          .getLanguagerulename());
      for (net.sourceforge.segment.srx.io.bind.Rule r : lr.getRule()) {
        boolean breakRule = !"no".equals(r.getBreak());

        String before;
        if (r.getBeforebreak() != null) {
          before = r.getBeforebreak().getContent();
        } else {
          before = "";
        }

        String after;
        if (r.getAfterbreak() != null) {
          after = r.getAfterbreak().getContent();
        } else {
          after = "";
        }

        Rule rule = new Rule(breakRule, before, after);
        languageRule.addRule(rule);
      }
      languageRuleMap.put(languageRule.getName(), languageRule);
    }

    for (Languagemap lm : body.getMaprules().getLanguagemap()) {
      LanguageRule languageRule = languageRuleMap.get(lm
          .getLanguagerulename());
      if (languageRule == null) {
        log.warn("Language map \"" + lm.getLanguagepattern()
            + "\": language rule \"" + lm.getLanguagerulename()
            + "\" not found.");
      } else {
        document.addLanguageMap(lm.getLanguagepattern(), languageRule);
      }
    }

    return document;
  }
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

      if (preload) {
        preloadText(reader, profile);
      }

      SrxDocument document = createSrxDocument(commandLine, profile);

      createAndSegment(commandLine, document, reader, writer, profile);

      if (twice) {
       
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

  }
 

  private SrxDocument createSrxDocument(CommandLine commandLine,
      boolean profile) throws IOException {
    SrxDocument document;

    long start = System.currentTimeMillis();

    if (commandLine.hasOption("generate-srx")) {
      if (profile) {
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

      SrxTransformer transformer = new SrxAnyTransformer();
      srxReader = transformer.transform(srxReader, parameterMap);
    }

    SrxParser srxParser = new SrxAnyParser();
    SrxDocument document = srxParser.parse(srxReader);
    srxReader.close();

    return document;
  }
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

    int ruleLength = Integer.parseInt(parts[1]);
    if (ruleLength < 1) {
      throw new RuntimeException("Rule length must be greater or equal to one: " + ruleCount + ".");
    }

    SrxDocument srxDocument = new SrxDocument();
    LanguageRule languageRule = generateLanguageRule(ruleCount, ruleLength);
    srxDocument.addLanguageMap(".*", languageRule);
    return srxDocument;
  }
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

  @Test
  public void testSrx2ParseTicket1() {
      Reader reader = getReader(getResourceStream(TICKET_1_DOCUMENT_NAME));

      SrxParser parser = new Srx2Parser();
      SrxDocument document = parser.parse(reader);

      assertTrue(document.getCascade());

      List<LanguageRule> languageRuleList = document
      .getLanguageRuleList("en");
     
      LanguageRule languageRule = languageRuleList.get(0);
      assertEquals("Default", languageRule.getName());
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

  }

  public void testSrx1Parse(SrxParser parser) {
    Reader reader = getReader(getResourceStream(SRX_1_DOCUMENT_NAME));

    SrxDocument document = parser.parse(reader);

    assertFalse(document.getCascade());

    List<LanguageRule> languageRuleList = document
        .getLanguageRuleList("en");
    assertEquals(1, languageRuleList.size());

    LanguageRule languageRule = languageRuleList.get(0);
    assertEquals("Default", languageRule.getName());
View Full Code Here

Examples of net.sourceforge.segment.srx.SrxDocument

  }

  public void testSrx2Parse(SrxParser parser) {
    Reader reader = getReader(getResourceStream(SRX_2_DOCUMENT_NAME));

    SrxDocument document = parser.parse(reader);

    assertTrue(document.getCascade());

    List<LanguageRule> languageRuleList = document
        .getLanguageRuleList("fr_FR");
    assertEquals(2, languageRuleList.size());

    LanguageRule languageRule = languageRuleList.get(0);
    assertEquals("French", languageRule.getName());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.