Package org.apache.ctakes.core.nlp.tokenizer

Examples of org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB


   *
   * @throws FileNotFoundException
   * @throws IOException
   */
  private void initTokenizer() throws FileNotFoundException, IOException {
    this.tokenizer = new TokenizerPTB();
  }
View Full Code Here


  @Override
  public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);
    logger.info("Initializing " + this.getClass().getName());
    tokenizer = new TokenizerPTB();
    skipSegmentsSet = new HashSet<>();
    if(skipSegmentsArray != null){
      Collections.addAll(skipSegmentsSet, skipSegmentsArray);
    }
  }
View Full Code Here

    System.gc();

    if (args.length == 1) { // If no file of hyphenated words given
      try {
        directoryOfDelimitedFiles = args[0];
        tokenizer = new TokenizerPTB();
        new CreateAssertionLuceneIndexFromDelimitedFile(tokenizer);
      } catch (Exception e) {
        e.printStackTrace();
      }
    } else if (args.length == 3) { // else, use the file of hyphenated words
                    // during tokenization
      try {

        directoryOfDelimitedFiles = args[0];
        // ** The hyphenated-words file is no longer needed; the new PTB
        // tokenizer is used instead. **
        // String hyphFileLoc = args[1];
        // int freqCutoff = Integer.parseInt(args[2]);
        // Map hyphMap = loadHyphMap(hyphFileLoc);
        // System.out.println("Processing hyphMap from : " +
        // hyphFileLoc);

        tokenizer = new TokenizerPTB();
        new CreateAssertionLuceneIndexFromDelimitedFile(tokenizer);
      } catch (Exception e) {
        e.printStackTrace();
      }
    } else {
View Full Code Here

    System.gc();

    if (args.length == 1) { // If no file of hyphenated words given
      try {
        directoryOfDelimitedFiles = args[0];
        tokenizer = new TokenizerPTB();
        new CreateLuceneIndexFromDelimitedFile(tokenizer);
      } catch (Exception e) {
        e.printStackTrace();
      }
    } else if (args.length == 3) { // else, use the file of hyphenated words
                    // during tokenization
      try {

        directoryOfDelimitedFiles = args[0];
        // ** The hyphenated-words file is no longer needed; the new PTB
        // tokenizer is used instead. **
        // String hyphFileLoc = args[1];
        // int freqCutoff = Integer.parseInt(args[2]);
        // Map hyphMap = loadHyphMap(hyphFileLoc);
        // System.out.println("Processing hyphMap from : " +
        // hyphFileLoc);

        tokenizer = new TokenizerPTB();
        new CreateLuceneIndexFromDelimitedFile(tokenizer);
      } catch (Exception e) {
        e.printStackTrace();
      }
    } else {
View Full Code Here

   */
  private void configInit() throws ResourceAccessException {

    skipSegmentsSet = ParamUtil.getStringParameterValuesSet(PARAM_SEGMENTS_TO_SKIP, context);

    tokenizer = new TokenizerPTB();

  }
View Full Code Here

TOP

Related Classes of org.apache.ctakes.core.nlp.tokenizer.TokenizerPTB

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.