Package com.chenlb.mmseg4j

Examples of com.chenlb.mmseg4j.MMSeg$ReadCharDigit


  private OffsetAttribute offsetAtt;
  private TypeAttribute typeAtt;
 
  public MMSegTokenizer(Seg seg, Reader input) {
    super(input);
    mmSeg = new MMSeg(input, seg);
   
    termAtt = (CharTermAttribute)addAttribute(CharTermAttribute.class);
    offsetAtt = (OffsetAttribute)addAttribute(OffsetAttribute.class);
    typeAtt = (TypeAttribute)addAttribute(TypeAttribute.class);
  }
View Full Code Here


      }
     
    });
    long time = 0;
    for(File txt : txts) {
      MMSeg mmSeg = new MMSeg(new InputStreamReader(new FileInputStream(txt)), seg);
      Word word = null;
      OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File(txt.getAbsoluteFile()+"."+mode+".word")));
      BufferedWriter bw = new BufferedWriter(osw);
      long start = System.currentTimeMillis();
      while((word=mmSeg.next())!=null) {

        bw.append(new String(word.getString())).append("\r\n");
      }
      time += System.currentTimeMillis() - start;
      bw.close();
View Full Code Here

  }
 
  public String segWords(Reader input, String wordSpilt) throws IOException {
    StringBuilder sb = new StringBuilder();
    Seg seg = getSeg()//取得不同的分词具体算法
    MMSeg mmSeg = new MMSeg(input, seg);
    Word word = null;
    boolean first = true;
    while((word=mmSeg.next())!=null) {
      if(!first) {
        sb.append(wordSpilt);
      }
      String w = word.getString();
      sb.append(w);
View Full Code Here

  private OffsetAttribute offsetAtt;
  private TypeAttribute typeAtt;

  public MMSegTokenizer(Seg seg, Reader input) {
    super(input);
    mmSeg = new MMSeg(input, seg);

    termAtt = addAttribute(CharTermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
  }
View Full Code Here

  private OffsetAttribute offsetAtt;
  private TypeAttribute typeAtt;

  public MMSegTokenizer(Seg seg, Reader input) {
    super(input);
    mmSeg = new MMSeg(input, seg);

    termAtt = addAttribute(CharTermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
  }
View Full Code Here

  private OffsetAttribute offsetAtt;
  private TypeAttribute typeAtt;
 
  public MMSegTokenizer(Seg seg, Reader input) {
    super(input);
    mmSeg = new MMSeg(input, seg);
   
    termAtt = (CharTermAttribute)addAttribute(CharTermAttribute.class);
    offsetAtt = (OffsetAttribute)addAttribute(OffsetAttribute.class);
    typeAtt = (TypeAttribute)addAttribute(TypeAttribute.class);
  }
View Full Code Here

  }
 
  public String segWords(Reader input, String wordSpilt) throws IOException {
    StringBuilder sb = new StringBuilder();
    Seg seg = getSeg()//取得不同的分词具体算法
    MMSeg mmSeg = new MMSeg(input, seg);
    Word word = null;
    boolean first = true;
    while((word=mmSeg.next())!=null) {
      if(!first) {
        sb.append(wordSpilt);
      }
      String w = word.getString();
      sb.append(w);
View Full Code Here

  private OffsetAttribute offsetAtt;
  private TypeAttribute typeAtt;

  public MMSegTokenizer(Seg seg, Reader input) {
    super(input);
    mmSeg = new MMSeg(input, seg);

    termAtt = addAttribute(CharTermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
  }
View Full Code Here

  private OffsetAttribute offsetAtt;
  private TypeAttribute typeAtt;

  public MMSegTokenizer(Seg seg, Reader input) {
    super(input);
    mmSeg = new MMSeg(input, seg);

    termAtt = addAttribute(CharTermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
  }
View Full Code Here

  /**
   * Static hook with an intentionally empty body; it performs no work.
   * NOTE(review): the name suggests a test-class teardown method, but no
   * annotation is visible here — confirm against the full source.
   */
  public static void afterClass() {
  }

  public static List<String> toMMsegWords(String txt, Seg seg) {
    List<String> words = new ArrayList<String>();
    MMSeg mmSeg = new MMSeg(new StringReader(txt), seg);
    Word word = null;
    try {
      while ((word = mmSeg.next()) != null) {
        String w = word.getString();
        words.add(w);
      }
    } catch (IOException e) {
      e.printStackTrace();
View Full Code Here

TOP

Related Classes of com.chenlb.mmseg4j.MMSeg$ReadCharDigit

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.