Examples of HMapSIW


Examples of edu.umd.cloud9.io.map.HMapSIW

      }
      eSent = sentences[1];
      fSent = sentences[0];
      eLen = eTok.getNumberTokens(eSent);
      fLen = fTok.getNumberTokens(fSent);
      HMapSIW eSrcTfs = new HMapSIW();
      eVector = helper.createEDocVector(eSent, eSrcTfs);
      HMapSIW fSrcTfs = new HMapSIW();
      fVector = helper.createFDocVector(fSent, fSrcTfs);
  
      if (eVector == null || fVector == null) {
        reporter.incrCounter(Sentences.ignored, 1)
        return;
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

    try {
      fis1 = new FileInputStream(eFile);
      fis2 = new FileInputStream(fFile);
      dis1 = new BufferedReader(new InputStreamReader(fis1, "UTF-8"));
      dis2 = new BufferedReader(new InputStreamReader(fis2, "UTF-8"));
      HMapSIW fSent = new HMapSIW();
      HMapSIW eSent = new HMapSIW();
      String eLine = null, fLine = null;
      int cntE = 0, cntF = 0, lastSentLenE = 0, lastSentLenF = 0;

      while ((eLine = dis1.readLine()) != null) {
        fLine = dis2.readLine().trim();
        eLine = eLine.trim();

        String[] tokens;
        if (fTokenizer == null) {
          tokens = fLine.split(" ");
        } else {
          tokens = fTokenizer.processContent(fLine);
        }
        lastSentLenF = tokens.length;

        for (String token : tokens) {
          if (!fSent.containsKey(token)) { // if this is first time we saw token in this sentence
            dfD.increment(token);
          }
          fSent.increment(token);

        }

        tokens = eTokenizer.processContent(eLine);
        lastSentLenE = tokens.length;

        for (String token : tokens) {
          if (!eSent.containsKey(token)) {
            dfE.increment(token);
          }
          eSent.increment(token);
        }

        sumFLengs += lastSentLenF;
        sumELengs += lastSentLenE;

        enSentLengths.add(lastSentLenE);
        deSentLengths.add(lastSentLenF);

        eSentTfs.add(eSent);
        fSentTfs.add(fSent);

        eSents.add(eLine);
        fSents.add(fLine);

        cntE++;
        cntF++;
        fSent = new HMapSIW();
        eSent = new HMapSIW();
      }

      // dispose all the resources after using them.
      fis1.close();
      dis1.close();
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

    String label;
    long time = System.currentTimeMillis();

    for (int i = 0; i < transVectors.size(); i++) {
      HMapSFW transVector = transVectors.get(i);
      HMapSIW fTfMap = fTfs.get(i);
      String fSent = fSents.get(i);
      for (int j = 0; j < eVectors.size(); j++) {
        HMapSFW eVector = eVectors.get(j);
        HMapSIW eTfMap = eTfs.get(j);
        String eSent = eSents.get(j);
        if (i == j) {
          label = "parallel";
        } else {
          label = "non_parallel";
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

    if ( cmdline == null ) {
      printUsage();
      return;
    }
    BitextClassifierUtils dt = new BitextClassifierUtils();
    numSentencesPerDocE = new HMapSIW();
    numSentencesPerDocF = new HMapSIW();
   
    long startTime = System.currentTimeMillis();

    dt.runPrepareSentenceExtractionData(cmdline.getOptionValue(FLANG_OPTION),
        cmdline.getOptionValue(ELANG_OPTION),
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

      }
      eSent = sentences[1];
      fSent = sentences[0];
      eLen = eTok.getNumberTokens(eSent);
      fLen = fTok.getNumberTokens(fSent);
      HMapSIW eSrcTfs = new HMapSIW();
      eVector = helper.createEDocVector(eSent, eSrcTfs);
      HMapSIW fSrcTfs = new HMapSIW();
      fVector = helper.createFDocVector(fSent, fSrcTfs);

      if (eVector == null || fVector == null) {
        reporter.incrCounter(Sentences.ignored, 1)
        return;
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

    Map<String,HMapSFW> scfgDist = new HashMap<String,HMapSFW>();

    // phrase2count table is a set of (source_phrase --> X) maps, where X is a set of (phrase_trans --> count) maps
    HMapSFW phraseDist = new HMapSFW();

    HMapSIW srcTokenCnt = new HMapSIW();

    Set<String> bagOfTargetTokens = new HashSet<String>();

    try {
      FSDataInputStream fis = fs.open(new Path(grammarFile));
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

    Map<String,HMapSFW> token2tokenDist = new HashMap<String,HMapSFW>();
   
    // target phrase --> prob
    HMapSFW phraseDist = new HMapSFW();
   
    HMapSIW srcTokenCnt = new HMapSIW();

    Set<String> bagOfTargetTokens = new HashSet<String>();

    for (int k = 0; k < n; k++) {
      transProbs[k] = transProbs[k]/sumOfProbs;
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

      List<HMapSIW> sentTfs, List<String> sents, HMapSIW dfTable) throws IOException {
    String line = null;
    boolean isNewDoc = true;
    int cnt = 0;
    float sumLengths = 0;
    HMapSIW sent = new HMapSIW();

    while ((line = reader.readLine()) != null) {
      line = line.trim();

      if (isNewDoc) {
        title2SentCnt.put(line, cnt);
        isNewDoc = false;
      } else if (line.equals("")){
        isNewDoc = true;      
      }else {
        String[] tokens = tokenizer.processContent(line);
        sentLengths.add(tokens.length);
        sumLengths += tokens.length;

        for (String token : tokens) {
          if (!sent.containsKey(token)) {
            dfTable.increment(token);
          }
          sent.increment(token);
        }
        sentTfs.add(sent);
        sents.add(line);
        cnt++;
        sent.clear();
      }
    }
    reader.close();

    return (sumLengths / cnt);   
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

    float sumFLengs = 0, sumELengs = 0;

    try {
      BufferedReader dis1 = new BufferedReader(new InputStreamReader(new FileInputStream(new File(eReadFile)), "UTF-8"));
      BufferedReader dis2 = new BufferedReader(new InputStreamReader(new FileInputStream(new File(fReadFile)), "UTF-8"));
      HMapSIW fDoc = new HMapSIW();
      HMapSIW eDoc = new HMapSIW();
      String eLine = null, fLine = null;
      int cntEDocs = 0, cntFDocs = 0, lastDocLenE = 0, lastDocLenF = 0, numSents = 0;

      while ((eLine = dis1.readLine()) != null) {
        fLine = dis2.readLine().trim();
        eLine = eLine.trim();

        String[] tokens = fTokenizer.processContent(fLine);     
        lastDocLenF += tokens.length;

        for (String token : tokens) {
          if (!fDoc.containsKey(token)) { // if this is first time we saw token in this sentence
            dfD.increment(token);
          }
          fDoc.increment(token);
        }

        tokens = eTokenizer.processContent(eLine);
        lastDocLenE += tokens.length;

        for (String token : tokens) {
          if (!eDoc.containsKey(token)) {
            dfE.increment(token);
          }
          eDoc.increment(token);
        }
       
        numSents++;
       
        if (numSents == sentsPerDoc) {
          sumFLengs += lastDocLenF;
          sumELengs += lastDocLenE;

          enSentLengths.add(lastDocLenE);
          deSentLengths.add(lastDocLenF);

          eDocTfs.add(eDoc);
          fDocTfs.add(fDoc);
          cntEDocs++;
          cntFDocs++;
         
          // reset variables
          fDoc = new HMapSIW();
          eDoc = new HMapSIW();
          numSents = 0;
          lastDocLenE = 0;
          lastDocLenF = 0;
        }
        eSents.add(eLine);
View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapSIW

    String label;
    long time = System.currentTimeMillis();

    for (int i = 0; i < transVectors.size(); i++) {
      HMapSFW transVector = transVectors.get(i);
      HMapSIW fTfMap = fTfs.get(i);
      String fSent = fSents.get(i);
      for (int j = 0; j < eVectors.size(); j++) {
        HMapSFW eVector = eVectors.get(j);
        HMapSIW eTfMap = eTfs.get(j);
        String eSent = eSents.get(j);
        if (parallelPairs.get(i) == j) {
          label = "parallel";
        } else {
          label = "non_parallel";
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.