Examples of LexiconEntry

Examples of business.LexiconEntry


    public void load( String topic ) {
        ResultSet result;
        LexiconEntry entry;
        String where = "language = '" + language + "'";

        if( topic != null ) {
            where += " AND topic = '" + topic + "'";

        result = driver.select( "lexiconentry", null, where );

        try {
            while( result.next() ) {
                entry = new LexiconEntry();
                entry.init( driver );
                entry.populate( result );
                lexicons.put( entry.getKey(), entry.getValue() );
        } catch( SQLException e ) {

Examples of org.terrier.structures.LexiconEntry

    for (int i = 0; i < queryLength; i++) {
      //get the entry statistics - perhaps this came from "far away"
      EntryStatistics entryStats = queryTerms.getStatistics(queryTermStrings[i]);
      //we seek the query term in the lexicon
      LexiconEntry lEntry = lexicon.getLexiconEntry(queryTermStrings[i]);
      if (entryStats == null)
        entryStats = lEntry;
      //and if it is not found, we continue with the next term
      if (lEntry==null)
        //logger.info("Term Not Found: "+queryTermStrings[i]);
      queryTerms.setTermProperty(queryTermStrings[i], lEntry);
      logger.debug((i + 1) + ": " + queryTermStrings[i].trim() + " with " + entryStats.getDocumentFrequency()
          + " documents (TF is " + entryStats.getFrequency() + ").");
      //check if the IDF is very low.
      if (IGNORE_LOW_IDF_TERMS && collectionStatistics.getNumberOfDocuments() < lEntry.getFrequency()) {
        logger.debug("query term " + queryTermStrings[i] + " has low idf - ignored from scoring.");
      //the weighting models are prepared for assigning scores to documents
Examples of org.terrier.structures.LexiconEntry

    FSOMapFileLexiconOutputStream.addLexiconToIndex(this.index, defaultStructureName, lexiconEntryFactoryValueClass+"$Factory");
  protected LexiconEntry newLexiconEntry(int termid)
    LexiconEntry rtr = valueFactory.newInstance();
    return rtr;
Examples of org.terrier.structures.LexiconEntry

    String targetTerm= null;
    int targetTermId  = -1;
    LexiconEntry nextEntryToWrite = null;
    while(terms.size() > 0)
      //what term are we working on
      targetTerm = terms.poll();
      //logger.debug("Current term is "+targetTerm + "length="+targetTerm.length());
      //for each input lexicon
      for(int i=0;i<numLexicons;i++)
        //does this lexicon contain the term
        //logger.debug("Checking lexicon "+i+" for "+targetTerm+"="+lis[i].getTerm());
        if(hasMore[i] && currentEntries[i].getKey().equals(targetTerm))
          if (targetTermId == -1)
          {  //obtain the termid for this term from the first lexicon that has the term
            nextEntryToWrite = newLexiconEntry(targetTermId = currentEntries[i].getValue().getTermId());
          else if (targetTermId != currentEntries[i].getValue().getTermId())
          {  //check the termids match for this term
            logger.error("Term "+targetTerm+" had two termids ("+targetTermId+","+currentEntries[i].getValue().getTermId()+")");
          //logger.debug("Term "+targetTerm + " found in "+i + "termid="+ lis[i].getTermId());
          hasMore[i] = lis[i].hasNext();
          if (hasMore[i])
            currentEntries[i] = lis[i].next();
Examples of org.terrier.structures.LexiconEntry

      BitIndexPointer pin = new SimpleBitIndexPointer();
        Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
        LexiconEntry value = lee.getValue();
        los.writeNextEntry(lee.getKey(), value);
Examples of org.terrier.structures.LexiconEntry

    lastFreq = 0;
    lastDocFreq= 0;
    lastDocument = -1;
    long startOffset = this.getByteOffset();
    byte startBitOffset = this.getBitOffset();
    LexiconEntry le = null;
    // for each run in the list
    int counter = 0;
    //for one term: for each set of postings for that term
    while (run.hasNext()) {
      PostingInRun posting = run.next();
      lastTermWritten = posting.getTerm();
      if (posting.getDf() > maxDF)
        maxDF = posting.getDf();
      //final int _runMapID = TaskID.forName(_run.getMapNo()).getId();
      //final int runNumber = run.getRunNo();
      final int docOffset = getDocumentOffset(_run.getSplitNo(), _run.getRunNo());
      lastDocument = posting.append(bos, lastDocument, docOffset);
      if (le == null)
        le = posting.getLexiconEntry();
      lastFreq += posting.getTF();
      lastDocFreq += posting.getDf();
    ((BasicLexiconEntry)le).setOffset(startOffset, startBitOffset);
    lexStream.writeNextEntry(lastTermWritten, le);
    numberOfPointers += lastDocFreq;
Examples of org.terrier.structures.LexiconEntry

      DataInputStream dis = new DataInputStream(Files.openFileStream(LexiconFilename.concat(".tmp2")));
      BitIndexPointer pin = new SimpleBitIndexPointer();
        Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
        LexiconEntry value = lee.getValue();
        los.writeNextEntry(lee.getKey(), value);
Examples of org.terrier.structures.LexiconEntry

      if (! lexiconStream.hasNext())
      Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
      LexiconEntry le = lee.getValue();
      numberOfPointersThisIteration += le.getDocumentFrequency();   
      //The class TIntIntHashMap returns zero when you look up
      //the value of a key that does not exist in the hash map.
      //For this reason, the values inserted into the hash map
      //are increased by one.
      codesHashMap.put(le.getTermId(), j + 1);
      //increment counter
    if (logger.isDebugEnabled())
Examples of org.terrier.structures.LexiconEntry

      if (! lexiconStream.hasNext())
      Map.Entry<String,LexiconEntry> lee = lexiconStream.next();
      LexiconEntry le = lee.getValue();
      TIntArrayList[] tmpArray = new TIntArrayList[2 + fieldCount];
      final int tmpNT = le.getDocumentFrequency();
      for (int i=0;i<2+fieldCount;i++)
        tmpArray[i] = new TIntArrayList(tmpNT);
      numberOfPointersThisIteration += tmpNT;
      tmpStorage[j] = tmpArray;
      //The class TIntIntHashMap returns zero when you look up
      //the value of a key that does not exist in the hash map.
      //For this reason, the values inserted into the hash map
      //are increased by one.
      codesHashMap.put(le.getTermId(), j + 1);
    if (logger.isDebugEnabled())
        numberOfPointersThisIteration + " pointers == "+
        j +" terms");
Examples of org.terrier.structures.LexiconEntry

    // We purge the query terms not present in the lexicon and retrieve the information from the lexicon
    String[] queryTermStrings = queryTerms.getTerms();
    queryTermsToMatchList = new ArrayList<Map.Entry<String,LexiconEntry>>(queryTermStrings.length);
    for (String queryTerm: queryTermStrings) {
      LexiconEntry t = lexicon.getLexiconEntry(queryTerm);
      if (t != null) {
        //check if the term IDF is very low.
        if (IGNORE_LOW_IDF_TERMS && collectionStatistics.getNumberOfDocuments() < t.getFrequency()) {
          //logger.warn("query term " + queryTerm + " has low idf - ignored from scoring.");
        // check if the term has weighting models
        WeightingModel[] termWeightingModels = queryTerms.getTermWeightingModels(queryTerm);
        if (termWeightingModels.length == 0) {
          //logger.warn("No weighting models for term " + queryTerm +", skipping scoring");
        queryTermsToMatchList.add(new MapEntry<String, LexiconEntry>(queryTerm, t));
        logger.debug("Term Not Found: " + queryTerm);     

    ////logger.warn("queryTermsToMatchList = " + queryTermsToMatchList.size());
    int queryLength = queryTermsToMatchList.size();
    wm = new WeightingModel[queryLength][];
    for (int i = 0; i < queryLength; i++)
      Map.Entry<String, LexiconEntry> termEntry    = queryTermsToMatchList.get(i);
      String               queryTerm    = termEntry.getKey();
      LexiconEntry           lexiconEntry = termEntry.getValue();
      //get the entry statistics - perhaps this came from "far away"
      EntryStatistics entryStats = queryTerms.getStatistics(queryTerm);
      //if none were provided with the query, we look up the query term's entry statistics in the lexicon
      if (entryStats == null)
