Package org.terrier.utility

Examples of org.terrier.utility.TagSet


        logger.error("The topics file " + queryfilename + " does not exist, or it cannot be read.");
        return false;
      } else {
        br = Files.openFileReader(queryfilename,desiredEncoding);
        TRECFullTokenizer queryTokenizer = new TRECFullTokenizer(
              new TagSet(TagSet.TREC_QUERY_TAGS),
              new TagSet(TagSet.EMPTY_TAGS),
              br);
        queryTokenizer.setIgnoreMissingClosingTags(true);
        while (!queryTokenizer.isEndOfFile()) {
          String docnoToken = null;
          StringBuilder query = new StringBuilder();
View Full Code Here


   * opening and closing document and document number
   * tags.
   */
  protected void setTags(String TagSet)
  {
    TagSet tagSet = new TagSet(TagSet);
    tags_CaseSensitive = tagSet.isCaseSensitive();
    docnotag = tagSet.getDocTag();
    String tmpDocTag = "<" + tagSet.getDocTag() + ">";
    String tmpEndDocTag = "</" + tagSet.getDocTag() + ">";
    String tmpDocnoTag = "<" + tagSet.getIdTag() + ">";
    String tmpEndDocnoTag = "</" + tagSet.getIdTag() + ">";
    start_docTag = tmpDocTag.toCharArray();
    start_docTagLength = start_docTag.length;
    start_docnoTag = tmpDocnoTag.toCharArray();
    start_docnoTagLength = start_docnoTag.length;
    end_docTag = tmpEndDocTag;
View Full Code Here

    } catch (UnsupportedEncodingException uee) {
      //logger.warn("Desired encoding ("+charset+") unsupported. Resorting to platform default.", uee);
      this.br = new BufferedReader(new InputStreamReader(docStream));
    }
    this.properties = docProperties; 
    this._tags = new TagSet(TagSet.TREC_DOC_TAGS);
    this._exact = new TagSet(TagSet.TREC_EXACT_DOC_TAGS);
    this._fields = new TagSet(TagSet.FIELD_TAGS);
    this.tokeniser = _tokeniser;
    this.currentTokenStream = Tokeniser.EMPTY_STREAM;
    for(int i=0;i<abstractCount;i++)
    {
      abstracts[i] = new StringBuilder(abstractlengths[i]);
View Full Code Here

   */
  public TaggedDocument(Reader docReader, Map<String, String> docProperties, Tokeniser _tokeniser)
  {
    this.br = docReader;
    properties = docProperties; 
    this._tags = new TagSet(TagSet.TREC_DOC_TAGS);
    this._exact = new TagSet(TagSet.TREC_EXACT_DOC_TAGS);
    this._fields = new TagSet(TagSet.FIELD_TAGS);
    this.tokeniser = _tokeniser;
    this.currentTokenStream = Tokeniser.EMPTY_STREAM;
  }
View Full Code Here

   */
  public TRECFullTokenizer() {
    inTagToProcess = false;
    inTagToSkip = false;
    inDocnoTag = false;
    tagSet = new TagSet(TagSet.TREC_DOC_TAGS);
    exactTagSet = new TagSet(TagSet.TREC_EXACT_DOC_TAGS);
    EOD = false;
    EOF = false;
  }
View Full Code Here

  public TRECFullTokenizer(BufferedReader _br) {
    inTagToProcess = false;
    inTagToSkip = false;
    inDocnoTag = false;
    this.br = _br;
    tagSet = new TagSet(TagSet.TREC_DOC_TAGS);
    exactTagSet = new TagSet(TagSet.TREC_EXACT_DOC_TAGS);
    EOD = false;
    EOF = false;
  }
View Full Code Here

TOP

Related Classes of org.terrier.utility.TagSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.