Package edu.smu.tspell.wordnet.impl

Examples of edu.smu.tspell.wordnet.impl.ExampleSentences


    //  Word count, parsed using the HEX radix constant. It sizes the
    //  sense key array and bounds the loop below.
    int wordCount = Integer.parseInt(tokenizer.nextToken(), HEX);
    senseKeys = new SenseKey[wordCount];
    SampleIndexFactory indexFactory = SampleIndexFactory.getInstance();
    SampleTemplateFactory templateFactory =
        SampleTemplateFactory.getInstance();
    ExampleSentences sentences = new ExampleSentences();

    //  Created lazily: remains null unless at least one word form below
    //  carries an embedded marker.
    WordPositions positions = null;
    //  Each iteration consumes two tokens: the word form, then its
    //  lexical ID.
    for (int i = 0; i < wordCount; i++)
    {
      //  Word form (e.g., "WordNet"), converted to its external format.
      wordForm = TextTranslator.translateToExternalFormat(
          tokenizer.nextToken());
      //  A word form may have a marker embedded in it; the text between
      //  MARKER_START and MARKER_END is recorded as the word's position.
      markerStart = wordForm.indexOf(MARKER_START);
      if (markerStart != -1)
      {
        markerEnd = wordForm.indexOf(MARKER_END, markerStart);
        if (markerEnd == -1)
        {
          throw new ParseException("Marker start embedded in form " +
              "'" + wordForm + "' but no marker end text found.");
        }
        //  NOTE(review): "markerStart + 1" assumes MARKER_START is a
        //  single character -- confirm against the constant's definition.
        position = wordForm.substring(markerStart + 1, markerEnd);
        //  Keep only the text before the marker; the marker itself and
        //  anything following it are dropped from the word form.
        wordForm = wordForm.substring(0, markerStart);
        if (positions == null)
        {
          positions = new WordPositions();
        }
        positions.setPosition(wordForm, position);
      }
      //  Lexical ID (e.g., "0"), parsed using the HEX radix constant.
      lexicalID = Integer.parseInt(tokenizer.nextToken(), HEX);
      senseKeys[i] = new SenseKey(wordForm, synsetType, lexicalFile,
          lexicalID);
      //  Look up the sample keys for this sense and resolve each key to
      //  its template text; the templates are then registered under the
      //  sense key's lemma.
      keyText = indexFactory.getSampleKeys(senseKeys[i]);
      templates = new String[keyText.length];
      for (int j = 0; j < keyText.length; j++)
      {
        templates[j] = templateFactory.getSample(
            synsetType, keyText[j]);
      }
      sentences.setTemplates(senseKeys[i].getLemma(), templates);
    }

    //  Pointer count
    int pointerCount = Integer.parseInt(tokenizer.nextToken());
    RelationshipPointers pointers = new RelationshipPointers();
    for (int i = 0; i < pointerCount; i++)
    {
      //  Pointer symbol (e.g., "@i").
      relationship = RelationshipType.getRelationshipType(
          tokenizer.nextToken());
      //  Synset offset (e.g., "06550617").
      targetOffset = Integer.parseInt(tokenizer.nextToken());
      //  Part of speech / synset type
      typeCode = tokenizer.nextToken().charAt(0);
      type = SynsetTypeConverter.getType(typeCode);
      //  Source / target words
      sourceTarget = tokenizer.nextToken();
      sourceWord = Integer.parseInt(sourceTarget.substring(0, 2), HEX);
      targetWord = Integer.parseInt(sourceTarget.substring(2, 4), HEX);
      //  If source and target are both zero, add a semantic relationship
      if ((sourceWord == 0) && (targetWord == 0))
      {
        pointer = new SynsetPointer(type, targetOffset);
        pointers.addSemanticRelationship(relationship, pointer);
      }
      //  Otherwise it must be a lexical relationship
      else
      {
        wordForm = senseKeys[sourceWord - 1].getLemma();
        sensePointer = new WordSensePointer(
            type, targetOffset, targetWord);
        pointers.addLexicalRelationship(
            wordForm, relationship, sensePointer);
      }
    }

    nextToken = tokenizer.nextToken();
    //  If we didn't get the frame terminator, there must be frame numbers
    if (!(nextToken.equals(FRAME_TERMINATOR)))
    {
      int frameCount = Integer.parseInt(nextToken);
      SampleFrameFactory factory = SampleFrameFactory.getInstance();
      //  Loop through the list of frame entries
      for (int i = 0; i < frameCount; i++)
      {
        //  Get the header character ("+")
        nextToken = tokenizer.nextToken();
        if (!(nextToken.equals(FRAME_HEADER)))
        {
          throw new ParseException("Expected frame header " +
              "text '" + FRAME_HEADER + "' but found '" +
              nextToken + "' instead: " + data);
        }
        //  Get the frame number and resolve it to frame text
        nextToken = tokenizer.nextToken();
        frameText = factory.getSample(synsetType, nextToken);
        //  Get index of word that frame is associated with
        sourceWord = Integer.parseInt(tokenizer.nextToken(), HEX);
        //  If word number is zero, frame is for the entire synset
        if (sourceWord == 0)
        {
          sentences.addCommonFrame(frameText);
        }
        //  Frame applies only to a particular word
        else
        {
          wordForm = senseKeys[sourceWord - 1].getLemma();
          sentences.addFrame(frameText, wordForm);
        }
      }
      //  We should be finished with frames now; get the frame terminator
      nextToken = tokenizer.nextToken();
    }
View Full Code Here

TOP

Related Classes of edu.smu.tspell.wordnet.impl.ExampleSentences

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.