// Parse the word entries: a word count followed by one (word form,
// lexical ID) token pair per word. Every nextToken() call consumes input,
// so the order of the statements below is significant.
// The word count is parsed using radix HEX.
int wordCount = Integer.parseInt(tokenizer.nextToken(), HEX);
senseKeys = new SenseKey[wordCount];
SampleIndexFactory indexFactory = SampleIndexFactory.getInstance();
SampleTemplateFactory templateFactory =
SampleTemplateFactory.getInstance();
ExampleSentences sentences = new ExampleSentences();
// Created lazily: stays null unless at least one word form has a marker.
WordPositions positions = null;
for (int i = 0; i < wordCount; i++)
{
// Word form (e.g., "WordNet").
wordForm = TextTranslator.translateToExternalFormat(
tokenizer.nextToken());
// A word form may carry an embedded marker delimited by MARKER_START
// and MARKER_END; its text is recorded as the word's position.
markerStart = wordForm.indexOf(MARKER_START);
if (markerStart != -1)
{
markerEnd = wordForm.indexOf(MARKER_END, markerStart);
if (markerEnd == -1)
{
throw new ParseException("Marker start embedded in form " +
"'" + wordForm + "' but no marker end text found.");
}
// NOTE(review): "markerStart + 1" assumes MARKER_START is exactly one
// character long -- confirm against its declaration.
position = wordForm.substring(markerStart + 1, markerEnd);
// Keep only the text before the marker; anything from the marker
// start onward (including text after the marker end) is dropped.
wordForm = wordForm.substring(0, markerStart);
if (positions == null)
{
positions = new WordPositions();
}
positions.setPosition(wordForm, position);
}
// Lexical ID (e.g., "0"), parsed using radix HEX.
lexicalID = Integer.parseInt(tokenizer.nextToken(), HEX);
senseKeys[i] = new SenseKey(wordForm, synsetType, lexicalFile,
lexicalID);
// Resolve each sample key for this sense key to its template text and
// register the resulting templates under the sense key's lemma.
keyText = indexFactory.getSampleKeys(senseKeys[i]);
templates = new String[keyText.length];
for (int j = 0; j < keyText.length; j++)
{
templates[j] = templateFactory.getSample(
synsetType, keyText[j]);
}
sentences.setTemplates(senseKeys[i].getLemma(), templates);
}
// Parse the relationship pointers: a pointer count (parsed as decimal)
// followed by four tokens per pointer -- symbol, target offset, type code
// and a combined source/target word-number field. Every nextToken() call
// consumes input, so the order of the statements below is significant.
int pointerCount = Integer.parseInt(tokenizer.nextToken());
RelationshipPointers pointers = new RelationshipPointers();
for (int i = 0; i < pointerCount; i++)
{
    // Pointer symbol (e.g., "@i").
    relationship = RelationshipType.getRelationshipType(
        tokenizer.nextToken());
    // Synset offset (e.g., "06550617"), parsed as decimal.
    targetOffset = Integer.parseInt(tokenizer.nextToken());
    // Part of speech / synset type; only the first character is used.
    typeCode = tokenizer.nextToken().charAt(0);
    type = SynsetTypeConverter.getType(typeCode);
    // Source / target words: characters 0-1 hold the source word number
    // and characters 2-3 the target word number, both parsed with radix
    // HEX.
    sourceTarget = tokenizer.nextToken();
    sourceWord = Integer.parseInt(sourceTarget.substring(0, 2), HEX);
    targetWord = Integer.parseInt(sourceTarget.substring(2, 4), HEX);
    // If source and target are both zero, add a semantic relationship
    if ((sourceWord == 0) && (targetWord == 0))
    {
        pointer = new SynsetPointer(type, targetOffset);
        pointers.addSemanticRelationship(relationship, pointer);
    }
    // Otherwise it must be a lexical relationship
    else
    {
        // The source word number is a 1-based index into senseKeys.
        // Reject values outside that range (including a zero source
        // paired with a non-zero target) with a ParseException instead of
        // letting the array access fail with an unchecked
        // ArrayIndexOutOfBoundsException.
        if ((sourceWord < 1) || (sourceWord > senseKeys.length))
        {
            throw new ParseException("Pointer source word number " +
                sourceWord + " is out of range; expected a value " +
                "from 1 to " + senseKeys.length + ": " + data);
        }
        wordForm = senseKeys[sourceWord - 1].getLemma();
        sensePointer = new WordSensePointer(
            type, targetOffset, targetWord);
        pointers.addLexicalRelationship(
            wordForm, relationship, sensePointer);
    }
}
// Parse the optional frame section. The next token is either the frame
// terminator (no frames present) or a frame count (parsed as decimal)
// followed by three tokens per frame: header, frame number, word number.
nextToken = tokenizer.nextToken();
// If we didn't get the frame terminator, there must be frame numbers
if (!(nextToken.equals(FRAME_TERMINATOR)))
{
    int frameCount = Integer.parseInt(nextToken);
    SampleFrameFactory factory = SampleFrameFactory.getInstance();
    // Loop through the list of frame entries
    for (int i = 0; i < frameCount; i++)
    {
        // Get the header character ("+")
        nextToken = tokenizer.nextToken();
        if (!(nextToken.equals(FRAME_HEADER)))
        {
            throw new ParseException("Expected frame header " +
                "text '" + FRAME_HEADER + "' but found '" +
                nextToken + "' instead: " + data);
        }
        // Get the frame number and resolve it to frame text
        nextToken = tokenizer.nextToken();
        frameText = factory.getSample(synsetType, nextToken);
        // Get index of word that frame is associated with (radix HEX)
        sourceWord = Integer.parseInt(tokenizer.nextToken(), HEX);
        // If word number is zero, frame is for the entire synset
        if (sourceWord == 0)
        {
            sentences.addCommonFrame(frameText);
        }
        // Frame applies only to a particular word
        else
        {
            // The word number is a 1-based index into senseKeys. Reject
            // values outside that range with a ParseException instead of
            // letting the array access fail with an unchecked
            // ArrayIndexOutOfBoundsException.
            if ((sourceWord < 1) || (sourceWord > senseKeys.length))
            {
                throw new ParseException("Frame word number " +
                    sourceWord + " is out of range; expected 0 or a " +
                    "value from 1 to " + senseKeys.length + ": " + data);
            }
            wordForm = senseKeys[sourceWord - 1].getLemma();
            sentences.addFrame(frameText, wordForm);
        }
    }
    // We should be finished with frames now; get the frame terminator.
    // The token is only read here, not checked, within this section.
    nextToken = tokenizer.nextToken();
}