// Announce the input file on stderr so stdout stays clean for tokenizer output.
// NOTE(review): `path` is declared outside this fragment — presumably a File; confirm.
System.err.println("Reading from: " + path.getPath());
try {
// Open the input with an explicit UTF-8 decoder (avoids platform-default charset).
// NOTE(review): `br` is not closed anywhere in this visible span — verify it is
// closed (ideally via try-with-resources) further down, or this leaks the stream.
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
// Build an Arabic tokenizer configured with caller-supplied options.
// NOTE(review): `tokOptions` is defined outside this fragment — confirm its contents.
TokenizerFactory<CoreLabel> tf = ArabicTokenizer.factory();
tf.setOptions(tokOptions);
// Second tokenization path: a lexical mapper configured (via its varargs options)
// to strip segmentation and morphological markers from UTF-8 text.
Mapper lexMapper = new DefaultLexicalMapper();
lexMapper.setup(null, "StripSegMarkersInUTF8", "StripMorphMarkersInUTF8");
// Process the file line by line, tracking the 0-based line number.
int lineId = 0;
for(String line; (line = br.readLine()) != null; lineId++) {
line = line.trim();
// Tokenize with the tokenizer
List<CoreLabel> tokenizedLine = tf.getTokenizer(new StringReader(line)).tokenize();
// Emit the tokenizer's whitespace-joined output on stdout.
System.out.println(Sentence.listToString(tokenizedLine));
// Tokenize with the mapper
StringBuilder sb = new StringBuilder();
// NOTE(review): on an empty/blank line, split("\\s+") returns one empty token,
// so `toks` is never empty — confirm downstream comparison tolerates this.
String[] toks = line.split("\\s+");
for (String tok : toks) {
// Map each whitespace-delimited token independently; null = no context token.
String mappedTok = lexMapper.map(null, tok);
sb.append(mappedTok).append(" ");
}
// Re-split the mapped output into tokens (trim drops the trailing space added above).
List<String> mappedToks = Arrays.asList(sb.toString().trim().split("\\s+"));
// Evaluate the output