/**
*
*/
package cc.mallet.pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.TokenSequence;
/**
 * Pipe that expands a {@link FeatureSequence} with pairwise co-occurrence
 * features built from the positions of the original sequence.
 *
 * @author lmyao
 */
public class FeatureSequenceConvolution extends Pipe {

    /**
     * Creates the pipe, handing the superclass a new, empty {@link Alphabet}
     * as its first argument and {@code null} as its second.
     */
    public FeatureSequenceConvolution() {
        super(new Alphabet(), null);
    }

    /**
     * Replaces the carrier's data with an expanded feature sequence.
     *
     * <p>The new sequence is built over this pipe's data alphabet and holds,
     * in order:
     * <ol>
     *   <li>every entry of the incoming sequence, added by its object;</li>
     *   <li>one co-occurrence feature for each position pair {@code i < j}
     *       (n choose 2 of them for a sequence of length n), named
     *       {@code "<index at i>_<index at j>"}.</li>
     * </ol>
     * The pair name is made from the integer indices the incoming sequence
     * reports for the two positions, not from the feature objects' string
     * forms (an earlier variant of this class joined the object strings
     * instead).
     *
     * <p>A locked carrier is unlocked before its data is replaced; it is not
     * locked again afterwards.
     *
     * @param carrier instance whose data is a {@link FeatureSequence}
     * @return the same {@code carrier}, now carrying the expanded sequence
     * @throws ClassCastException if the carrier's data is not a
     *         {@link FeatureSequence}
     */
    public Instance pipe (Instance carrier)
    {
        final FeatureSequence source = (FeatureSequence) carrier.getData();
        final FeatureSequence expanded =
            new FeatureSequence((Alphabet) getDataAlphabet());
        final int length = source.getLength();

        appendOriginalFeatures(source, length, expanded);
        appendPairFeatures(source, length, expanded);

        if (carrier.isLocked()) {
            carrier.unLock();
        }
        carrier.setData(expanded);
        return carrier;
    }

    /** Copies the first {@code length} entries of {@code source} into {@code target}, by object. */
    private static void appendOriginalFeatures(
            FeatureSequence source, int length, FeatureSequence target) {
        int position = 0;
        while (position < length) {
            target.add(source.getObjectAtPosition(position));
            position++;
        }
    }

    /** Adds one {@code "<index>_<index>"} feature to {@code target} for every position pair {@code first < second}. */
    private static void appendPairFeatures(
            FeatureSequence source, int length, FeatureSequence target) {
        for (int first = 0; first + 1 < length; first++) {
            for (int second = first + 1; second < length; second++) {
                final int firstIndex = source.getIndexAtPosition(first);
                final int secondIndex = source.getIndexAtPosition(second);
                // Kept as Object so the add(Object) overload is the one invoked.
                final Object pairFeature = firstIndex + "_" + secondIndex;
                target.add(pairFeature);
            }
        }
    }
}