package edu.umd.hooka;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ShortBuffer;
import java.util.ArrayList;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
public class Alignment implements org.apache.hadoop.io.Writable,
java.lang.Iterable<Alignment.IntPair>,
Cloneable {
public class IntPair {
public int f;
public int e;
public IntPair(int f,int e) { this.f = f; this.e = e; }
public String toString() {
return f + "-" + e;
}
}
public class AIterator implements java.util.Iterator<IntPair> {
int cur;
boolean[] d;
int w;
protected AIterator(Alignment a) {
this.d = a._aligned._data;
this.w = a._aligned._w;
this.cur = 0;
advance();
}
protected void advance() {
while (cur < d.length && !d[cur]) { cur++; }
}
public boolean hasNext() {
return cur < d.length;
}
public IntPair next() {
IntPair res = new IntPair(cur % w, cur / w);
cur++; advance();
return res;
}
public void remove() {
return;
}
}
protected final static class M2 implements Cloneable{
public short _w;
public boolean[] _data;
public Object clone() {
M2 res = new M2();
res._data = _data.clone();
res._w = _w;
return res;
}
public M2() { _w =0; _data = null; }
public M2(int f, int e) {
//System.err.println("x:"+x+"y:"+y);
_data = new boolean[f*e];
_w = (short)f;
}
void eraseFirstEWord() {
boolean[] nd = new boolean[_data.length - _w];
System.arraycopy(nd, 0, _data, 0, _data.length-_w);
_data = nd;
}
boolean equals(M2 other) {
if (other._w != _w) { return false; }
return java.util.Arrays.equals(_data, other._data);
}
public boolean get(int f,int e)
{
return _data[_w*e + f];
}
public void set(int f, int e)
{
//System.out.println("Set("+x+", "+y+")");
try {
_data[_w*e + f] = true;
} catch (ArrayIndexOutOfBoundsException ee) {
throw new RuntimeException("Set(" + f + ", " + e + "): caught " + ee);
}
}
public void reset(int f, int e)
{
_data[_w*e + f] = false;
}
public void readFields(DataInput in) throws IOException {
_w = in.readShort();
int size = in.readChar();
if (size < 1)
throw new RuntimeException("Error: " + size + " is not good for alignment!");
_data = new boolean[size];
int bbLen = in.readInt();
short[] faps = new short[bbLen/2];
short[] eaps = new short[bbLen/2];
ByteBuffer bb=ByteBuffer.allocate(bbLen);
in.readFully(bb.array());
ShortBuffer sb = bb.asShortBuffer();
sb.get(faps);
bb.clear();
in.readFully(bb.array());
sb = bb.asShortBuffer();
sb.get(eaps);
for (int i = 0; i<faps.length; i++) {
set(faps[i], eaps[i]);
}
}
public void write(DataOutput out) throws IOException {
out.writeShort(_w);
out.writeShort((short)_data.length);
int c = 0;
for (int i=0; i< _data.length; i++)
if (_data[i]) c++;
short[] faps = new short[c];
short[] eaps = new short[c];
c = 0;
for (int i=0; i< _data.length; i++)
if (_data[i]) {
faps[c] = (short)(i % _w);
eaps[c] = (short)(i / _w);
c++;
}
int bbLen = faps.length * 2;
out.writeInt(bbLen);
ByteBuffer bb=ByteBuffer.allocate(bbLen);
ShortBuffer sb = bb.asShortBuffer();
sb.put(faps);
out.write(bb.array());
sb.clear();
sb.put(eaps);
out.write(bb.array());
}
}
protected short _elen;
protected short _flen;
boolean[] faligned;
boolean[] ealigned;
M2 _aligned;
static Pattern eline_re = Pattern.compile("([^\\s]+)\\s+\\(\\{\\s+((?:\\d+\\s+)*)\\}\\)");
public static final int[][] DIAG_NEIGHBORS = //{{-1,-1},{0,-1},{1,-1},{-1,0},{1,0},{-1,1},{0,1},{1,1}};
{{0,-1},{-1,0},{1,0},{0,1},{1,1},{-1,-1},{1,-1},{-1,1}};
public static final int[][] NEIGHBORS = {{0,-1},{-1,0},{1,0},{0,1}};
public static final int[][][] LNEIGHBORS = {{{ 0,+1},{+1,+1},{+1, 0}},
{{+1, 0},{+1,-1},{ 0,-1}},
{{ 0,-1},{-1,-1},{-1, 0}},
{{-1, 0},{-1,+1},{ 0,+1}}};
public int countAlignmentPoints() {
int count = 0;
for (int j=0; j < _flen; j++)
for (int i = 0; i < _elen; i++)
if (this.aligned(j, i))
count += 1;
return count;
}
public Alignment mergeEnglishWords(int i, int j) {
if (i+1 != j)
throw new IllegalArgumentException("mergeEnglishWords can only combine adjacent positions! " + i + "," + j);
Alignment res = new Alignment(_flen, _elen - 1);
for (int a = 0; a < _flen; a++)
for (int b = 0; b < _elen; b++)
if (this.aligned(a, b)) {
int ee = b;
if (b > i) ee--;
res.align(a, ee);
}
return res;
}
public Alignment splitEnglishWords(int i) {
Alignment res = new Alignment(_flen, _elen + 1);
for (int a = 0; a < _flen; a++)
for (int b = 0; b < _elen; b++)
if (this.aligned(a, b)) {
int ee = b;
if (b == i)
res.align(a, i);
if (b >= i)
ee++;
res.align(a, ee);
}
return res;
}
public Alignment splitForeignWords(int j) {
Alignment res = new Alignment(_flen + 1, _elen);
for (int a = 0; a < _flen; a++)
for (int b = 0; b < _elen; b++)
if (this.aligned(a, b)) {
int ee = a;
if (a == j)
res.align(j, b);
if (a >= j)
ee++;
res.align(ee, b);
}
return res;
}
public void readFields(DataInput in) throws IOException {
if (_aligned == null)
_aligned = new M2();
_aligned.readFields(in);
_flen = _aligned._w;
_elen = (short)(_aligned._data.length / _flen);
faligned = new boolean[_flen];
ealigned = new boolean[_elen];
for (int f=0; f<_flen; f++)
for (int e=0; e<_elen; e++)
if (aligned(f,e)) {
faligned[f]=true;
ealigned[e]=true;
}
}
public byte getType() {
return 1;
}
public Object clone() {
Alignment res = new Alignment();
res._aligned = (M2)_aligned.clone();
res._elen = _elen;
res._flen = _flen;
res.ealigned = ealigned.clone();
res.faligned = faligned.clone();
return res;
}
public void write(DataOutput out) throws IOException {
_aligned.write(out);
}
public boolean equals(Object o) {
if (!(o instanceof Alignment)) { return false; }
return _aligned.equals(((Alignment)o)._aligned);
}
public boolean neighborAligned(int i, int j)
{
return countNeighbors(i, j, DIAG_NEIGHBORS) > 0;
}
public boolean lneighborAligned(int i, int j)
{
for (int x=0;x<LNEIGHBORS.length;x++) {
if (countNeighbors(i, j, LNEIGHBORS[x]) >= 2)
return true;
}
return false;
}
public java.util.Iterator<Alignment.IntPair> iterator() {
return new AIterator(this);
}
public final int countNeighbors(int f, int e, int[][] rels)
{
int res = 0;
for (int x=0; x<rels.length; x++) {
int cf = f + rels[x][0];
int ce = e + rels[x][1];
if (cf >= 0 && cf < _flen &&
ce >= 0 && ce < _elen && aligned(cf, ce)) {
res++; }
}
return res;
}
public final boolean rookAligned(int i, int j)
{
return faligned[i] || ealigned[j];
}
public final boolean doubleRookAligned(int i, int j)
{
return faligned[i] && ealigned[j];
}
public final int getELength()
{
return _elen;
}
public final int getFLength()
{
return _flen;
}
public Alignment()
{
_elen = 0;
_flen = 0;
_aligned = null;
}
public Alignment(int flen, int elen)
{
_elen = (short)(elen);
_flen = (short)(flen);
alloc();
}
public Alignment(int flen, int elen, String pa) {
_elen = (short)elen;
_flen = (short)flen;
alloc();
if (pa == null || pa.length() == 0) return;
String[] aps = pa.split("\\s+");
for (String ap : aps) {
String[] pair = ap.split("-");
if (pair.length != 2)
throw new IllegalArgumentException("Malformed alignment string: " + pa);
int f = Integer.parseInt(pair[0]);
int e = Integer.parseInt(pair[1]);
if (f >= _flen || e >= _elen)
throw new IndexOutOfBoundsException("out of bounds: " + f + "," + e);
align(f, e);
}
}
private void alloc()
{
faligned = new boolean[_flen];
ealigned = new boolean[_elen];
_aligned = new M2(_flen,_elen);
}
public final boolean aligned(int f, int e)
{
return _aligned.get(f,e);
}
public final void align(int f, int e)
{
_aligned.set(f,e);
faligned[f] = true;
ealigned[e] = true;
}
public final boolean isEAligned(int e) {
return ealigned[e];
}
public final boolean isFAligned(int f) {
return faligned[f];
}
public final void unalignF(int f) {
faligned[f] = false;
for (int i=0; i<_elen; i++)
_aligned.reset(f, i);
}
public final void unalignE(int e) {
ealigned[e] = false;
for (int i=0; i<_flen; i++)
_aligned.reset(i, e);
}
public static Alignment fromGiza(String eline, String fline, boolean transpose) {
Matcher es = eline_re.matcher(fline);
es.find();
boolean skipNull = false;
if (es.group(1).equals("NULL")) {
skipNull = true;
} else {
es.reset();
}
ArrayList<String> afwords = new ArrayList<String>();
while (es.find()) {
// System.out.format("Str: %s aligns: '%s'\n", es.group(1), es.group(2));
afwords.add(es.group(1));
}
String[] ewords = eline.split("\\s+");
Alignment al = null;
if (transpose) {
al = new Alignment(ewords.length, afwords.size());
} else {
al = new Alignment(afwords.size(), ewords.length);
}
es.reset();
if (skipNull) { es.find(); }
int i = 0;
while (es.find()) {
String saligns = es.group(2);
if (!saligns.matches("^\\s*$")) {
String[] aligns = saligns.split("\\s+");
for (int k=0; k<aligns.length; k++)
{
int j = Integer.parseInt(aligns[k]) - 1;
if (transpose)
al.align(j, i);
else
al.align(i, j);
}
}
i++;
}
return al;
}
public Alignment getTranspose() {
Alignment res = new Alignment(_elen, _flen);
for (int ei=0; ei<_elen; ei++)
for (int fi=0; fi<_flen; fi++)
if (aligned(fi, ei)) res.align(ei, fi);
return res;
}
public String toStringVisual() {
StringBuffer sb = new StringBuffer();
sb.append(' ');
for (int j=0; j<_flen; j++)
sb.append(j % 10);
sb.append('\n');
for (int i=0; i<_elen; i++) {
sb.append(i % 10);
for (int j=0; j<_flen; j++) {
if (aligned(j,i))
sb.append('*');
else
sb.append('.');
}
sb.append('\n');
}
return sb.toString();
}
public String toString()
{
StringBuffer sb = new StringBuffer();
for (int i=0; i<_flen; i++)
for (int j=0; j<_elen; j++)
if (aligned(i, j))
sb.append(i).append('-').append(j).append(' ');
if (sb.length() > 0)
sb.delete(sb.length()-1, sb.length());
return sb.toString();
}
public static Alignment intersect(Alignment a1, Alignment a2)
{
Alignment a = new Alignment(a1._flen, a1._elen);
for (int i=0; i<a1._flen; i++)
for (int j=0; j<a1._elen; j++)
if (a1.aligned(i, j) &&
a2.aligned(i, j))
a.align(i,j);
return a;
}
public static Alignment union(Alignment a1, Alignment a2)
{
Alignment a = new Alignment(a1._flen, a1._elen);
for (int i=0; i<a1._flen; i++)
for (int j=0; j<a1._elen; j++)
if (a1.aligned(i, j) ||
a2.aligned(i, j))
a.align(i,j);
return a;
}
}