String stem = mCandidate.getExp();
int stemLen = stem.length();
String preStem = stem.substring(0, stemLen - 1);
char lastCh = stem.charAt(stemLen - 1), preLastCh = 0, mo = 0;
Hangul lastHg = Hangul.split(lastCh), preLastHg = null;
if( stemLen > 1 ) {
preLastCh = stem.charAt(stemLen - 2);
preLastHg = Hangul.split(preLastCh);
} else {
preLastCh = 0;
}
String exp = null;
MCandidate mCandidateClone = null;
// TODO
// 사 주다 -> 사+아+주+다 와 같이 한글자 어간 'ㅏ'로 끝나는 말
// 2007-07-06 너무 많은 후보군들이 생성되버려서 문제 생김
// 많이 사용되는 것만 따로 사전에 추가하도록 함
// 2009-10-17 일단 넣어줌.
if( stem.length() == 1 && !lastHg.hasJong() && lastHg.cho != 'ㅎ' ) {
exp = stem;
if( lastHg.jung == 'ㅏ' ) {
mCandidateClone = mCandidate.copy();
mCandidateClone.add(new Morpheme("아", POSTag.ECS));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.AH);
mCandidateClone.setRealDicLen((byte) exp.length());
ret.add(mCandidateClone);
} else if( lastHg.jung == 'ㅓ' ) {
mCandidateClone = mCandidate.copy();
mCandidateClone.add(new Morpheme("어", POSTag.ECS));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.AH);
mCandidateClone.setRealDicLen((byte) exp.length());
ret.add(mCandidateClone);
}
}
// Stems ending in '찮' or '잖' are often written without the 'ㅎ' of the double
// final consonant 'ㄶ', so also register the variant whose final consonant is 'ㄴ' alone.
if( lastCh == '찮' || lastCh == '잖' ) {
mCandidateClone = mCandidate.copy();
// Same initial consonant and vowel as the last character, final consonant 'ㄴ'.
exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, 'ㄴ');
mCandidateClone.setExp(exp);
mCandidateClone.setRealDicLen((byte)exp.length());
mCandidateClone.decreaseNumOfPrfrdCond();
mCandidateClone.setAutoExtd(true);
// NOTE(review): setExp(exp) was already called above with the same value; this
// second call looks redundant -- confirm setExp has no side effects before removing it.
mCandidateClone.setExp(exp);
ret.add(mCandidateClone);
}
// Contracted forms of stems ending in '하': each candidate is a copy of the
// original with the absorbed ending appended as an extra morpheme.
if( lastCh == '하' ) {
// 했 = 하 + 였 (past-tense marker)
mCandidateClone = mCandidate.copy();
exp = preStem + "했";
mCandidateClone.add(new Morpheme("였", POSTag.EPT));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.EUT);
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
// 해 = 하 + 여, with 여 tagged ECS
mCandidateClone = mCandidate.copy();
exp = preStem + "해";
mCandidateClone.add(new Morpheme("여", POSTag.ECS));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.AH);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
// Sentence-final form: same surface 해, but 여 is tagged EFN
mCandidateClone = mCandidate.copy();
exp = preStem + "해";
mCandidateClone.add(new Morpheme("여", POSTag.EFN));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.AH);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
// For adjectives, 하지 can be shortened to 치.
if( mCandidate.isTagOf(POSTag.VA | POSTag.VXA) ) {
mCandidateClone = mCandidate.copy();
exp = preStem + "치";
mCandidateClone.add(new Morpheme("지", POSTag.ECS));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.setRealDicLen((byte) exp.length());
ret.add(mCandidateClone);
}
}
// Stems whose last character has the vowel 'ㅣ' and no final consonant.
else if( !lastHg.hasJong() && lastHg.jung == 'ㅣ' ) {
// ㅣ + 었 contracts to ㅕㅆ: vowel becomes 'ㅕ', final consonant 'ㅆ'.
mCandidateClone = mCandidate.copy();
exp = preStem + Hangul.combine(lastHg.cho, 'ㅕ', 'ㅆ');
mCandidateClone.add(new Morpheme("었", POSTag.EPT));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.EUT);
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
// ㅣ + 어 contracts to ㅕ; ' ' is passed as the final-consonant argument.
mCandidateClone = mCandidate.copy();
exp = preStem + Hangul.combine(lastHg.cho, 'ㅕ', ' ');
mCandidateClone.add(new Morpheme("어", POSTag.ECS));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
// This is the only branch here that sets MOEUM and EUMSEONG together with AH.
mCandidateClone.addHavingCond(Condition.MOEUM | Condition.EUMSEONG | Condition.AH);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// Stem forms produced by attaching 'ㅆ' directly (ㅏㅆ, ㅐㅆ, ㅕㅆ, ...): when the
// last character has no final consonant and its vowel is in MO_SET1, the vowel is
// kept and 'ㅆ' becomes the final consonant; the absorbed morpheme is recorded as 었.
else if( !lastHg.hasJong() && MO_SET1.contains(lastHg.jung) ) {
mCandidateClone = mCandidate.copy();
exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, 'ㅆ');
mCandidateClone.add(new Morpheme("었", POSTag.EPT));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.EUT);
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// '르' irregular conjugation. Two candidates are produced: one with the past-tense
// marker (았/었) fused in, and one with the 아/어 ending fused in.
//   - 따르 keeps its preceding character and becomes 따랐 / 따라.
//   - 푸르 keeps the whole stem and gains 렀 / 러 (푸르렀 / 푸르러).
//   - otherwise 'ㄹ' is added under the preceding character and the last character
//     takes the vowel returned by getMoeum (e.g. 모르 -> 몰랐 / 몰라).
// The stemLen > 1 guard keeps a stem consisting only of "르" out of this branch:
// with no preceding character the generic case would always fail in
// stem.substring(0, stemLen - 2). Such a stem is left to the branches that follow.
else if( lastCh == '르' && stemLen > 1 ) {
    // Past tense: 았 / 었
    mCandidateClone = mCandidate.copy();
    mCandidateClone.clearHavingCondition();
    if( preLastCh == '따' ) {
        exp = preStem + "랐";
        mCandidateClone.add(new Morpheme("았", POSTag.EPT));
        mCandidateClone.addHavingCond(Condition.EUT);
    } else if( preLastCh == '푸' ) {
        exp = stem + "렀";
        mCandidateClone.add(new Morpheme("었", POSTag.EPT));
        mCandidateClone.addHavingCond(Condition.EUT);
    } else {
        mo = getMoeum(lastHg, preLastHg);
        exp = stem.substring(0, stemLen - 2)
            + Hangul.combine(preLastHg.cho, preLastHg.jung, 'ㄹ')
            + Hangul.combine(lastHg.cho, mo, 'ㅆ');
        if( mo == 'ㅏ' ) {
            mCandidateClone.add(new Morpheme("았", POSTag.EPT));
        } else {
            mCandidateClone.add(new Morpheme("었", POSTag.EPT));
        }
        mCandidateClone.addHavingCond(Condition.EUT);
    }
    mCandidateClone.setExp(exp);
    mCandidateClone.setAutoExtd(true);
    mCandidateClone.initHavingCond(exp);
    mCandidateClone.setCandDicLen((byte)exp.length());
    ret.add(mCandidateClone);
    // Ending: 아 / 어
    mCandidateClone = mCandidate.copy();
    mCandidateClone.clearHavingCondition();
    if( preLastCh == '따' ) {
        exp = preStem + "라";
        mCandidateClone.add(new Morpheme("아", POSTag.ECS));
        mCandidateClone.addHavingCond(Condition.AH);
    } else if( preLastCh == '푸' ) {
        exp = stem + "러";
        mCandidateClone.add(new Morpheme("어", POSTag.ECS));
        mCandidateClone.addHavingCond(Condition.AH);
    } else {
        mo = getMoeum(lastHg, preLastHg);
        exp = stem.substring(0, stemLen - 2)
            + Hangul.combine(preLastHg.cho, preLastHg.jung, 'ㄹ')
            + Hangul.combine(lastHg.cho, mo, ' ');
        if( mo == 'ㅏ' ) {
            mCandidateClone.add(new Morpheme("아", POSTag.ECS));
            mCandidateClone.addHavingCond(Condition.AH);
        } else {
            mCandidateClone.add(new Morpheme("어", POSTag.ECS));
            mCandidateClone.addHavingCond(Condition.AH);
        }
    }
    mCandidateClone.setExp(exp);
    mCandidateClone.setAutoExtd(true);
    mCandidateClone.initHavingCond(exp);
    mCandidateClone.setRealDicLen((byte)exp.length());
    ret.add(mCandidateClone);
}
// Stem forms produced when a final 'ㅡ' (no final consonant) merges with the ending:
// the 'ㅡ' is replaced by the vowel returned by getMoeum.
else if( !lastHg.hasJong() && lastHg.jung == 'ㅡ' ) {
// Resolve the replacement vowel once; 'ㅏ' selects the 았/아 morphemes,
// anything else selects 었/어.
mo = getMoeum(lastHg, preLastHg);
// Past tense: replacement vowel plus final consonant 'ㅆ'.
mCandidateClone = mCandidate.copy();
mCandidateClone.clearHavingCondition();
exp = preStem + Hangul.combine(lastHg.cho, mo, 'ㅆ');
if( mo == 'ㅏ' ) {
mCandidateClone.add(new Morpheme("았", POSTag.EPT));
mCandidateClone.addHavingCond(Condition.EUT);
} else {
mCandidateClone.add(new Morpheme("었", POSTag.EPT));
mCandidateClone.addHavingCond(Condition.EUT);
}
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.initHavingCond(exp);
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
// Ending 어 / 아: replacement vowel with ' ' passed as the final consonant.
mCandidateClone = mCandidate.copy();
mCandidateClone.clearHavingCondition();
exp = preStem + Hangul.combine(lastHg.cho, mo, ' ');
if( mo == 'ㅏ' ) {
mCandidateClone.add(new Morpheme("아", POSTag.ECS));
mCandidateClone.addHavingCond(Condition.AH);
} else {
mCandidateClone.add(new Morpheme("어", POSTag.ECS));
mCandidateClone.addHavingCond(Condition.AH);
}
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.initHavingCond(exp);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// Stem forms produced when a final 'ㅜ' or 'ㅗ' merges with the ending: applies when
// the last character has no final consonant and its vowel is in MO_SET2. The merged
// vowel comes from getMoeum; the recorded morpheme depends on whether the stem
// vowel is 'ㅜ' (었/어) or not (았/아).
else if( !lastHg.hasJong() && MO_SET2.contains(lastHg.jung) ) {
// Past tense: 었 / 았
mCandidateClone = mCandidate.copy();
exp = preStem + Hangul.combine(lastHg.cho, getMoeum(lastHg, preLastHg), 'ㅆ');
if( lastHg.jung == 'ㅜ' ) {
mCandidateClone.add(new Morpheme("었", POSTag.EPT));
} else {
mCandidateClone.add(new Morpheme("았", POSTag.EPT));
}
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.EUT);
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
// Ending: 어 / 아
mCandidateClone = mCandidate.copy();
exp = preStem + Hangul.combine(lastHg.cho, getMoeum(lastHg, preLastHg), ' ');
if( lastHg.jung == 'ㅜ' ) {
mCandidateClone.add(new Morpheme("어", POSTag.ECS));
} else {
mCandidateClone.add(new Morpheme("아", POSTag.ECS));
}
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.AH);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// Compound-vowel contraction: a last character with the vowel 'ㅚ' and no final
// consonant fuses with a following 어 / 었 into 'ㅙ' (e.g. 되 + 어 -> 돼, 되 + 었 -> 됐).
// Only 'ㅚ' contracts to 'ㅙ', so the vowel is tested for equality; testing for
// inequality would skip 'ㅚ' stems and emit bogus 'ㅙ' forms for every other
// vowel that reaches this branch.
else if( !lastHg.hasJong() && lastHg.jung == 'ㅚ' ) {
    // Fused with the ending 어
    mCandidateClone = mCandidate.copy();
    exp = preStem + Hangul.combine(lastHg.cho, 'ㅙ', ' ');
    mCandidateClone.add(new Morpheme("어", POSTag.ECS));
    mCandidateClone.setExp(exp);
    mCandidateClone.setAutoExtd(true);
    mCandidateClone.clearHavingCondition();
    mCandidateClone.initHavingCond(exp);
    mCandidateClone.addHavingCond(Condition.AH);
    mCandidateClone.setRealDicLen((byte)exp.length());
    ret.add(mCandidateClone);
    // Fused with the past-tense marker 었
    mCandidateClone = mCandidate.copy();
    exp = preStem + Hangul.combine(lastHg.cho, 'ㅙ', 'ㅆ');
    mCandidateClone.add(new Morpheme("었", POSTag.EPT));
    mCandidateClone.setExp(exp);
    mCandidateClone.setAutoExtd(true);
    mCandidateClone.clearHavingCondition();
    mCandidateClone.initHavingCond(exp);
    mCandidateClone.addHavingCond(Condition.EUT);
    mCandidateClone.setCandDicLen((byte)exp.length());
    ret.add(mCandidateClone);
}
// 'ㅂ' irregular conjugation.
// The string lists the final characters of stems that conjugate irregularly.
// Characters such as '뵙뽑씹업입잡접좁집' are deliberately absent: they conjugate regularly.
if( "갑겁겹곱굽깁깝껍꼽납눕답덥돕둡땁떱랍럽렵롭립맙맵밉볍섭쉽습엽줍쭙춥탑".indexOf(lastCh) > -1 ) {
// Last character with the final 'ㅂ' dropped.
char bChar = Hangul.combine(lastHg.cho, lastHg.jung, ' ');
// '럽' conjugates not only as '러운' but also in the short form '런'.
if( lastCh == '럽' ) {
mCandidateClone = mCandidate.copy();
exp = preStem + '런';
mCandidateClone.add(new Morpheme("ㄴ", POSTag.ETD));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.decreaseNumOfPrfrdCond();
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// Ending 워 / 와: a stem vowel of 'ㅗ' takes 와 (morpheme 아), any other vowel takes 워 (morpheme 어).
mCandidateClone = mCandidate.copy();
if( lastHg.jung == 'ㅗ') {
mo = 'ㅘ';
mCandidateClone.add(new Morpheme("아", POSTag.ECS));
} else {
mo = 'ㅝ';
mCandidateClone.add(new Morpheme("어", POSTag.ECS));
}
exp = preStem + bChar + Hangul.combine('ㅇ', mo, ' ');
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.AH);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
// Past tense 웠 / 왔: same vowel choice, with 'ㅆ' as the final consonant.
mCandidateClone = mCandidate.copy();
mCandidateClone.clearHavingCondition();
if( lastHg.jung == 'ㅗ') {
mo = 'ㅘ';
mCandidateClone.add(new Morpheme("았", POSTag.EPT));
} else {
mo = 'ㅝ';
mCandidateClone.add(new Morpheme("었", POSTag.EPT));
}
exp = preStem + bChar + Hangul.combine('ㅇ', mo, 'ㅆ');
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.EUT);
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
// Bare '우' form: only the surface form changes, no morpheme is appended.
mCandidateClone = mCandidate.copy();
exp = preStem + bChar + '우';
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
// Forms fused with the endings ㄴ, ㄹ and ㅁ: 운, 울, 움.
mCandidateClone = mCandidate.copy();
exp = preStem + bChar + '운';
mCandidateClone.add(new Morpheme("ㄴ", POSTag.ETD));
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
mCandidateClone = mCandidate.copy();
mCandidateClone.add(new Morpheme("ㄹ", POSTag.ETD));
exp = preStem + bChar + '울';
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
mCandidateClone = mCandidate.copy();
mCandidateClone.add(new Morpheme("ㅁ", POSTag.ETN));
exp = preStem + bChar + '움';
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.setRealDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// 'ㅅ' irregular conjugation: for the listed final characters, register the form
// with the final 'ㅅ' dropped and tag it with Condition.MINUS_SIOT.
else if( "젓짓긋낫붓잇".indexOf(lastCh) > -1 )
{
mCandidateClone = mCandidate.copy();
// Same initial consonant and vowel; ' ' is passed as the final consonant.
exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, ' ');
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
mCandidateClone.addHavingCond(Condition.MINUS_SIOT);
mCandidateClone.decreaseNumOfPrfrdCond();
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// 'ㄷ' irregular conjugation: a final consonant 'ㄷ' is replaced by 'ㄹ'.
else if( lastHg.jong == 'ㄷ' ) {
mCandidateClone = mCandidate.copy();
exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, 'ㄹ');
mCandidateClone.setExp(exp);
mCandidateClone.setAutoExtd(true);
mCandidateClone.clearHavingCondition();
mCandidateClone.initHavingCond(exp);
// NOTE(review): this tags the same Condition.MINUS_SIOT flag as the 'ㅅ' irregular
// branch even though the consonant changed here is 'ㄷ' -- confirm this is intended.
mCandidateClone.addHavingCond(Condition.MINUS_SIOT);
mCandidateClone.decreaseNumOfPrfrdCond();
mCandidateClone.setCandDicLen((byte)exp.length());
ret.add(mCandidateClone);
}
// 그외 처리
else if( !lastHg.hasJong() || lastHg.jong == 'ㄹ'
// ㅎ 불규칙 처리
|| lastCh == '맣' || lastCh == '갛' || lastCh == '랗'
)
{
// ㄴ, ㄹ, ㅁ, ㅂ 에 의한 활용