package com.adobe.epubcheck.ctc;
import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.messages.MessageLocation;
import com.adobe.epubcheck.ocf.EncryptionFilter;
import com.adobe.epubcheck.util.EPUBVersion;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
class EntitySearch
{
private final ZipFile zip;
private final Hashtable<String, EncryptionFilter> enc;
private final Report report;
private final EPUBVersion version;
static final Pattern entityPattern = Pattern.compile("&([A-Za-z0-9]+)([;|\\s])");
static final HashSet<String> legalEntities2_0;
static final HashSet<String> legalEntities3_0;
MessageId ENTITY_INVALID = MessageId.HTM_023;
MessageId ENTITY_OK = MessageId.HTM_006;
MessageId ENTITY_IMPROPER = MessageId.HTM_024;
static
{
legalEntities3_0 = new HashSet<String>();
Collections.addAll(legalEntities3_0, "&", "'", """, "<", ">");
legalEntities2_0 = new HashSet<String>();
Collections.addAll(legalEntities2_0, " ", "¡", "¢", "£", "¤",
"¥", "¦", "§", "¨", "©", "ª", "«", "¬", "­", "®",
"¯", "°", "±", "²", "³", "´", "µ", "¶", "·", "¸",
"¹", "º", "»", "¼", "½", "¾", "¿", "À", "Á",
"Â", "Ã", "Ä", "Å", "Æ", "Ç", "È", "É", "Ê", "Ë",
"Ì", "Í", "Î", "Ï", "Ð", "Ñ", "Ò", "Ó", "Ô", "Õ",
"Ö", "×", "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ", "ß",
"à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é",
"ê", "ë", "ì", "í", "î", "ï", "ð", "ñ", "ò", "ó",
"ô", "õ", "ö", "÷", "ø", "ù", "ú", "û", "ü", "ý",
"þ", "ÿ", "Œ", "œ", "Š", "š", "Ÿ", "ƒ", "ˆ", "˜",
"Α", "Β", "Γ", "Δ", "Ε", "Ζ", "Η", "Θ", "Ι", "Κ",
"Λ", "Μ", "Ν", "Ξ", "Ο", "Π", "Ρ", "Σ", "Τ", "Υ", "Φ", "Χ",
"Ψ", "Ω", "α", "β", "γ", "δ", "ε", "ζ", "η", "θ", "ι",
"κ", "λ", "μ", "ν", "ξ", "ο", "π", "ρ", "ς", "σ", "τ",
"υ", "φ", "χ", "ψ", "ω", "ϑ", "ϒ", "ϖ", " ", " ", " ",
"‌", "‍", "‎", "‏", "–", "—", "‘", "’", "‚", "“", "”",
"„", "†", "‡", "•", "…", "‰", "′", "″", "‹", "›",
"‾", "⁄", "€", "ℑ", "℘", "ℜ", "™", "ℵ", "←", "↑",
"→", "↓", "↔", "↵", "⇐", "⇑", "⇒", "⇓", "⇔", "∀", "∂",
"∃", "∅", "∇", "∈", "∉", "∋", "∏", "∑", "−", "∗", "√",
"∝", "∞", "∠", "∧", "∨", "∩", "∪", "∫", "∴", "∼", "≅", "≈",
"≠", "≡", "≤", "≥", "⊂", "⊃", "⊄", "⊆", "⊇", "⊕", "⊗", "⊥",
"⋅", "⋮", "⌈", "⌉", "⌊", "⌋", "⟨", "⟩", "◊", "♠", "♣",
"♥", "♦");
}
public EntitySearch(EPUBVersion version, ZipFile zip, Report report)
{
this.zip = zip;
this.enc = new Hashtable<String, EncryptionFilter>();
this.report = report;
this.version = version;
}
InputStream getInputStream(String name) throws
IOException
{
ZipEntry entry = zip.getEntry(name);
if (entry == null)
{
return null;
}
InputStream in = zip.getInputStream(entry);
EncryptionFilter filter = enc.get(name);
if (filter == null)
{
return in;
}
if (filter.canDecrypt())
{
return filter.decrypt(in);
}
return null;
}
public Vector<String> Search(String entry)
{
Vector<String> result = new Vector<String>();
InputStream is = null;
try
{
is = getInputStream(entry);
Scanner in = new Scanner(is);
int lineCounter = 1;
while (in.hasNextLine())
{
String line = in.nextLine();
Matcher matcher = entityPattern.matcher(line);
int position = 0;
while (matcher.find(position))
{
MessageId messageCode = ENTITY_INVALID;
position = matcher.end();
String matchedText = line.substring(matcher.start(), matcher.end());
if (version == EPUBVersion.VERSION_2)
{
if (legalEntities3_0.contains(matchedText) || legalEntities2_0.contains(matchedText))
{
// its in either the legal 2.0 list or the 3.0 list. Simply emit a usage message
messageCode = ENTITY_OK;
}
}
else if (version == EPUBVersion.VERSION_3)
{
if (legalEntities3_0.contains(matchedText))
{
// its in the 3.0 list. just emit a usage message
messageCode = ENTITY_OK;
}
else if (legalEntities2_0.contains(matchedText))
{
// its in the 2.0 list. Emit a usage message saying that only & ' etc. are allowed
messageCode = ENTITY_IMPROPER;
}
}
int contextStart = Math.max(0, matcher.start() - 20);
int contextEnd = Math.min(contextStart + 40, line.length() - 1);
String context = line.substring(contextStart, contextEnd);
if (messageCode == ENTITY_INVALID)
{
// emit the erroneous text along with the message
report.message(messageCode, new MessageLocation(entry, lineCounter, matcher.start(), context.trim()), matchedText);
}
else
{
report.message(messageCode, new MessageLocation(entry, lineCounter, matcher.start(), context.trim()));
}
}
lineCounter++;
}
}
catch (FileNotFoundException e1)
{
String fileName = new File(zip.getName()).getName();
report.message(MessageId.RSC_001, new MessageLocation(fileName, -1, -1), entry);
}
catch (IOException e1)
{
String fileName = new File(zip.getName()).getName();
report.message(MessageId.PKG_008, new MessageLocation(fileName, -1, -1), entry);
}
catch (Exception e)
{
e.printStackTrace();
report.message(MessageId.RSC_005, new MessageLocation(entry, -1, -1), e.getMessage());
}
finally
{
if (is != null)
{
try
{
is.close();
}
catch (Exception ignored)
{
}
}
}
return result;
}
}