Package com.gentics.cr.lucene.indexer.transformer

Source Code of com.gentics.cr.lucene.indexer.transformer.CleanupTextTransformerTest

package com.gentics.cr.lucene.indexer.transformer;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.net.URL;

import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;

import com.gentics.cr.CRConfigUtil;
import com.gentics.cr.CRResolvableBean;
import com.gentics.cr.exceptions.CRException;
import com.gentics.cr.util.CRUtil;

public class CleanupTextTransformerTest {

  private static final String CONTENT_ATTRIBUTE = "content";

  /**
   * öäüÄÜÖ߀
   */
  private static final String UMLAUTS = "\u00F6\u00E4\u00FC\u00C4\u00DC\u00D6\u00DF\u20AC";

  CRConfigUtil config = null;

  @Before
  public void setUp() throws Exception {
    URL confPath = new File(this.getClass().getResource("/config/nodelog.properties").toURI()).getParentFile().toURI().toURL();
    System.setProperty(CRUtil.PORTALNODE_CONFPATH, confPath.getPath());
    config = new CRConfigUtil();
    config.set("attribute", CONTENT_ATTRIBUTE);
  }

  @Test
  public void testStripWhitespace() throws IOException, CRException, URISyntaxException {
    config.set("trimContent", "true");
    String testContent = readFile("whitespacefile.txt");
    assertEquals(readFile("cleanedwhitespacefile.txt"), transform(testContent));
  }

  private String readFile(final String fileName) throws URISyntaxException, FileNotFoundException, IOException {
    FileInputStream inputStream = new FileInputStream(new File(this.getClass().getResource(fileName).toURI()));
    try {
      return IOUtils.toString(inputStream);
    } finally {
      inputStream.close();
    }
  }

  @Test
  public void testIndexPoints() throws CRException {
    String result = transform("1. Index\n"
        + "Title ....................................................................................................................... 1\n"
        + "1. First chapter ...................................................................................................................... 2\n"
        + "2. Second chapter .................................................................................................................................... 7");
    assertEquals(
      "Index points are not reduced correctly.",
      "1. Index Title ... 1 1. First chapter ... 2 2. Second chapter ... 7",
      result);

    result = transform("First chapter . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . Page 33 Second chapter . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . Page 66");
    assertEquals("Index points are not reduced correctly.", "First chapter ... Page 33 Second chapter ... Page 66", result);
  }

  @Test
  public void testUmlauts() throws CRException {
    assertEquals("Umlauts cannot be processed correctly.", UMLAUTS, transform(UMLAUTS + ""));
  }

  @Test
  public void testNotPrintableCharacters() throws CRException {
    String stringWithNonPrintableCharacters = "Drittstaaten:  HYPERLINK \"http://www.help.gv.at/Content.Node/12/Seite.120000.html\" \\o \"Öffnet in neuem Fenster\" \\t \"_blank\" Aufenthaltsberechtigung";
    String expectedResult = "Drittstaaten: HYPERLINK \"http://www.help.gv.at/Content.Node/12/Seite.120000.html\" \\o \"Öffnet in neuem Fenster\" \\t \"_blank\" Aufenthaltsberechtigung";
    assertEquals("Special characters are not elminiated correctly.", expectedResult, transform(stringWithNonPrintableCharacters + ""));

    stringWithNonPrintableCharacters = "Person Familienname:  FORMTEXT       Vorname:  FORMTEXT       Standort:  FORMTEXT       Stock:  FORMTEXT ";
    expectedResult = "Person Familienname: FORMTEXT Vorname: FORMTEXT Standort: FORMTEXT Stock: FORMTEXT ";
    assertEquals("Special characters are not elminiated correctly.", expectedResult, transform(stringWithNonPrintableCharacters + ""));
  }

  @Test
  public void testMultipleSpaces() throws CRException {
    final String stringWithMultipleSpaces = "test1  test2   test3    test4";
    final String expectedResult = "test1 test2 test3 test4";
    assertEquals("Multiple Spaces are not replaced correctly.", expectedResult, transform(stringWithMultipleSpaces));
  }

  @Test
  public void testPendingSpace() throws CRException {
    assertEquals("Pending Space was not handled correctly.", "a ", transform("a "));
    assertEquals("Pending Space was not handled correctly.", "a... ", transform("a.... "));
  }

  @Test
  public void testByteArray() throws CRException, UnsupportedEncodingException {
    CRResolvableBean bean = new CRResolvableBean();
    bean.set(CONTENT_ATTRIBUTE, UMLAUTS.getBytes());
    ContentTransformer transformer = new CleanupTextTransformer(config);
    transformer.processBean(bean);
    String result = bean.getString(CONTENT_ATTRIBUTE);
    assertEquals("Cannot handle the byte array correctly.", UMLAUTS, result);
  }

  @Test
  public void testUnchanged() throws CRException {
    String testStringNotToChange = "test";
    assertSame("String had not to be changed.", testStringNotToChange, transform(testStringNotToChange));

  }

  private String transform(String string) throws CRException {
    CRResolvableBean bean = new CRResolvableBean();
    bean.set(CONTENT_ATTRIBUTE, string);
    ContentTransformer transformer = new CleanupTextTransformer(config);
    transformer.processBean(bean);
    return bean.getString(CONTENT_ATTRIBUTE);
  }
}
TOP

Related Classes of com.gentics.cr.lucene.indexer.transformer.CleanupTextTransformerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.