{
// create a new pipe assembly that computes the word count across all the pages, and the word count in a single page
Pipe pipe = new Pipe( sourceName );
// convert the html to xhtml using the TagSoupParser. return only the fields "url" and "xml", discard the rest
pipe = new Each( pipe, new Fields( "page" ), new TagSoupParser( new Fields( "xml" ) ), new Fields( "url", "xml" ) );
// apply the given XPath expression to the xml in the "xml" field. this expression extracts the 'body' element.
XPathGenerator bodyExtractor = new XPathGenerator( new Fields( "body" ), XPathOperation.NAMESPACE_XHTML, "//xhtml:body" );
pipe = new Each( pipe, new Fields( "xml" ), bodyExtractor, new Fields( "url", "body" ) );
// apply another XPath expression. this expression removes all elements from the xml, leaving only text nodes.
// text nodes in a 'script' element are removed.