Package org.apache.oro.text.regex

Examples of org.apache.oro.text.regex.PatternMatcherInput


  public ArrayList<String> extractHrefs() {

    ArrayList<String> aHrefs = new ArrayList<String>();
  try {
      if (null==oFullHref) oFullHref = oCompiler.compile("<a ((accesskey|charset|class|coords|dir|hreflang|id|lang|name|rel|rev|shape|style|tabindex|target|title)\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?)* href\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?", Perl5Compiler.CASE_INSENSITIVE_MASK);
      PatternMatcherInput oPinpt = new PatternMatcherInput(sBody);
      while (oMatcher.contains(oPinpt, oFullHref)) {
      aHrefs.add(oMatcher.getMatch().group(4));
      if (oPinpt.endOfInput()) break;
      } // wend
    } catch (MalformedPatternException neverthrown) { }
    return aHrefs;
  } // extractHrefs()
View Full Code Here


   */
  public ArrayList<String> extractLocalUrls()
    throws ArrayIndexOutOfBoundsException {

    ArrayList<String> aLocalUrls = new ArrayList<String>();
    PatternMatcherInput oPinpt;
    String sSrcUrl, sHrefUrl;
   
    try {
      synchronized(oCompiler) {
        if (null==oFullHref) oFullHref = oCompiler.compile("<a( (accesskey|charset|class|coords|dir|hreflang|id|lang|name|rel|rev|shape|style|tabindex|target|title)\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?)* href\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?", Perl5Compiler.CASE_INSENSITIVE_MASK);
        if (null==oGoodHref) oGoodHref = oCompiler.compile("<a( (accesskey|charset|class|coords|dir|hreflang|id|lang|name|rel|rev|shape|style|tabindex|target|title)\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?)* href\\s*=\\s*[\"']?(http://|https://|mailto:)\\w+([^'\"\\r\\n]+)[\"']?", Perl5Compiler.CASE_INSENSITIVE_MASK);
        if (null==oHostHref) oHostHref = oCompiler.compile("<a( (accesskey|charset|class|coords|dir|hreflang|id|lang|name|rel|rev|shape|style|tabindex|target|title)\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?)* href\\s*=\\s*[\"']?(http://|https://)localhost([^'\"\\r\\n]+)[\"']?", Perl5Compiler.CASE_INSENSITIVE_MASK);
        if (null==oFullSrc) oFullSrc = oCompiler.compile("<img( (align|alt|border|class|dir|height|hspace|id|ismap|lang|longdesc|style|title|usemap|vspace|width)\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?)* src\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?", Perl5Compiler.CASE_INSENSITIVE_MASK);
      if (null==oGoodSrc) oGoodSrc = oCompiler.compile("<img( (align|alt|border|class|dir|height|hspace|id|ismap|lang|longdesc|style|title|usemap|vspace|width)\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?)* src\\s*=\\s*[\"']?(cid:|http://|https://)([^'\"\\r\\n]+)[\"']?", Perl5Compiler.CASE_INSENSITIVE_MASK);
      if (null==oHostSrc) oHostSrc = oCompiler.compile("<img( (align|alt|border|class|dir|height|hspace|id|ismap|lang|longdesc|style|title|usemap|vspace|width)\\s*=\\s*[\"']?([^'\"\\r\\n]+)[\"']?)* src\\s*=\\s*[\"']?(http://localhost|https://localhost)([^'\"\\r\\n]+)[\"']?", Perl5Compiler.CASE_INSENSITIVE_MASK);
      }

      oPinpt = new PatternMatcherInput(sBody);
      while (oMatcher.contains(oPinpt, oFullSrc)) {
      sSrcUrl = oMatcher.getMatch().toString();
      if (!oMatcher.matches(sSrcUrl, oGoodSrc))
      aLocalUrls.add(sSrcUrl);
      if (oPinpt.endOfInput()) break;
      } // wend

      oPinpt.setCurrentOffset(oPinpt.getBeginOffset());
      while (oMatcher.contains(oPinpt, oFullSrc)) {
      sSrcUrl = oMatcher.getMatch().toString();
    if (oMatcher.matches(sSrcUrl, oHostSrc))
      aLocalUrls.add(sSrcUrl);
      if (oPinpt.endOfInput()) break;
      } // wend
     
      oPinpt.setCurrentOffset(oPinpt.getBeginOffset());
      while (oMatcher.contains(oPinpt, oFullHref)) {
    sHrefUrl = oMatcher.getMatch().toString();
      if (!oMatcher.matches(sHrefUrl, oGoodHref))
      aLocalUrls.add(sHrefUrl);
      if (oPinpt.endOfInput()) break;
      } // wend

      oPinpt.setCurrentOffset(oPinpt.getBeginOffset());
      while (oMatcher.contains(oPinpt, oFullHref)) {
    sHrefUrl = oMatcher.getMatch().toString();
    if (oMatcher.matches(sHrefUrl, oHostHref))
      aLocalUrls.add(sHrefUrl);   
      if (oPinpt.endOfInput()) break;
      } // wend
    } catch (MalformedPatternException neverthrown) { }

  return aLocalUrls;
  } // extractLocalUrls
View Full Code Here

 

  protected Zone accept(String pathinfo, int start)
  {
    if (_pattern != null) {
      PatternMatcherInput input = new PatternMatcherInput(pathinfo, start, pathinfo.length()-start);
      Perl5Matcher matcher = new Perl5Matcher();
      if (matcher.contains(input, _pattern)) {
        return resolveZone(pathinfo, start);
      }
    } else {
View Full Code Here

    private int _index = 0;

    public StringPatternMatcher(Pattern pattern, String string)
    {
      _pattern = pattern;
      _input = new PatternMatcherInput(string);
    }
View Full Code Here

    if (oMatcher.contains(sHtml, oPattern)) {
      sBaseHref = Gadgets.chomp(oMatcher.getMatch().group(3),"/");
      if (DebugFile.trace) DebugFile.writeln("<base href="+sBaseHref+">");
    }
 
      PatternMatcherInput oMatchInput = new PatternMatcherInput(sHtml);
    oPattern = oCompiler.compile("\\x20(src=|background=|background-image:url\\x28)(\"|')?([^'\"\\r\\n]+)(\"|')?(\\x20|\\x29|/|>)", Perl5Compiler.CASE_INSENSITIVE_MASK);
    StringSubstitution oSrcSubs = new StringSubstitution();
    int nMatches = 0;
    while (oMatcher.contains(oMatchInput, oPattern)) {
      nMatches++;
      String sMatch = oMatcher.getMatch().toString();
      String sAttr = oMatcher.getMatch().group(1);
      String sQuo = oMatcher.getMatch().group(2);
      if (sQuo==null) sQuo = "";
      String sSrc = oMatcher.getMatch().group(3);
      if (DebugFile.trace) DebugFile.writeln("Source file found at "+sSrc);
      String sEnd = oMatcher.getMatch().group(5);
      if (!oFiles.contains(sSrc)) oFiles.add(sSrc);
      String sFilename = sSrc.substring(sSrc.replace('\\','/').lastIndexOf('/')+1);
      if (DebugFile.trace)
        DebugFile.writeln("StringSubstitution.setSubstitution("+sMatch+" replace with "+sMatch.substring(0,sAttr.length()+1)+sQuo+sFilename+sQuo+sEnd+")");
      oSrcSubs.setSubstitution(sMatch.substring(0,sAttr.length()+1)+sQuo+sFilename+sQuo+sEnd);
        sHtml = Util.substitute(oReplacer, oCompiler.compile(sMatch), oSrcSubs, sHtml, Util.SUBSTITUTE_ALL);
    } //wend
   
    oMatchInput = new PatternMatcherInput(sHtml);
    oPattern = oCompiler.compile("<link\\x20+(rel=(\"|')?stylesheet(\"|')?\\x20+)?(type=(\"|')?text/css(\"|')?\\x20+)?href=(\"|')?([^'\"\\r\\n]+)(\"|')?");
    while (oMatcher.contains(oMatchInput, oPattern)) {
      nMatches++;
      String sMatch = oMatcher.getMatch().toString();
      String sSrc = oMatcher.getMatch().group(8);
View Full Code Here

        Perl5Matcher localMatcher = JMeterUtils.getMatcher();
        // The headers and body are divided by a blank line
        String regularExpression = "^.$";
        Pattern pattern = JMeterUtils.getPattern(regularExpression, Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.MULTILINE_MASK);
       
        PatternMatcherInput input = new PatternMatcherInput(stringToCheck);
        while(localMatcher.contains(input, pattern)) {
            MatchResult match = localMatcher.getMatch();
            return match.beginOffset(0);
        }
        // No divider was found
View Full Code Here

        }

        List<MatchResult> collectAllMatches = new ArrayList<MatchResult>();
        try {
            PatternMatcher matcher = JMeterUtils.getMatcher();
            PatternMatcherInput input = new PatternMatcherInput(textToMatch);
            while (matcher.contains(input, searchPattern)) {
                MatchResult match = matcher.getMatch();
                collectAllMatches.add(match);
            }
        } catch (NumberFormatException e) {//TODO: can this occur?
View Full Code Here

        List<String> pieces = new ArrayList<String>();
        // String or Integer
        List<Object> combined = new LinkedList<Object>();
        PatternMatcher matcher = JMeterUtils.getMatcher();
        Util.split(pieces, matcher, templatePattern, rawTemplate);
        PatternMatcherInput input = new PatternMatcherInput(rawTemplate);
        boolean startsWith = isFirstElementGroup(rawTemplate);
        if (startsWith) {
            pieces.remove(0);// Remove initial empty entry
        }
        Iterator<String> iter = pieces.iterator();
View Full Code Here

    }

    private String process(String textToParse) {

        Perl5Matcher matcher = new Perl5Matcher();
        PatternMatcherInput input = new PatternMatcherInput(textToParse);

        PatternCacheLRU pcLRU = new PatternCacheLRU();
        Pattern pattern = pcLRU.getPattern(regexpField.getText(), Perl5Compiler.READ_ONLY_MASK);
        List<MatchResult> matches = new LinkedList<MatchResult>();
        while (matcher.contains(input, pattern)) {
View Full Code Here

     */
    @Override
    public Iterator<URL> getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls) {

        Perl5Matcher matcher = JMeterUtils.getMatcher();
        PatternMatcherInput input = localInput.get();
        // TODO: find a way to avoid the cost of creating a String here --
        // probably a new PatternMatcherInput working on a byte[] would do
        // better.
        input.setInput(new String(html)); // TODO - charset?
        Pattern pattern=JMeterUtils.getPatternCache().getPattern(
                REGEXP,
                Perl5Compiler.CASE_INSENSITIVE_MASK
                | Perl5Compiler.SINGLELINE_MASK
                | Perl5Compiler.READ_ONLY_MASK);
View Full Code Here

TOP

Related Classes of org.apache.oro.text.regex.PatternMatcherInput

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.