Java - Regular Expressions

[ Published on - Mar 06, 2012 - Prague / Praha | Category: Java Basics | Comments (0) ]
1. Java - Primitives
2. Java - Strings
3. Java - Arrays
4. Java - Date
5. Java - Generics
6. Java - Collections
7. Java - Localization
8. Java - Formatting
9. Java - Regular Expressions
10. Java - System
11. Java - Serialization
12. Java - Multithreading
13. Java - IO/File
14. Java - Networking
15. Java - ORM - JPA
a) Regular expressions example class
package basics.basicsExamples;

import java.util.Arrays;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular expression example class.
 *
 * <p>Demonstrates {@link Pattern} and {@link Matcher} from
 * {@code java.util.regex}. Constructing an instance runs the basic example
 * ({@code regexpExamples()}) and prints its result to {@code System.out}; the
 * other two demos are private and can be enabled in the constructor.
 *
 * <p>References:
 * <ul>
 *   <li>http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html</li>
 *   <li>Unicode Technical Standard #18, http://www.unicode.org/reports/tr18/</li>
 * </ul>
 *
 * @author stanislav.zorjan
 */
public class RegexpExample {

    // Character Classes
    // http://docs.oracle.com/javase/tutorial/essential/regex/char_classes.html
    // [abc]          a, b, or c (simple class)
    // [^abc]         Any character except a, b, or c (negation)
    // [a-zA-Z]       a through z, or A through Z, inclusive (range)
    // [a-d[m-p]]     a through d, or m through p: [a-dm-p] (union)
    // [a-z&&[def]]   d, e, or f (intersection)
    // [a-z&&[^bc]]   a through z, except for b and c: [ad-z] (subtraction)
    // [a-z&&[^m-p]]  a through z, and not m through p: [a-lq-z] (subtraction)

    // Predefined Character Classes
    // http://docs.oracle.com/javase/tutorial/essential/regex/pre_char_classes.html
    // .    Any character (may or may not match line terminators)
    // \d   A digit: [0-9]
    // \D   A non-digit: [^0-9]
    // \s   A whitespace character: [ \t\n\x0B\f\r]
    // \S   A non-whitespace character: [^\s]
    // \w   A word character: [a-zA-Z_0-9]
    // \W   A non-word character: [^\w]

    // Quantifiers (greedy, reluctant, possessive)
    // http://docs.oracle.com/javase/tutorial/essential/regex/quant.html
    // X?      X??      X?+      X, once or not at all
    // X*      X*?      X*+      X, zero or more times
    // X+      X+?      X++      X, one or more times
    // X{n}    X{n}?    X{n}+    X, exactly n times
    // X{n,}   X{n,}?   X{n,}+   X, at least n times
    // X{n,m}  X{n,m}?  X{n,m}+  X, at least n but not more than m times

    // Boundary Matchers
    // http://docs.oracle.com/javase/tutorial/essential/regex/bounds.html
    // ^    The beginning of a line
    // $    The end of a line
    // \b   A word boundary
    // \B   A non-word boundary
    // \A   The beginning of the input
    // \G   The end of the previous match
    // \Z   The end of the input but for the final terminator, if any
    // \z   The end of the input

    /** Text containing the two characters that are special in a replacement string: backslash and dollar. */
    private static final String ESCAPED_SAMPLE = "\'ad\\df\' \\$";

    /** Plain ASCII sentence used by most of the demos. */
    private static final String EXAMPLE = "This is example string";

    /** Regular expression that matches any input on a single line. */
    private static final String MATCH_ALL = ".*";

    /**
     * HTML-like fragment referenced only by the commented-out examples.
     * NOTE(review): the literal contains no '<' character, so the tag patterns
     * shown in regexpExamples() cannot match it as stored - confirm whether the
     * angle brackets were lost.
     */
    private static final String HTML_FRAGMENT = "a   href=\"www.yahoo.com\"   style=\"top:30px;   width:  20px\">This is text between html tags";

    /** Mixed Latin/Cyrillic text referenced only by the commented-out Unicode examples. */
    private static final String CYRILLIC_SAMPLE = "acЊљчљsиљШђЏ";

    /** Three lines of text separated by '\n'. */
    private static final String MULTILINE_SAMPLE = "Multi\n"
            + "line\n"
            + "string";

    /**
     * Runs the basic regular expression example, which prints
     * {@code This is example string} followed by a line break.
     */
    public RegexpExample() {
        regexpExamples();
        //patternMethods();
        //matcherMethods();
    }

    /**
     * Compiles a few sample patterns, then prints every character of
     * {@link #EXAMPLE} that matches the negated class {@code [^љ]}.
     *
     * <p>EXAMPLE contains no Cyrillic letters, so every character matches and
     * the whole sentence is echoed, followed by a line break.
     */
    private void regexpExamples() {

        // The three patterns below are compiled only to show the syntax; they
        // are not applied to any input.

        // Matches one non-whitespace character (same as \S).
        Pattern nonWhitespace = Pattern.compile("[^\\s]");

        // Quoting metacharacters: both patterns match the literal text ".*".
        Pattern quotedBlock = Pattern.compile("\\Q.*\\E");
        Pattern escapedChars = Pattern.compile("\\.\\*");

        // Further examples, kept commented out. find() (or matches()/lookingAt())
        // has to succeed before group() may be called.
        //
        // Grouping
        //   Pattern p = Pattern.compile("(T)(h)");
        //   Matcher m = p.matcher(EXAMPLE);
        //   if (m.find()) System.out.print(m.group(2) + " " + m.group(1)); // prints h T
        //
        // Extracts an html tag and the text that follows it
        //   Pattern p = Pattern.compile("(<\\w*\\W[^>{1}]*>)(.*)()");
        //   Matcher m = p.matcher(HTML_FRAGMENT);
        //   if (m.find()) {
        //       System.out.print(m.group(1)); // the first tag (everything between < and >)
        //       System.out.print(m.group(2)); // the text after the tag
        //       System.out.print(m.group(3)); // empty group; presumably meant for the closing tag - TODO confirm
        //   }
        //
        // Extracts the tag name
        //   Pattern p = Pattern.compile("<(\\w[^\\s]*)");
        //
        // With Pattern.UNICODE_CHARACTER_CLASS, \w also matches non-ASCII letters
        //   Pattern p = Pattern.compile("(.{3})(\\w*)", Pattern.UNICODE_CHARACTER_CLASS);
        //   Matcher m = p.matcher(CYRILLIC_SAMPLE);
        //   if (m.find()) System.out.print(m.group(2)); // prints everything after the first three characters
        //
        // Unicode categories and ranges. A "u" escape reads exactly four hex
        // digits, so code points above FFFF must be written as \\x{h...h}.
        //   Pattern p = Pattern.compile("[\\p{L}]");
        //   Pattern p = Pattern.compile("[\\x{0}-\\x{10FFFF}]");
        //   Pattern p = Pattern.compile("[^\\u0409]", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        //
        // Named group
        //   Pattern p = Pattern.compile("(?<namedGroup0>This)");
        //   Matcher m = p.matcher(EXAMPLE);
        //   if (m.find()) System.out.print(m.group("namedGroup0")); // prints This

        // Any single character except the Cyrillic letter lje. CASE_INSENSITIVE
        // combined with UNICODE_CASE makes the exclusion cover both letter cases.
        Pattern notLje = Pattern.compile("[^љ]",
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNICODE_CHARACTER_CLASS);

        Matcher matcher = notLje.matcher(EXAMPLE);
        while (matcher.find()) {
            // Each match is exactly one character.
            System.out.print(matcher.group());
        }

        System.out.println("");
    }

    /**
     * Demonstrates {@link Pattern#split(CharSequence)},
     * {@link Pattern#split(CharSequence, int)} and
     * {@link Pattern#matches(String, CharSequence)}.
     */
    private void patternMethods() {
        Pattern space = Pattern.compile(" ");

        // Splits around every space: [This, is, example, string]
        System.out.println(Arrays.toString(space.split(EXAMPLE)));

        // The second argument is a limit on the result size: with a positive
        // limit n the pattern is applied at most n - 1 times, so the array has
        // at most n elements. Limit 2 gives [This, is example string].
        System.out.println(Arrays.toString(space.split(EXAMPLE, 2)));

        // Pattern.MULTILINE is not needed here: it only changes how ^ and $
        // behave, and this pattern uses neither.
        Pattern lineBreak = Pattern.compile("\\n");
        System.out.println(MULTILINE_SAMPLE);
        System.out.println(Arrays.toString(lineBreak.split(MULTILINE_SAMPLE)));

        // One-shot compile and match against the entire input.
        System.out.println(Pattern.matches(MATCH_ALL, EXAMPLE));
    }

    /**
     * Walks through the most commonly used {@link Matcher} methods and prints
     * the result of each call.
     */
    private void matcherMethods() {

        System.out.println(ESCAPED_SAMPLE);
        // Escapes backslash and dollar so the text can be used literally as a replacement.
        System.out.println(Matcher.quoteReplacement(ESCAPED_SAMPLE));

        // Case-insensitive "n" captured in a named group. The group has to be
        // declared in the pattern; without it group(1) and group("iGroup")
        // below would throw.
        Pattern pattern = Pattern.compile("(?i)(?<iGroup>n)");
        Matcher matcher = pattern.matcher(EXAMPLE);
        // A successful find() is required before start(), end() or group().
        matcher.find();

        // appendReplacement + appendTail are almost the same as
        // replaceAll method in String, with a difference that
        // you can compute a custom replacement for every match.
        // Look at the example in Java Doc.
        /*
            StringBuffer sb = new StringBuffer();
            matcher.appendReplacement(sb, Matcher.quoteReplacement(ESCAPED_SAMPLE));
            matcher.appendTail(sb);
        */

        // returns start index of the match
        System.out.println(matcher.start());
        // returns end index of the match (index after the last matched character)
        System.out.println(matcher.end());
        // resets the matcher and starts looking from character/index 7
        System.out.println(matcher.find(7));
        System.out.println(matcher.end());
        // returns the whole matched text (group 0)
        System.out.println(matcher.group());
        // returns the first capturing group
        System.out.println(matcher.group(1));
        // returns the named capturing group
        System.out.println(matcher.group("iGroup"));
        // returns the number of capturing groups in the pattern (group 0 is not counted)
        System.out.println(matcher.groupCount());

        System.out.println(matcher.hasAnchoringBounds());
        System.out.println(matcher.hasTransparentBounds());

        // returns true if the last match operation hit the end of the input
        System.out.println(matcher.hitEnd());

        pattern = Pattern.compile("This");
        matcher = pattern.matcher(EXAMPLE);
        matcher.find();

        // returns true/false if pattern matches beginning of the searched region
        System.out.println(matcher.lookingAt());
        // returns true/false if pattern matches whole searched region
        System.out.println(matcher.matches());

        // returns pattern used for matching
        System.out.println(matcher.pattern());
        // setting region (start index / end index) for search; region() returns this matcher
        matcher.region(0, 5);
        matcher.find();
        System.out.println(matcher.group());

        // returns region end index
        System.out.println(matcher.regionEnd());
        // returns region start index
        System.out.println(matcher.regionStart());
        // replaces all matched occurrences with specified string
        // (resets the matcher first, which also clears the region set above)
        System.out.println(matcher.replaceAll("{xxx}"));
        // replaces only first occurrence with specified string
        System.out.println(matcher.replaceFirst("{first}"));

        System.out.println(matcher.requireEnd());

        // reset() resets search index so "find()" starts searching from beginning
        System.out.println(matcher.find());
        matcher.reset();
        System.out.println(matcher.find());

        // resets matcher with new string for searching
        matcher.reset("(This) (is replacement string)");
        System.out.println(matcher.find());

        // returns start index of the match
        System.out.println(matcher.start());
        // returns start index of the given group; group 0 is the whole match
        System.out.println(matcher.start(0));

        // returns an immutable snapshot of the current match state
        MatchResult snapshot = matcher.toMatchResult();
        System.out.println(snapshot.group());

        // returns matcher string representation: java.util.regex.Matcher[pattern=This region=0,30 lastmatch=This]
        System.out.println(matcher.toString());
    }

}
Java - Regular Expressions

Categories

Latest Posts