/* Pattern.java -- Compiled regular expression ready to be applied.
   Copyright (C) 2002, 2004, 2005, 2007, 2010
   Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */

package java.util.regex;

import gnu.java.lang.CPStringBuilder;

import gnu.java.util.regex.RE;
import gnu.java.util.regex.REException;
import gnu.java.util.regex.RESyntax;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;


/**
 * Compiled regular expression ready to be applied.
 *
 * <p>Instances are created through {@link #compile(String)} or
 * {@link #compile(String, int)}; the constructor is private.  All
 * instance fields are final.</p>
 *
 * @since 1.4
 */
public final class Pattern implements Serializable
{
  private static final long serialVersionUID = 5073258162644648461L;

  /** Match flag.  Accepted by {@link #compile(String, int)} but not
      yet acted upon by this implementation. */
  public static final int CANON_EQ = 128;

  /** Match flag requesting case-insensitive matching. */
  public static final int CASE_INSENSITIVE = 2;

  /** Match flag enabling comments inside the expression. */
  public static final int COMMENTS = 4;

  /** Match flag letting the dot also match a newline. */
  public static final int DOTALL = 32;

  /** Match flag enabling multi-line mode. */
  public static final int MULTILINE = 8;

  /** Match flag: case-insensitive matching is not restricted to
      US-ASCII when this is combined with {@link #CASE_INSENSITIVE}. */
  public static final int UNICODE_CASE = 64;

  /** Match flag: only <code>"\n"</code> is used as line separator. */
  public static final int UNIX_LINES = 1;

  /** Union of every match flag declared above (equals 0xEF). */
  private static final int ALL_FLAGS = CANON_EQ | CASE_INSENSITIVE
    | COMMENTS | DOTALL | MULTILINE | UNICODE_CASE | UNIX_LINES;

  /** The expression text exactly as passed to <code>compile</code>. */
  private final String regex;

  /** The match flags exactly as passed to <code>compile</code>. */
  private final int flags;

  /** The compiled form of {@link #regex} in the GNU regex engine. */
  private final RE re;

  /**
   * Translates the public match flags into GNU regex compilation flags
   * and syntax settings, then compiles the expression.
   *
   * @param regex The regular expression
   * @param flags The match flags, a bit mask; callers are expected to
   *              have validated it already
   *
   * @exception PatternSyntaxException If the expression's syntax is
   *            invalid; the underlying REException is set as its cause
   */
  private Pattern (String regex, int flags)
    throws PatternSyntaxException
  {
    this.regex = regex;
    this.flags = flags;

    RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;

    // REG_ICASE_USASCII is on by default and is only cleared further
    // down when UNICODE_CASE is requested.
    int gnuFlags = 0;
    gnuFlags |= RE.REG_ICASE_USASCII;
    if ((flags & CASE_INSENSITIVE) != 0)
      gnuFlags |= RE.REG_ICASE;
    if ((flags & MULTILINE) != 0)
      {
        gnuFlags |= RE.REG_MULTILINE;
        // Work on a private copy so the shared RE_SYNTAX_JAVA_1_4
        // constant is never modified.
        // NOTE(review): a null separator presumably means "any of the
        // standard line terminators" -- confirm against RESyntax.
        syntax = new RESyntax(syntax);
        syntax.setLineSeparator(null);
      }
    if ((flags & DOTALL) != 0)
      gnuFlags |= RE.REG_DOT_NEWLINE;
    if ((flags & UNICODE_CASE) != 0)
      gnuFlags &= ~RE.REG_ICASE_USASCII;
    // not yet supported:
    // if ((flags & CANON_EQ) != 0) gnuFlags =

    if ((flags & UNIX_LINES) != 0)
      {
        // Use a syntax set with \n for linefeeds?
        // Applied after MULTILINE on purpose: when both flags are set
        // the "\n" separator chosen here is the one that sticks.
        syntax = new RESyntax(syntax);
        syntax.setLineSeparator("\n");
      }

    if ((flags & COMMENTS) != 0)
      {
        gnuFlags |= RE.REG_X_COMMENTS;
      }

    try
      {
        this.re = new RE(regex, gnuFlags, syntax);
      }
    catch (REException e)
      {
        // Re-throw as the public exception type, keeping the message,
        // the error position and the original exception as cause.
        PatternSyntaxException pse;
        pse = new PatternSyntaxException(e.getMessage(),
                                         regex, e.getPosition());
        pse.initCause(e);
        throw pse;
      }
  }

  // package private accessor method
  RE getRE()
  {
    return re;
  }

  /**
   * Compiles the given regular expression with no match flags set.
   *
   * @param regex The regular expression
   *
   * @return the compiled pattern
   *
   * @exception PatternSyntaxException If the expression's syntax is invalid
   */
  public static Pattern compile (String regex)
    throws PatternSyntaxException
  {
    return compile(regex, 0);
  }

  /**
   * Compiles the given regular expression with the given match flags.
   *
   * @param regex The regular expression
   * @param flags The match flags, a bit mask
   *
   * @return the compiled pattern
   *
   * @exception PatternSyntaxException If the expression's syntax is invalid
   * @exception IllegalArgumentException If bit values other than those
   * corresponding to the defined match flags are set in flags
   */
  public static Pattern compile (String regex, int flags)
    throws PatternSyntaxException
  {
    // FIXME: check which flags are really accepted
    int unknown = flags & ~ALL_FLAGS;
    if (unknown != 0)
      throw new IllegalArgumentException ("Unsupported match flags: 0x"
                                          + Integer.toHexString(unknown));

    return new Pattern (regex, flags);
  }

  /**
   * Returns the match flags this pattern was compiled with.
   *
   * @return the flags bit mask as passed to <code>compile</code>
   */
  public int flags ()
  {
    return this.flags;
  }

  /**
   * Compiles the expression and tests whether the whole input matches
   * it.  The pattern is compiled on every call.
   *
   * @param regex The regular expression
   * @param input The character sequence to be matched
   *
   * @return the result of <code>Matcher.matches()</code> on the input
   *
   * @exception PatternSyntaxException If the expression's syntax is invalid
   */
  public static boolean matches (String regex, CharSequence input)
  {
    return compile(regex).matcher(input).matches();
  }

  /**
   * Creates a new matcher applying this pattern to the given input.
   *
   * @param input The character sequence to be matched
   *
   * @return a new Matcher bound to this pattern and the input
   */
  public Matcher matcher (CharSequence input)
  {
    return new Matcher(this, input);
  }

  /**
   * Splits the input around matches of this pattern, as
   * <code>split(input, 0)</code> does.
   *
   * @param input The character sequence to be matched
   *
   * @return the pieces of the input, without trailing empty strings
   */
  public String[] split (CharSequence input)
  {
    return split(input, 0);
  }

  /**
   * Splits the input around matches of this pattern.
   *
   * <p>If <code>limit</code> is positive, at most <code>limit - 1</code>
   * matches are used and the rest of the input becomes the last
   * element.  If it is negative, every match is used and trailing empty
   * strings are kept.  If it is zero, every match is used and trailing
   * empty strings are dropped.  When the pattern does not match at all,
   * the result is a one-element array holding the whole input.</p>
   *
   * @param input The character sequence to be matched
   * @param limit The result threshold
   *
   * @return the pieces of the input
   */
  public String[] split (CharSequence input, int limit)
  {
    Matcher matcher = matcher(input);
    List<String> list = new ArrayList<String>();
    // Empty pieces seen but not yet stored.  They are held back so
    // that a trailing run of them can be dropped when limit == 0.
    int empties = 0;
    int count = 0;
    int start = 0;
    boolean matched = matcher.find();

    while (matched && (limit <= 0 || count < limit - 1))
      {
        ++count;
        int end = matcher.start();
        if (start == end)
          empties++;
        else
          {
            // A non-empty piece follows, so the held-back empties are
            // not trailing and have to be emitted first.
            addEmpties(list, empties);
            empties = 0;

            list.add(input.subSequence(start, end).toString());
          }
        start = matcher.end();
        matched = matcher.find();
      }

    // We matched nothing.
    if (!matched && count == 0)
      return new String[] { input.toString() };

    // Is the last token empty?
    boolean emptyLast = (start == input.length());

    // Can/Must we add empties or an extra last token at the end?
    if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
      {
        if (limit > list.size())
          empties = Math.min(empties, limit - list.size());
        addEmpties(list, empties);
      }

    // Last token at end.  For limit == 0 it is only added when it is
    // non-empty; "start" never exceeds the input length, so !emptyLast
    // already guarantees that.
    if (limit != 0 || !emptyLast)
      list.add(input.subSequence(start, input.length()).toString());

    return list.toArray(new String[list.size()]);
  }

  /** Appends <code>n</code> empty strings to the list; does nothing
      when <code>n</code> is not positive. */
  private static void addEmpties (List<String> list, int n)
  {
    for (int i = 0; i < n; i++)
      list.add("");
  }

  /**
   * Returns the regular expression this pattern was compiled from.
   *
   * @return the expression text as passed to <code>compile</code>
   */
  public String pattern ()
  {
    return regex;
  }

  /**
   * Returns a literal pattern for the specified String.
   *
   * <p>The string is wrapped in <code>\Q</code> ... <code>\E</code>.
   * Every <code>\E</code> already present in the string is replaced by
   * <code>\E\\E\Q</code>, i.e. the quote is closed, a literal backslash
   * and a literal <code>E</code> are emitted, and the quote is
   * re-opened.</p>
   *
   * @param str The String to return a literal pattern for.
   * @return a literal pattern for the specified String.
   * @exception NullPointerException if str is null.
   * @since 1.5
   */
  public static String quote(String str)
  {
    int eInd = str.indexOf("\\E");
    if (eInd < 0)
      {
        // No need to handle backslashes.
        return "\\Q" + str + "\\E";
      }

    CPStringBuilder sb = new CPStringBuilder(str.length() + 16);
    sb.append("\\Q"); // start quote

    int pos = 0;
    do
      {
        // A backslash is quoted by another backslash;
        // 'E' is not needed to be quoted.
        sb.append(str.substring(pos, eInd))
          .append("\\E" + "\\\\" + "E" + "\\Q");
        pos = eInd + 2;
      } while ((eInd = str.indexOf("\\E", pos)) >= 0);

    sb.append(str.substring(pos, str.length()))
      .append("\\E"); // end quote
    return sb.toString();
  }

  /**
   * Return the regular expression used to construct this object.
   * @specnote Prior to JDK 1.5 this method had a different behavior
   * @since 1.5
   */
  @Override
  public String toString()
  {
    return regex;
  }
}