#include <cmt_regexp.h>
Collaboration diagram for cmt_regexp:

Public Methods | |
| cmt_regexp (const cmt_string& expression) | |
| ~cmt_regexp () | |
| bool | is_valid () const |
| iterator | begin (const cmt_string& text, int pos = 0) |
| iterator | end () |
| iterator | begin (const cmt_string& text, int pos = 0) const |
| iterator | end () const |
| bool | match (const cmt_string& text) const |
Private Attributes | |
| cmt_node* | _root |
|
|
Definition at line 949 of file cmt_regexp.cxx. 00950 {
00951 _root = 0;
00952
00953 //
00954 // The root is the cmt_or_node which will be returned. It is
00955 // the top of the hierarchy.
00956 //
00957 // top is the running cmt_and_node.
00958 //
00959 cmt_node_set* or_root = 0;
00960 cmt_node_set* top_and = 0;
00961
00962 // abcd
00963 // ab|cd
00964 // a|b|cd
00965 // a|b*|cd
00966 // a|b*|cd?e
00967 //
00968 // exp : and
00969 // | exp '|' and
00970 //
00971 // and : unary
00972 // | unary and
00973 //
00974 // unary : primary '*'
00975 // | primary '?'
00976 //
00977 // primary : '[' opt_begin opt_chars opt_end ']'
00978 // | '^'
00979 // | '$'
00980 // | char
00981 // | '(' exp ')'
00982 //
00983
00984 {
00985 //
00986 // First we build an cmt_or_node (corresponding to the
00987 // first grammatical rule)
00988 //
00989 // Then cmt_and_nodes are pushed into it.
00990 // and standard nodes are pushed into the running (top_and) cmt_and_node
00991 //
00992 or_root = new cmt_or_node (0);
00993 top_and = new cmt_and_node (or_root);
00994 }
00995
00996 int i;
00997
00998 for (i = 0; i < expression.size (); i++)
00999 {
01000 char c = expression[i];
01001 switch (c)
01002 {
01003 case '[':
01004 {
01005 //
01006 // The case is
01007 //
01008 // exp : '[' char ... ']'
01009 // exp : '[' '^' char ... ']'
01010 //
01011
01012 if (i >= expression.size ())
01013 {
01014 // syntax error : unbalanced '['
01015 delete or_root;
01016 return;
01017 }
01018 i++;
01019
01020 int i0 = i;
01021
01022 bool done = false;
01023 bool has_not = false;
01024
01025 cmt_string choices = "";
01026
01027 for (; i < expression.size (); i++)
01028 {
01029 c = expression[i];
01030 switch (c)
01031 {
01032 case ']':
01033 done = true;
01034 break;
01035 case '^':
01036 if (i == i0) has_not = true;
01037 else choices += c;
01038 break;
01039 case '\\':
01040 choices += c;
01041 if (i >= expression.size ())
01042 {
01043 // syntax error : unbalanced '[' and unfinished
01044 // escape sequence
01045 delete or_root;
01046 return;
01047 }
01048 i++;
01049 c = expression[i];
01050 choices += c;
01051 break;
01052 default:
01053 choices += c;
01054 break;
01055 }
01056 if (done) break;
01057 }
01058
01059 if (!done)
01060 {
01061 // syntax error : unbalanced '['
01062 delete or_root;
01063 return;
01064 }
01065 if (has_not)
01066 top_and->push (new cmt_not_char_list_node (choices));
01067 else
01068 top_and->push (new cmt_char_list_node (choices));
01069 }
01070 break;
01071 case '*':
01072 {
01073 //
01074 // exp : exp '*'
01075 //
01076 if (top_and->nodes () == 0)
01077 {
01078 // Syntax error : '*' is not preceded by an expression
01079 delete or_root;
01080 return;
01081 }
01082
01083 cmt_node* n = top_and->pop ();
01084 top_and->push (new cmt_zero_more (n));
01085 }
01086 break;
01087 case '+':
01088 {
01089 //
01090 // exp : exp '+'
01091 //
01092 if (top_and->nodes () == 0)
01093 {
01094 // Syntax error : '+' is not preceded by an expression
01095 delete or_root;
01096 return;
01097 }
01098
01099 cmt_node* n = top_and->pop ();
01100 top_and->push (new cmt_one_more (n));
01101 }
01102 break;
01103 case '?':
01104 {
01105 //
01106 // exp : exp '?'
01107 //
01108 if (top_and->nodes () == 0)
01109 {
01110 // Syntax error : '?' is not preceded by an expression
01111 delete or_root;
01112 return;
01113 }
01114
01115 cmt_node* n = top_and->pop ();
01116 top_and->push (new cmt_zero_one (n));
01117 }
01118 break;
01119 case '.':
01120 //
01121 // exp : '.'
01122 //
01123 top_and->push (new cmt_any_node ());
01124 break;
01125 case '(':
01126 {
01127 //
01128 // exp : '(' exp ')'
01129 //
01130 if (top_and->parentheses ())
01131 {
01132 // This should never happen.
01133 delete or_root;
01134 return;
01135 }
01136
01137 top_and->set_parentheses (true);
01138
01139 //
01140 // A new complete expression is started.
01141 // -> do as for top_and parsing.
01142 //
01143
01144 top_and = new cmt_and_node (new cmt_or_node (top_and));
01145 }
01146 break;
01147 case ')':
01148 {
01149 //
01150 // exp : '(' exp ')'
01151 //
01152
01153 // top_and is the cmt_and_node into which new nodes are pushed.
01154 cmt_node_set* or_node = top_and->father ();
01155 if (or_node == 0)
01156 {
01157 // This should never happen : top_and should always be
01158 // at least an cmt_and_node hanging at an cmt_or_node
01159 delete or_root;
01160 return;
01161 }
01162
01163 //
01164 // The last cmt_and_node was empty, thus we had either '()' or '(...|)'
01165 //
01166
01167 if (top_and->nodes () == 0)
01168 {
01169 delete (or_node->pop ());
01170 }
01171 else
01172 {
01173 top_and->reduce ();
01174 }
01175
01176 top_and = or_node->father ();
01177
01178 if (top_and == 0)
01179 {
01180 // Syntax error : too many ')'
01181 delete or_root;
01182 return;
01183 }
01184
01185 //
01186 // top_and is now the previous running cmt_and_node where the '('
01187 // was originally met its top_and node contains the parenthesized
01188 // sub expression If this one is empty, (due to an empty '()'
01189 // expression) then it may simply be discarded.
01190 //
01191
01192 if (!top_and->parentheses ())
01193 {
01194 // Syntax error : too many ')'
01195 delete or_root;
01196 return;
01197 }
01198
01199 top_and->set_parentheses (false);
01200
01201 cmt_node* unique = 0;
01202 if (or_node->nodes () == 1)
01203 {
01204 cmt_node_set* and_node = (cmt_node_set*) or_node->top ();
01205 if (and_node->nodes () == 1)
01206 {
01207 unique = and_node->pop ();
01208 delete (or_node->pop ());
01209 }
01210 else if (and_node->nodes () == 0)
01211 {
01212 delete (or_node->pop ());
01213 }
01214 }
01215
01216 if (or_node->nodes () == 0) delete (top_and->pop ());
01217 if (unique != 0) top_and->push (unique);
01218 }
01219
01220 break;
01221 case '|':
01222 {
01223 //
01224 // exp : exp '|' exp
01225 //
01226
01227 cmt_node_set* or_node = top_and->father ();
01228
01229 top_and->reduce ();
01230
01231 //
01232 // or is the father cmt_or_node, which only contains cmt_and_nodes
01233 //
01234
01235 const cmt_node_set* and_node = (cmt_node_set*) or_node->top ();
01236 if (and_node->nodes () == 0)
01237 {
01238 // the previous node was empty.
01239 // we may discard it
01240 or_node->pop ();
01241 }
01242
01243 top_and = new cmt_and_node (or_node);
01244 }
01245 break;
01246 case '^':
01247 //
01248 // exp : '^'
01249 //
01250 top_and->push (new cmt_begin_node ());
01251 break;
01252 case '$':
01253 //
01254 // exp : '$'
01255 //
01256 top_and->push (new cmt_end_node ());
01257 break;
01258 case '\\':
01259 if (i >= expression.size ())
01260 {
01261 delete or_root;
01262 return;
01263 }
01264 i++;
01265 c = expression[i];
01266 switch (c)
01267 {
01268 case '[':
01269 case ']':
01270 case '(':
01271 case ')':
01272 case '.':
01273 case '*':
01274 case '?':
01275 case '^':
01276 case '$':
01277 case '\\':
01278 break;
01279 case 'r':
01280 c = '\r';
01281 break;
01282 case 't':
01283 c = '\t';
01284 break;
01285 case 'n':
01286 c = '\n';
01287 break;
01288 default:
01289 break;
01290 }
01291 default:
01292 top_and->push (new cmt_char_node (c));
01293 break;
01294 }
01295 }
01296
01297 if (or_root != 0)
01298 {
01299 cmt_node_set* and_node = (cmt_node_set*) or_root->top ();
01300
01301 if (or_root->nodes () == 1)
01302 {
01303 //
01304 // Check whether there is at least one non-empty
01305 // cmt_and_node
01306 //
01307 if (and_node->nodes () == 0)
01308 {
01309 delete or_root;
01310 return;
01311 }
01312 }
01313
01314 if (and_node != 0)
01315 {
01316 and_node->reduce ();
01317
01318 if (and_node->parentheses ())
01319 {
01320 delete or_root;
01321 return;
01322 }
01323 }
01324 }
01325
01326 _root = or_root;
01327 }
|
|
|
Definition at line 1329 of file cmt_regexp.cxx. 01330 {
01331 if (_root != 0)
01332 {
01333 delete _root;
01334 }
01335 }
|
|
|
Definition at line 1364 of file cmt_regexp.cxx. 01365 {
01366 if (_root != 0)
01367 {
01368 int i;
01369
01370 for (i = pos; i < text.size (); i++)
01371 {
01372 cmt_regexp::iterator it = _root->match (text, i);
01373 if (it != end ()) return (it);
01374 }
01375 }
01376
01377 return (end ());
01378 }
|
|
|
Definition at line 1343 of file cmt_regexp.cxx. Referenced by match(). 01344 {
01345 if (_root != 0)
01346 {
01347 int i;
01348
01349 for (i = pos; i < text.size (); i++)
01350 {
01351 cmt_regexp::iterator it = _root->match (text, i);
01352 if (it != end ()) return (it);
01353 }
01354 }
01355
01356 return (end ());
01357 }
|
|
|
Definition at line 1380 of file cmt_regexp.cxx. 01381 {
01382 return (cmt_regexp::iterator::null ());
01383 }
|
|
|
Definition at line 1359 of file cmt_regexp.cxx. Referenced by begin(), and match(). 01360 {
01361 return (cmt_regexp::iterator::null ());
01362 }
|
|
|
Definition at line 1337 of file cmt_regexp.cxx. 01338 {
01339 if (_root != 0) return (true);
01340 else return (false);
01341 }
|
|
|
Definition at line 1385 of file cmt_regexp.cxx. Referenced by CvsImplementation::filter_list(), CvsImplementation::match_version_request(), Parser::parse_line(), Cmt::print_macros(), Cmt::print_symbol_names(), PAwk::run(), and CmtSystem::scan_dir(). |
|
|
Definition at line 43 of file cmt_regexp.h. |
1.2.3 written by Dimitri van Heesch,
© 1997-2000