1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 package org.jaxen.saxpath.base;
66
67 class XPathLexer
68 {
69 private String xpath;
70 private int currentPosition;
71 private int endPosition;
72
73 private Token previousToken;
74
75 XPathLexer(String xpath)
76 {
77 setXPath( xpath );
78 }
79
80 private void setXPath(String xpath)
81 {
82 this.xpath = xpath;
83 this.currentPosition = 0;
84 this.endPosition = xpath.length();
85 }
86
87 String getXPath()
88 {
89 return this.xpath;
90 }
91
92 Token nextToken()
93 {
94 Token token = null;
95
96 do
97 {
98 token = null;
99
100 switch ( LA(1) )
101 {
102 case '$':
103 {
104 token = dollar();
105 break;
106 }
107
108 case '"':
109 case '\'':
110 {
111 token = literal();
112 break;
113 }
114
115 case '/':
116 {
117 token = slashes();
118 break;
119 }
120
121 case ',':
122 {
123 token = comma();
124 break;
125 }
126
127 case '(':
128 {
129 token = leftParen();
130 break;
131 }
132
133 case ')':
134 {
135 token = rightParen();
136 break;
137 }
138
139 case '[':
140 {
141 token = leftBracket();
142 break;
143 }
144
145 case ']':
146 {
147 token = rightBracket();
148 break;
149 }
150
151 case '+':
152 {
153 token = plus();
154 break;
155 }
156
157 case '-':
158 {
159 token = minus();
160 break;
161 }
162
163 case '<':
164 case '>':
165 {
166 token = relationalOperator();
167 break;
168 }
169
170 case '=':
171 {
172 token = equals();
173 break;
174 }
175
176 case '!':
177 {
178 if ( LA(2) == '=' )
179 {
180 token = notEquals();
181 }
182 else
183 {
184 token = not();
185 }
186 break;
187 }
188
189 case '|':
190 {
191 token = pipe();
192 break;
193 }
194
195 case '@':
196 {
197 token = at();
198 break;
199 }
200
201 case ':':
202 {
203 if ( LA(2) == ':' )
204 {
205 token = doubleColon();
206 }
207 else
208 {
209 token = colon();
210 }
211 break;
212 }
213
214 case '*':
215 {
216 token = star();
217 break;
218 }
219
220 case '.':
221 {
222 switch ( LA(2) )
223 {
224 case '0':
225 case '1':
226 case '2':
227 case '3':
228 case '4':
229 case '5':
230 case '6':
231 case '7':
232 case '8':
233 case '9':
234 {
235 token = number();
236 break;
237 }
238 default:
239 {
240 token = dots();
241 break;
242 }
243 }
244 break;
245 }
246
247 case '0':
248 case '1':
249 case '2':
250 case '3':
251 case '4':
252 case '5':
253 case '6':
254 case '7':
255 case '8':
256 case '9':
257 {
258 token = number();
259 break;
260 }
261
262 case ' ':
263 case '\t':
264 case '\n':
265 case '\r':
266 {
267 token = whitespace();
268 break;
269 }
270
271 default:
272 {
273 if ( isIdentifierStartChar( LA(1) ) )
274 {
275 token = identifierOrOperatorName();
276 }
277 }
278 }
279
280 if ( token == null )
281 {
282 if (!hasMoreChars())
283 {
284 token = new Token( TokenTypes.EOF,
285 getXPath(),
286 currentPosition(),
287 endPosition() );
288 }
289 else
290 {
291 token = new Token( TokenTypes.ERROR,
292 getXPath(),
293 currentPosition(),
294 endPosition() );
295 }
296 }
297
298 }
299 while ( token.getTokenType() == TokenTypes.SKIP );
300
301 setPreviousToken( token );
302
303 return token;
304 }
305
306 private Token identifierOrOperatorName()
307 {
308 Token token = null;
309
310 if ( previousToken != null )
311 {
312
313
314
315
316
317
318
319
320
321
322
323 switch ( previousToken.getTokenType() )
324 {
325 case TokenTypes.AT:
326 case TokenTypes.DOUBLE_COLON:
327 case TokenTypes.LEFT_PAREN:
328 case TokenTypes.LEFT_BRACKET:
329 case TokenTypes.AND:
330 case TokenTypes.OR:
331 case TokenTypes.MOD:
332 case TokenTypes.DIV:
333 case TokenTypes.COLON:
334 case TokenTypes.SLASH:
335 case TokenTypes.DOUBLE_SLASH:
336 case TokenTypes.PIPE:
337 case TokenTypes.DOLLAR:
338 case TokenTypes.PLUS:
339 case TokenTypes.MINUS:
340 case TokenTypes.STAR:
341 case TokenTypes.COMMA:
342 case TokenTypes.LESS_THAN_SIGN:
343 case TokenTypes.GREATER_THAN_SIGN:
344 case TokenTypes.LESS_THAN_OR_EQUALS_SIGN:
345 case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN:
346 case TokenTypes.EQUALS:
347 case TokenTypes.NOT_EQUALS:
348 {
349 token = identifier();
350 break;
351 }
352 default:
353 {
354 token = operatorName();
355 break;
356 }
357 }
358 }
359 else
360 {
361 token = identifier();
362 }
363
364 return token;
365 }
366
367 private Token identifier()
368 {
369 Token token = null;
370
371 int start = currentPosition();
372
373 while ( hasMoreChars() )
374 {
375 if ( isIdentifierChar( LA(1) ) )
376 {
377 consume();
378 }
379 else
380 {
381 break;
382 }
383 }
384
385 token = new Token( TokenTypes.IDENTIFIER,
386 getXPath(),
387 start,
388 currentPosition() );
389
390 return token;
391 }
392
393 private Token operatorName()
394 {
395 Token token = null;
396
397 switch ( LA(1) )
398 {
399 case 'a':
400 {
401 token = and();
402 break;
403 }
404
405 case 'o':
406 {
407 token = or();
408 break;
409 }
410
411 case 'm':
412 {
413 token = mod();
414 break;
415 }
416
417 case 'd':
418 {
419 token = div();
420 break;
421 }
422 }
423
424 return token;
425 }
426
427 private Token mod()
428 {
429 Token token = null;
430
431 if ( ( LA(1) == 'm' )
432 &&
433 ( LA(2) == 'o' )
434 &&
435 ( LA(3) == 'd' )
436 &&
437 ( ! isIdentifierChar( LA(4) ) ) )
438 {
439 token = new Token( TokenTypes.MOD,
440 getXPath(),
441 currentPosition(),
442 currentPosition()+3 );
443
444 consume();
445 consume();
446 consume();
447 }
448
449 return token;
450 }
451
452 private Token div()
453 {
454 Token token = null;
455
456 if ( ( LA(1) == 'd' )
457 &&
458 ( LA(2) == 'i' )
459 &&
460 ( LA(3) == 'v' )
461 &&
462 ( ! isIdentifierChar( LA(4) ) ) )
463 {
464 token = new Token( TokenTypes.DIV,
465 getXPath(),
466 currentPosition(),
467 currentPosition()+3 );
468
469 consume();
470 consume();
471 consume();
472 }
473
474 return token;
475 }
476
477 private Token and()
478 {
479 Token token = null;
480
481 if ( ( LA(1) == 'a' )
482 &&
483 ( LA(2) == 'n' )
484 &&
485 ( LA(3) == 'd' )
486 &&
487 ( ! isIdentifierChar( LA(4) ) ) )
488 {
489 token = new Token( TokenTypes.AND,
490 getXPath(),
491 currentPosition(),
492 currentPosition()+3 );
493
494 consume();
495 consume();
496 consume();
497 }
498
499 return token;
500 }
501
502 private Token or()
503 {
504 Token token = null;
505
506 if ( ( LA(1) == 'o' )
507 &&
508 ( LA(2) == 'r' )
509 &&
510 ( ! isIdentifierChar( LA(3) ) ) )
511 {
512 token = new Token( TokenTypes.OR,
513 getXPath(),
514 currentPosition(),
515 currentPosition()+2 );
516
517 consume();
518 consume();
519 }
520
521 return token;
522 }
523
524 private Token number()
525 {
526 int start = currentPosition();
527 boolean periodAllowed = true;
528
529 loop:
530 while( true )
531 {
532 switch ( LA(1) )
533 {
534 case '.':
535 {
536 if ( periodAllowed )
537 {
538 periodAllowed = false;
539 consume();
540 }
541 else
542 {
543 break loop;
544 }
545 break;
546 }
547
548 case '0':
549 case '1':
550 case '2':
551 case '3':
552 case '4':
553 case '5':
554 case '6':
555 case '7':
556 case '8':
557 case '9':
558 {
559 consume();
560 break;
561 }
562 default:
563 {
564 break loop;
565 }
566 }
567 }
568
569 Token token = null;
570
571 if ( periodAllowed )
572 {
573 token = new Token( TokenTypes.INTEGER,
574 getXPath(),
575 start,
576 currentPosition() );
577 }
578 else
579 {
580 token = new Token( TokenTypes.DOUBLE,
581 getXPath(),
582 start,
583 currentPosition() );
584 }
585
586 return token;
587 }
588
589 private Token whitespace()
590 {
591 consume();
592
593 loop:
594 while( hasMoreChars() )
595 {
596 switch ( LA(1) )
597 {
598 case ' ':
599 case '\t':
600 case '\n':
601 case '\r':
602 {
603 consume();
604 break;
605 }
606
607 default:
608 {
609 break loop;
610 }
611 }
612 }
613
614 return new Token( TokenTypes.SKIP,
615 getXPath(),
616 0,
617 0 );
618 }
619
620 private Token comma()
621 {
622 Token token = new Token( TokenTypes.COMMA,
623 getXPath(),
624 currentPosition(),
625 currentPosition()+1 );
626
627 consume();
628
629 return token;
630 }
631
632 private Token equals()
633 {
634 Token token = new Token( TokenTypes.EQUALS,
635 getXPath(),
636 currentPosition(),
637 currentPosition()+1 );
638
639 consume();
640
641 return token;
642 }
643
644 private Token minus()
645 {
646 Token token = new Token( TokenTypes.MINUS,
647 getXPath(),
648 currentPosition(),
649 currentPosition()+1 );
650 consume();
651
652 return token;
653 }
654
655 private Token plus()
656 {
657 Token token = new Token( TokenTypes.PLUS,
658 getXPath(),
659 currentPosition(),
660 currentPosition()+1 );
661 consume();
662
663 return token;
664 }
665
666 private Token dollar()
667 {
668 Token token = new Token( TokenTypes.DOLLAR,
669 getXPath(),
670 currentPosition(),
671 currentPosition()+1 );
672 consume();
673
674 return token;
675 }
676
677 private Token pipe()
678 {
679 Token token = new Token( TokenTypes.PIPE,
680 getXPath(),
681 currentPosition(),
682 currentPosition()+1 );
683
684 consume();
685
686 return token;
687 }
688
689 private Token at()
690 {
691 Token token = new Token( TokenTypes.AT,
692 getXPath(),
693 currentPosition(),
694 currentPosition()+1 );
695
696 consume();
697
698 return token;
699 }
700
701 private Token colon()
702 {
703 Token token = new Token( TokenTypes.COLON,
704 getXPath(),
705 currentPosition(),
706 currentPosition()+1 );
707 consume();
708
709 return token;
710 }
711
712 private Token doubleColon()
713 {
714 Token token = new Token( TokenTypes.DOUBLE_COLON,
715 getXPath(),
716 currentPosition(),
717 currentPosition()+2 );
718
719 consume();
720 consume();
721
722 return token;
723 }
724
725 private Token not()
726 {
727 Token token = new Token( TokenTypes.NOT,
728 getXPath(),
729 currentPosition(),
730 currentPosition() + 1 );
731
732 consume();
733
734 return token;
735 }
736
737 private Token notEquals()
738 {
739 Token token = new Token( TokenTypes.NOT_EQUALS,
740 getXPath(),
741 currentPosition(),
742 currentPosition() + 2 );
743
744 consume();
745 consume();
746
747 return token;
748 }
749
750 private Token relationalOperator()
751 {
752 Token token = null;
753
754 switch ( LA(1) )
755 {
756 case '<':
757 {
758 if ( LA(2) == '=' )
759 {
760 token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN,
761 getXPath(),
762 currentPosition(),
763 currentPosition() + 2 );
764 consume();
765 }
766 else
767 {
768 token = new Token( TokenTypes.LESS_THAN_SIGN,
769 getXPath(),
770 currentPosition(),
771 currentPosition() + 1);
772 }
773
774 consume();
775 break;
776 }
777 case '>':
778 {
779 if ( LA(2) == '=' )
780 {
781 token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN,
782 getXPath(),
783 currentPosition(),
784 currentPosition() + 2 );
785 consume();
786 }
787 else
788 {
789 token = new Token( TokenTypes.GREATER_THAN_SIGN,
790 getXPath(),
791 currentPosition(),
792 currentPosition() + 1 );
793 }
794
795 consume();
796 break;
797 }
798 }
799
800 return token;
801
802 }
803
804 private Token star()
805 {
806 Token token = new Token( TokenTypes.STAR,
807 getXPath(),
808 currentPosition(),
809 currentPosition()+1 );
810
811 consume();
812
813 return token;
814 }
815
816 private Token literal()
817 {
818 Token token = null;
819
820 char match = LA(1);
821
822 consume();
823
824 int start = currentPosition();
825
826 while ( ( token == null )
827 &&
828 hasMoreChars() )
829 {
830 if ( LA(1) == match )
831 {
832 token = new Token( TokenTypes.LITERAL,
833 getXPath(),
834 start,
835 currentPosition() );
836 }
837 consume();
838 }
839
840 return token;
841 }
842
843 private Token dots()
844 {
845 Token token = null;
846
847 switch ( LA(2) )
848 {
849 case '.':
850 {
851 token = new Token( TokenTypes.DOT_DOT,
852 getXPath(),
853 currentPosition(),
854 currentPosition()+2 ) ;
855 consume();
856 consume();
857 break;
858 }
859 default:
860 {
861 token = new Token( TokenTypes.DOT,
862 getXPath(),
863 currentPosition(),
864 currentPosition()+1 );
865 consume();
866 break;
867 }
868 }
869
870 return token;
871 }
872
873 private Token leftBracket()
874 {
875 Token token = new Token( TokenTypes.LEFT_BRACKET,
876 getXPath(),
877 currentPosition(),
878 currentPosition()+1 );
879
880 consume();
881
882 return token;
883 }
884
885 private Token rightBracket()
886 {
887 Token token = new Token( TokenTypes.RIGHT_BRACKET,
888 getXPath(),
889 currentPosition(),
890 currentPosition()+1 );
891
892 consume();
893
894 return token;
895 }
896
897 private Token leftParen()
898 {
899 Token token = new Token( TokenTypes.LEFT_PAREN,
900 getXPath(),
901 currentPosition(),
902 currentPosition()+1 );
903
904 consume();
905
906 return token;
907 }
908
909 private Token rightParen()
910 {
911 Token token = new Token( TokenTypes.RIGHT_PAREN,
912 getXPath(),
913 currentPosition(),
914 currentPosition()+1 );
915
916 consume();
917
918 return token;
919 }
920
921 private Token slashes()
922 {
923 Token token = null;
924
925 switch ( LA(2) )
926 {
927 case '/':
928 {
929 token = new Token( TokenTypes.DOUBLE_SLASH,
930 getXPath(),
931 currentPosition(),
932 currentPosition()+2 );
933 consume();
934 consume();
935 break;
936 }
937 default:
938 {
939 token = new Token( TokenTypes.SLASH,
940 getXPath(),
941 currentPosition(),
942 currentPosition()+1 );
943 consume();
944 }
945 }
946
947 return token;
948 }
949
950 private char LA(int i)
951 {
952 if ( currentPosition + ( i - 1 ) >= endPosition() )
953 {
954 return (char) -1;
955 }
956
957 return getXPath().charAt( currentPosition() + (i - 1) );
958 }
959
960 private void consume()
961 {
962 ++this.currentPosition;
963 }
964
965 private int currentPosition()
966 {
967 return this.currentPosition;
968 }
969
970 private int endPosition()
971 {
972 return this.endPosition;
973 }
974
975 private void setPreviousToken(Token previousToken)
976 {
977 this.previousToken = previousToken;
978 }
979
980 private boolean hasMoreChars()
981 {
982 return currentPosition() < endPosition();
983 }
984
985 private boolean isIdentifierChar(char c)
986 {
987 return Verifier.isXMLNCNameCharacter( c );
988 }
989
990 private boolean isIdentifierStartChar(char c)
991 {
992 return Verifier.isXMLNCNameStartCharacter( c );
993 }
994
995 }