1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60 package org.jaxen.function;
61
62 import java.util.List;
63
64 import org.jaxen.Context;
65 import org.jaxen.Function;
66 import org.jaxen.FunctionCallException;
67 import org.jaxen.Navigator;
68 /***
69 * <p>
70 * <b>4.2</b>
71 * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code>
72 * </p>
73 *
74 * <blockquote src="http://www.w3.org/TR/xpath">
75 * <p>The <b>substring</b> function returns the
76 * substring of the first argument starting at the position specified in
77 * the second argument with length specified in the third argument. For
78 * example,
79 *
80 * <code>substring("12345",2,3)</code> returns <code>"234"</code>.
81 * If the third argument is not specified, it returns the substring
82 * starting at the position specified in the second argument and
83 * continuing to the end of the string. For example,
84 * <code>substring("12345",2)</code> returns <code>"2345"</code>.
85 * </p>
86 *
87 * <p>
88 * More precisely, each character in the string (see <a
89 * href="http://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a
90 * numeric position: the position of the first character is 1, the
91 * position of the second character is 2 and so on.
92 * </p>
93 *
94 * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in
95 * which the <code>String.substring</code> method treats the position
96 * of the first character as 0.</blockquote>
97 *
98 * <p>
99 * The returned substring contains those characters for which the
100 * position of the character is greater than or equal to the rounded
101 * value of the second argument and, if the third argument is specified,
102 * less than the sum of the rounded value of the second argument and the
103 * rounded value of the third argument; the comparisons and addition
104 * used for the above follow the standard IEEE 754 rules; rounding is
105 * done as if by a call to the <b><a href="#function-round">round</a></b>
106 * function. The following examples illustrate various unusual cases:
107 * </p>
108 *
109 * <ul>
110 *
111 * <li>
112 * <p>
113 * <code>substring("12345", 1.5, 2.6)</code> returns
114 * <code>"234"</code>
115 * </p>
116 * </li>
117 *
118 * <li>
119 * <p>
120 * <code>substring("12345", 0, 3)</code> returns <code>"12"</code>
121 *
122 * </p>
123 * </li>
124 *
125 * <li>
126 * <p>
127 * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code>
128 * </p>
129 * </li>
130 *
131 * <li>
132 * <p>.
133 * <code>substring("12345", 1, 0 div 0)</code> returns
134 *
135 * <code>""</code>
136 * </p>
137 * </li>
138 *
139 * <li>
140 * <p>
141 * <code>substring("12345", -42, 1 div 0)</code> returns
142 * <code>"12345"</code>
143 * </p>
144 * </li>
145 *
146 * <li>
147 * <p>
148 *
149 * <code>substring("12345", -1 div 0, 1 div 0)</code> returns
150 * <code>""</code> </blockquote>
151 *
152 * @author bob mcwhirter (bob @ werken.com)
153 *
154 * @see <a href="http://www.w3.org/TR/xpath#function-substring"
155 * target="_top">Section 4.2 of the XPath Specification</a>
156 */
157 public class SubstringFunction implements Function
158 {
159
160 /***
161 * Create a new <code>SubstringFunction</code> object.
162 */
163 public SubstringFunction() {}
164
165
166 /*** Returns a substring of an XPath string-value by character index.
167 *
168 * @param context the context at the point in the
169 * expression when the function is called
170 * @param args a list that contains two or three items
171 *
172 * @return a <code>String</code> containing the specifed character subsequence of
173 * the original string or the string-value of the context node
174 *
175 * @throws FunctionCallException if <code>args</code> has more than three
176 * or less than two items
177 */
178 public Object call(Context context,
179 List args) throws FunctionCallException
180 {
181 final int argc = args.size();
182 if (argc < 2 || argc > 3){
183 throw new FunctionCallException( "substring() requires two or three arguments." );
184 }
185
186 final Navigator nav = context.getNavigator();
187
188 final String str = StringFunction.evaluate(args.get(0), nav );
189
190 if (str == null) {
191 return "";
192 }
193
194 final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue();
195
196 if (stringLength == 0) {
197 return "";
198 }
199
200 Double d1 = NumberFunction.evaluate(args.get(1), nav);
201
202 if (d1.isNaN()){
203 return "";
204 }
205
206 int start = RoundFunction.evaluate(d1, nav).intValue() - 1;
207
208 int substringLength = stringLength;
209 if (argc == 3){
210 Double d2 = NumberFunction.evaluate(args.get(2), nav);
211
212 if (!d2.isNaN()){
213 substringLength = RoundFunction.evaluate(d2, nav ).intValue();
214 }
215 else {
216 substringLength = 0;
217 }
218 }
219
220 if (substringLength < 0) return "";
221
222 int end = start + substringLength;
223 if (argc == 2) end = stringLength;
224
225
226 if ( start < 0){
227 start = 0;
228 }
229 else if (start > stringLength){
230 return "";
231 }
232
233 if (end > stringLength){
234 end = stringLength;
235 }
236 else if (end < start) return "";
237
238 if (stringLength == str.length()) {
239
240 return str.substring(start, end);
241 }
242 else {
243 return unicodeSubstring(str, start, end);
244 }
245
246 }
247
248 private static String unicodeSubstring(String s, int start, int end) {
249
250 StringBuffer result = new StringBuffer(s.length());
251 for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) {
252 char c = s.charAt(jChar);
253 if (uChar >= start) result.append(c);
254 if (c >= 0xD800) {
255
256
257 jChar++;
258 if (uChar >= start) result.append(s.charAt(jChar));
259 }
260 }
261 return result.toString();
262 }
263 }