1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63 package org.jaxen.function;
64
65 import java.util.List;
66
67 import org.jaxen.Context;
68 import org.jaxen.Function;
69 import org.jaxen.FunctionCallException;
70 import org.jaxen.Navigator;
71
72 /***
73 * <p><b>4.2</b> <code><i>number</i> string-length(<i>string</i>)</code></p>
74 *
75 * <p>
76 * The <b>string-length</b> function returns the number of <strong>Unicode characters</strong>
77 * in its argument. This is <strong>not</strong> necessarily
78 * the same as the number <strong>Java chars</strong>
79 * in the corresponding Java string. In particular, if the Java <code>String</code>
80 * contains surrogate pairs each such pair will be counted as only one character
81 * by this function. If the argument is omitted,
82 * it returns the length of the string-value of the context node.
83 * </p>
84 *
85 * @author bob mcwhirter (bob @ werken.com)
86 * @see <a href="http://www.w3.org/TR/xpath#function-string-length" target="_top">Section
87 * 4.2 of the XPath Specification</a>
88 */
89 public class StringLengthFunction implements Function
90 {
91
92
93 /***
94 * Create a new <code>StringLengthFunction</code> object.
95 */
96 public StringLengthFunction() {}
97
98
99 /***
100 * <p>
101 * Returns the number of Unicode characters in the string-value of the argument.
102 * </p>
103 *
104 * @param context the context at the point in the
105 * expression when the function is called
106 * @param args a list containing the item whose string-value is to be counted.
107 * If empty, the length of the context node's string-value is returned.
108 *
109 * @return a <code>Double</code> giving the number of Unicode characters
110 *
111 * @throws FunctionCallException if args has more than one item
112 */
113 public Object call(Context context,
114 List args) throws FunctionCallException
115 {
116 if (args.size() == 0)
117 {
118 return evaluate( context.getNodeSet(),
119 context.getNavigator() );
120 }
121 else if (args.size() == 1)
122 {
123 return evaluate( args.get(0),
124 context.getNavigator() );
125 }
126
127 throw new FunctionCallException( "string-length() requires one argument." );
128 }
129
130 /***
131 * <p>
132 * Returns the number of Unicode characters in the string-value of
133 * an object.
134 * </p>
135 *
136 * @param obj the object whose string-value is counted
137 * @param nav used to calculate the string-values of the first two arguments
138 *
139 * @return a <code>Double</code> giving the number of Unicode characters
140 *
141 * @throws FunctionCallException if the string contains mismatched surrogates
142 */
143 public static Double evaluate(Object obj, Navigator nav) throws FunctionCallException
144 {
145
146
147 String str = StringFunction.evaluate( obj, nav );
148
149 char[] data = str.toCharArray();
150 int length = 0;
151 for (int i = 0; i < data.length; i++) {
152 char c = data[i];
153 length++;
154
155
156 if (c >= 0xD800) {
157 try {
158 char low = data[i+1];
159 if (low < 0xDC00 || low > 0xDFFF) {
160 throw new FunctionCallException("Bad surrogate pair in string " + str);
161 }
162 i++;
163 }
164 catch (ArrayIndexOutOfBoundsException ex) {
165 throw new FunctionCallException("Bad surrogate pair in string " + str);
166 }
167 }
168 }
169 return new Double(length);
170 }
171
172 }