1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63 package org.jaxen.function;
64
65 import java.util.HashMap;
66 import java.util.List;
67 import java.util.Map;
68
69 import org.jaxen.Context;
70 import org.jaxen.Function;
71 import org.jaxen.FunctionCallException;
72 import org.jaxen.Navigator;
73
74 /***
75 * <p>
76 * <b>4.2</b>
77 * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code>
78 * </p>
79 *
80 * <blockquote src="http://www.w3.org/TR/xpath#function-translate">
81 * <p>
82 * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function
83 * returns the first argument string with occurrences of characters in
84 * the second argument string replaced by the character at the
85 * corresponding position in the third argument string. For example,
86 * <code>translate("bar","abc","ABC")</code> returns the string
87 * <code>BAr</code>. If there is a character in the second argument
88 * string with no character at a corresponding position in the third
89 * argument string (because the second argument string is longer than
90 * the third argument string), then occurrences of that character in the
91 * first argument string are removed. For example,
92 * <code>translate("--aaa--","abc-","ABC")</code> returns
93 * <code>"AAA"</code>. If a character occurs more than once in the
94 * second argument string, then the first occurrence determines the
95 * replacement character. If the third argument string is longer than
96 * the second argument string, then excess characters are ignored.
97 * </p>
98 *
99 * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a
100 * sufficient solution for case conversion in all languages. A future
101 * version of XPath may provide additional functions for case
102 * conversion.</blockquote>
103 *
104 * </blockquote>
105 *
106 * @author Jan Dvorak ( jan.dvorak @ mathan.cz )
107 *
108 * @see <a href="http://www.w3.org/TR/xpath#function-translate"
109 * target="_top">Section 4.2 of the XPath Specification</a>
110 */
111 public class TranslateFunction implements Function
112 {
113
114
115
116
117
118 /***
119 * Create a new <code>TranslateFunction</code> object.
120 */
121 public TranslateFunction() {}
122
123
124 /*** Returns a copy of the first argument in which
125 * characters found in the second argument are replaced by
126 * corresponding characters from the third argument.
127 *
128 * @param context the context at the point in the
129 * expression when the function is called
130 * @param args a list that contains exactly three items
131 *
132 * @return a <code>String</code> built from <code>args.get(0)</code>
133 * in which occurrences of characters in <code>args.get(1)</code>
134 * are replaced by the corresponding characters in <code>args.get(2)</code>
135 *
136 * @throws FunctionCallException if <code>args</code> does not have exactly three items
137 */
138 public Object call(Context context,
139 List args) throws FunctionCallException
140 {
141 if (args.size() == 3) {
142 return evaluate( args.get(0),
143 args.get(1),
144 args.get(2),
145 context.getNavigator() );
146 }
147
148 throw new FunctionCallException( "translate() requires three arguments." );
149 }
150
151 /***
152 * Returns a copy of <code>strArg</code> in which
153 * characters found in <code>fromArg</code> are replaced by
154 * corresponding characters from <code>toArg</code>.
155 * If necessary each argument is first converted to it string-value
156 * as if by the XPath <code>string()</code> function.
157 *
158 * @param strArg the base string
159 * @param fromArg the characters to be replaced
160 * @param toArg the characters they will be replaced by
161 * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments.
162 *
163 * @return a copy of <code>strArg</code> in which
164 * characters found in <code>fromArg</code> are replaced by
165 * corresponding characters from <code>toArg</code>
166 *
167 * @throws FunctionCallException if one of the arguments is a malformed Unicode string;
168 * that is, if surrogate characters don't line up properly
169 *
170 */
171 public static String evaluate(Object strArg,
172 Object fromArg,
173 Object toArg,
174 Navigator nav) throws FunctionCallException
175 {
176 String inStr = StringFunction.evaluate( strArg, nav );
177 String fromStr = StringFunction.evaluate( fromArg, nav );
178 String toStr = StringFunction.evaluate( toArg, nav );
179
180
181 Map characterMap = new HashMap();
182 String[] fromCharacters = toUnicodeCharacters(fromStr);
183 String[] toCharacters = toUnicodeCharacters(toStr);
184 int fromLen = fromCharacters.length;
185 int toLen = toCharacters.length;
186 for ( int i = 0; i < fromLen; i++ ) {
187 String cFrom = fromCharacters[i];
188 if ( characterMap.containsKey( cFrom ) ) {
189
190 continue;
191 }
192
193 if ( i < toLen ) {
194
195 characterMap.put( cFrom, toCharacters[i] );
196 }
197 else {
198
199 characterMap.put( cFrom, null );
200 }
201 }
202
203
204 StringBuffer outStr = new StringBuffer( inStr.length() );
205 String[] inCharacters = toUnicodeCharacters(inStr);
206 int inLen = inCharacters.length;
207 for ( int i = 0; i < inLen; i++ ) {
208 String cIn = inCharacters[i];
209 if ( characterMap.containsKey( cIn ) ) {
210 String cTo = (String) characterMap.get( cIn );
211 if ( cTo != null ) {
212 outStr.append( cTo );
213 }
214 }
215 else {
216 outStr.append( cIn );
217 }
218 }
219
220 return outStr.toString();
221 }
222
223 private static String[] toUnicodeCharacters(String s) throws FunctionCallException {
224
225 String[] result = new String[s.length()];
226 int stringLength = 0;
227 for (int i = 0; i < s.length(); i++) {
228 char c1 = s.charAt(i);
229 if (isHighSurrogate(c1)) {
230 try {
231 char c2 = s.charAt(i+1);
232 if (isLowSurrogate(c2)) {
233 result[stringLength] = (c1 + "" + c2).intern();
234 i++;
235 }
236 else {
237 throw new FunctionCallException("Mismatched surrogate pair in translate function");
238 }
239 }
240 catch (StringIndexOutOfBoundsException ex) {
241 throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function");
242 }
243 }
244 else {
245 result[stringLength]=String.valueOf(c1).intern();
246 }
247 stringLength++;
248 }
249
250 if (stringLength == result.length) return result;
251
252
253 String[] trimmed = new String[stringLength];
254 System.arraycopy(result, 0, trimmed, 0, stringLength);
255 return trimmed;
256
257 }
258
259 private static boolean isHighSurrogate(char c) {
260 return c >= 0xD800 && c <= 0xDBFF;
261 }
262
263 private static boolean isLowSurrogate(char c) {
264 return c >= 0xDC00 && c <= 0xDFFF;
265 }
266
267 }