1 /* 2 * $Header$ 3 * $Revision$ 4 * $Date$ 5 * 6 * ==================================================================== 7 * 8 * Copyright 2000-2002 bob mcwhirter & James Strachan. 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions are 13 * met: 14 * 15 * * Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 18 * * Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * * Neither the name of the Jaxen Project nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 27 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 29 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 30 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 31 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 32 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 34 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 35 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 * 38 * ==================================================================== 39 * This software consists of voluntary contributions made by many 40 * individuals on behalf of the Jaxen Project and was originally 41 * created by bob mcwhirter <bob@werken.com> and 42 * James Strachan <jstrachan@apache.org>. For more information on the 43 * Jaxen Project, please see <http://www.jaxen.org/>. 44 * 45 * $Id$ 46 */ 47 48 49 package org.jaxen.function; 50 51 import java.util.HashMap; 52 import java.util.List; 53 import java.util.Map; 54 55 import org.jaxen.Context; 56 import org.jaxen.Function; 57 import org.jaxen.FunctionCallException; 58 import org.jaxen.Navigator; 59 60 /** 61 * <p> 62 * <b>4.2</b> 63 * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code> 64 * </p> 65 * 66 * <blockquote cite="http://www.w3.org/TR/xpath#function-translate"> 67 * <p> 68 * The <b><a href="https://www.w3.org/TR/xpath#function-translate">translate</a></b> function 69 * returns the first argument string with occurrences of characters in 70 * the second argument string replaced by the character at the 71 * corresponding position in the third argument string. For example, 72 * <code>translate("bar","abc","ABC")</code> returns the string 73 * <code>BAr</code>. If there is a character in the second argument 74 * string with no character at a corresponding position in the third 75 * argument string (because the second argument string is longer than 76 * the third argument string), then occurrences of that character in the 77 * first argument string are removed. For example, 78 * <code>translate("--aaa--","abc-","ABC")</code> returns 79 * <code>"AAA"</code>. If a character occurs more than once in the 80 * second argument string, then the first occurrence determines the 81 * replacement character. If the third argument string is longer than 82 * the second argument string, then excess characters are ignored. 83 * </p> 84 * 85 * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a 86 * sufficient solution for case conversion in all languages. A future 87 * version of XPath may provide additional functions for case 88 * conversion.</blockquote> 89 * 90 * </blockquote> 91 * 92 * @author Jan Dvorak ( jan.dvorak @ mathan.cz ) 93 * 94 * @see <a href="https://www.w3.org/TR/xpath#function-translate" 95 * target="_top">Section 4.2 of the XPath Specification</a> 96 */ 97 public class TranslateFunction implements Function 98 { 99 100 /* The translation is done thru a HashMap. Performance tip (for anyone 101 * who needs to improve the performance of this particular function): 102 * Cache the HashMaps, once they are constructed. */ 103 104 /** 105 * Create a new <code>TranslateFunction</code> object. 106 */ 107 public TranslateFunction() {} 108 109 110 /** Returns a copy of the first argument in which 111 * characters found in the second argument are replaced by 112 * corresponding characters from the third argument. 113 * 114 * @param context the context at the point in the 115 * expression when the function is called 116 * @param args a list that contains exactly three items 117 * 118 * @return a <code>String</code> built from <code>args.get(0)</code> 119 * in which occurrences of characters in <code>args.get(1)</code> 120 * are replaced by the corresponding characters in <code>args.get(2)</code> 121 * 122 * @throws FunctionCallException if <code>args</code> does not have exactly three items 123 */ 124 public Object call(Context context, 125 List args) throws FunctionCallException 126 { 127 if (args.size() == 3) { 128 return evaluate( args.get(0), 129 args.get(1), 130 args.get(2), 131 context.getNavigator() ); 132 } 133 134 throw new FunctionCallException( "translate() requires three arguments." ); 135 } 136 137 /** 138 * Returns a copy of <code>strArg</code> in which 139 * characters found in <code>fromArg</code> are replaced by 140 * corresponding characters from <code>toArg</code>. 141 * If necessary each argument is first converted to it string-value 142 * as if by the XPath <code>string()</code> function. 143 * 144 * @param strArg the base string 145 * @param fromArg the characters to be replaced 146 * @param toArg the characters they will be replaced by 147 * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments. 148 * 149 * @return a copy of <code>strArg</code> in which 150 * characters found in <code>fromArg</code> are replaced by 151 * corresponding characters from <code>toArg</code> 152 * 153 * @throws FunctionCallException if one of the arguments is a malformed Unicode string; 154 * that is, if surrogate characters don't line up properly 155 * 156 */ 157 public static String evaluate(Object strArg, 158 Object fromArg, 159 Object toArg, 160 Navigator nav) throws FunctionCallException 161 { 162 String inStr = StringFunction.evaluate( strArg, nav ); 163 String fromStr = StringFunction.evaluate( fromArg, nav ); 164 String toStr = StringFunction.evaluate( toArg, nav ); 165 166 // Initialize the mapping in a HashMap 167 Map characterMap = new HashMap(); 168 String[] fromCharacters = toUnicodeCharacters(fromStr); 169 String[] toCharacters = toUnicodeCharacters(toStr); 170 int fromLen = fromCharacters.length; 171 int toLen = toCharacters.length; 172 for ( int i = 0; i < fromLen; i++ ) { 173 String cFrom = fromCharacters[i]; 174 if ( characterMap.containsKey( cFrom ) ) { 175 // We've seen the character before, ignore 176 continue; 177 } 178 179 if ( i < toLen ) { 180 // Will change 181 characterMap.put( cFrom, toCharacters[i] ); 182 } 183 else { 184 // Will delete 185 characterMap.put( cFrom, null ); 186 } 187 } 188 189 // Process the input string thru the map 190 StringBuffer outStr = new StringBuffer( inStr.length() ); 191 String[] inCharacters = toUnicodeCharacters(inStr); 192 int inLen = inCharacters.length; 193 for ( int i = 0; i < inLen; i++ ) { 194 String cIn = inCharacters[i]; 195 if ( characterMap.containsKey( cIn ) ) { 196 String cTo = (String) characterMap.get( cIn ); 197 if ( cTo != null ) { 198 outStr.append( cTo ); 199 } 200 } 201 else { 202 outStr.append( cIn ); 203 } 204 } 205 206 return outStr.toString(); 207 } 208 209 private static String[] toUnicodeCharacters(String s) throws FunctionCallException { 210 211 String[] result = new String[s.length()]; 212 int stringLength = 0; 213 for (int i = 0; i < s.length(); i++) { 214 char c1 = s.charAt(i); 215 if (isHighSurrogate(c1)) { 216 try { 217 char c2 = s.charAt(i+1); 218 if (isLowSurrogate(c2)) { 219 result[stringLength] = (c1 + "" + c2).intern(); 220 i++; 221 } 222 else { 223 throw new FunctionCallException("Mismatched surrogate pair in translate function"); 224 } 225 } 226 catch (StringIndexOutOfBoundsException ex) { 227 throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function"); 228 } 229 } 230 else { 231 result[stringLength]=String.valueOf(c1).intern(); 232 } 233 stringLength++; 234 } 235 236 if (stringLength == result.length) return result; 237 238 // trim array 239 String[] trimmed = new String[stringLength]; 240 System.arraycopy(result, 0, trimmed, 0, stringLength); 241 return trimmed; 242 243 } 244 245 private static boolean isHighSurrogate(char c) { 246 return c >= 0xD800 && c <= 0xDBFF; 247 } 248 249 private static boolean isLowSurrogate(char c) { 250 return c >= 0xDC00 && c <= 0xDFFF; 251 } 252 253 }