View Javadoc

1   /*
2    * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/TranslateFunction.java,v 1.8 2005/06/26 16:07:22 elharo Exp $
3    * $Revision: 1.8 $
4    * $Date: 2005/06/26 16:07:22 $
5    *
6    * ====================================================================
7    *
8    * Copyright (C) 2000-2002 bob mcwhirter & James Strachan.
9    * All rights reserved.
10   *
11   * Redistribution and use in source and binary forms, with or without
12   * modification, are permitted provided that the following conditions
13   * are met:
14   * 
15   * 1. Redistributions of source code must retain the above copyright
16   *    notice, this list of conditions, and the following disclaimer.
17   *
18   * 2. Redistributions in binary form must reproduce the above copyright
19   *    notice, this list of conditions, and the disclaimer that follows 
20   *    these conditions in the documentation and/or other materials 
21   *    provided with the distribution.
22   *
23   * 3. The name "Jaxen" must not be used to endorse or promote products
24   *    derived from this software without prior written permission.  For
25   *    written permission, please contact license@jaxen.org.
26   * 
27   * 4. Products derived from this software may not be called "Jaxen", nor
28   *    may "Jaxen" appear in their name, without prior written permission
29   *    from the Jaxen Project Management (pm@jaxen.org).
30   * 
31   * In addition, we request (but do not require) that you include in the 
32   * end-user documentation provided with the redistribution and/or in the 
33   * software itself an acknowledgement equivalent to the following:
34   *     "This product includes software developed by the
35   *      Jaxen Project (http://www.jaxen.org/)."
36   * Alternatively, the acknowledgment may be graphical using the logos 
37   * available at http://www.jaxen.org/
38   *
39   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42   * DISCLAIMED.  IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT
43   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50   * SUCH DAMAGE.
51   *
52   * ====================================================================
53   * This software consists of voluntary contributions made by many 
54   * individuals on behalf of the Jaxen Project and was originally 
55   * created by bob mcwhirter <bob@werken.com> and 
56   * James Strachan <jstrachan@apache.org>.  For more information on the 
57   * Jaxen Project, please see <http://www.jaxen.org/>.
58   * 
59   * $Id: TranslateFunction.java,v 1.8 2005/06/26 16:07:22 elharo Exp $
60   */
61  
62  
63  package org.jaxen.function;
64  
65  import java.util.HashMap;
66  import java.util.List;
67  import java.util.Map;
68  
69  import org.jaxen.Context;
70  import org.jaxen.Function;
71  import org.jaxen.FunctionCallException;
72  import org.jaxen.Navigator;
73  
74  /***
75   * <p>
76   * <b>4.2</b>
77   * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code>
78   * </p>
79   * 
80   * <blockquote src="http://www.w3.org/TR/xpath#function-translate">
81   * <p>
82   * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function
83   * returns the first argument string with occurrences of characters in
84   * the second argument string replaced by the character at the
85   * corresponding position in the third argument string. For example,
86   * <code>translate("bar","abc","ABC")</code> returns the string
87   * <code>BAr</code>. If there is a character in the second argument
88   * string with no character at a corresponding position in the third
89   * argument string (because the second argument string is longer than
90   * the third argument string), then occurrences of that character in the
91   * first argument string are removed. For example,
92   * <code>translate("--aaa--","abc-","ABC")</code> returns
93   * <code>"AAA"</code>. If a character occurs more than once in the
94   * second argument string, then the first occurrence determines the
95   * replacement character. If the third argument string is longer than
96   * the second argument string, then excess characters are ignored.
97   * </p>
98   * 
99   * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a
100  * sufficient solution for case conversion in all languages. A future
101  * version of XPath may provide additional functions for case
102  * conversion.</blockquote>
103  * 
104  * </blockquote>
105  * 
106  * @author Jan Dvorak ( jan.dvorak @ mathan.cz )
107  * 
108  * @see <a href="http://www.w3.org/TR/xpath#function-translate"
109  *      target="_top">Section 4.2 of the XPath Specification</a>
110  */
111 public class TranslateFunction implements Function
112 {
113 
114      /* The translation is done thru a HashMap. Performance tip (for anyone
115       * who needs to improve the performance of this particular function):
116       * Cache the HashMaps, once they are constructed. */
117     
118     /***
119      * Create a new <code>TranslateFunction</code> object.
120      */
121     public TranslateFunction() {}
122     
123     
124     /*** Returns a copy of the first argument in which
125      * characters found in the second argument are replaced by
126      * corresponding characters from the third argument.
127      *
128      * @param context the context at the point in the
129      *         expression when the function is called
130      * @param args a list that contains exactly three items
131      * 
132      * @return a <code>String</code> built from <code>args.get(0)</code> 
133      *     in which occurrences of characters in <code>args.get(1)</code> 
134      *     are replaced by the corresponding characters in <code>args.get(2)</code> 
135      * 
136      * @throws FunctionCallException if <code>args</code> does not have exactly three items
137      */
138     public Object call(Context context,
139                        List args) throws FunctionCallException
140     {
141         if (args.size() == 3) {
142             return evaluate( args.get(0),
143                              args.get(1),
144                              args.get(2),
145                              context.getNavigator() );
146         }
147 
148         throw new FunctionCallException( "translate() requires three arguments." );
149     }
150 
151     /*** 
152      * Returns a copy of <code>strArg</code> in which
153      * characters found in <code>fromArg</code> are replaced by
154      * corresponding characters from <code>toArg</code>.
155      * If necessary each argument is first converted to it string-value
156      * as if by the XPath <code>string()</code> function.
157      * 
158      * @param strArg the base string
159      * @param fromArg the characters to be replaced
160      * @param toArg the characters they will be replaced by
161      * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments.
162      * 
163      * @return a copy of <code>strArg</code> in which
164      *  characters found in <code>fromArg</code> are replaced by
165      *  corresponding characters from <code>toArg</code>
166      *  
167      * @throws FunctionCallException if one of the arguments is a malformed Unicode string;
168      *     that is, if surrogate characters don't line up properly
169      * 
170      */
171     public static String evaluate(Object strArg,
172                                   Object fromArg,
173                                   Object toArg,
174                                   Navigator nav) throws FunctionCallException
175     {
176         String inStr = StringFunction.evaluate( strArg, nav );
177         String fromStr = StringFunction.evaluate( fromArg, nav );
178         String toStr = StringFunction.evaluate( toArg, nav );
179     
180         // Initialize the mapping in a HashMap
181         Map characterMap = new HashMap();
182         String[] fromCharacters = toUnicodeCharacters(fromStr);
183         String[] toCharacters = toUnicodeCharacters(toStr);
184         int fromLen = fromCharacters.length;
185         int toLen = toCharacters.length;
186         for ( int i = 0; i < fromLen; i++ ) {
187             String cFrom = fromCharacters[i];
188             if ( characterMap.containsKey( cFrom ) ) {
189                 // We've seen the character before, ignore
190                 continue;
191             }
192             
193             if ( i < toLen ) {
194                 // Will change
195                 characterMap.put( cFrom, toCharacters[i] );
196             } 
197             else {
198                 // Will delete
199                 characterMap.put( cFrom, null );
200             }
201         }
202 
203         // Process the input string thru the map
204         StringBuffer outStr = new StringBuffer( inStr.length() );
205         String[] inCharacters = toUnicodeCharacters(inStr);
206         int inLen = inCharacters.length;
207         for ( int i = 0; i < inLen; i++ ) {
208             String cIn = inCharacters[i];
209             if ( characterMap.containsKey( cIn ) ) {
210                 String cTo = (String) characterMap.get( cIn );
211                 if ( cTo != null ) {
212                     outStr.append( cTo );
213                 }
214             } 
215             else {
216                 outStr.append( cIn );
217             }
218         }
219     
220         return outStr.toString();
221     }
222 
223     private static String[] toUnicodeCharacters(String s) throws FunctionCallException {
224 
225         String[] result = new String[s.length()];
226         int stringLength = 0;
227         for (int i = 0; i < s.length(); i++) {
228             char c1 = s.charAt(i);
229             if (isHighSurrogate(c1)) {
230                 try {
231                     char c2 = s.charAt(i+1);
232                     if (isLowSurrogate(c2)) {
233                         result[stringLength] = (c1 + "" + c2).intern();
234                         i++;
235                     }
236                     else {
237                         throw new FunctionCallException("Mismatched surrogate pair in translate function");
238                     }
239                 }
240                 catch (StringIndexOutOfBoundsException ex) {
241                     throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function");
242                 }
243             }
244             else {
245                 result[stringLength]=String.valueOf(c1).intern();
246             }
247             stringLength++;
248         }
249         
250         if (stringLength == result.length) return result;
251         
252         // trim array
253         String[] trimmed = new String[stringLength];
254         System.arraycopy(result, 0, trimmed, 0, stringLength);
255         return trimmed;
256         
257     }
258 
259     private static boolean isHighSurrogate(char c) {
260         return c >= 0xD800 && c <= 0xDBFF;
261     }
262      
263     private static boolean isLowSurrogate(char c) {
264         return c >= 0xDC00 && c <= 0xDFFF;
265     }
266      
267 }