View Javadoc

1   /*
2    * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/StringLengthFunction.java,v 1.9 2005/06/26 16:07:22 elharo Exp $
3    * $Revision: 1.9 $
4    * $Date: 2005/06/26 16:07:22 $
5    *
6    * ====================================================================
7    *
8    * Copyright (C) 2000-2002 bob mcwhirter & James Strachan.
9    * All rights reserved.
10   *
11   * Redistribution and use in source and binary forms, with or without
12   * modification, are permitted provided that the following conditions
13   * are met:
14   * 
15   * 1. Redistributions of source code must retain the above copyright
16   *    notice, this list of conditions, and the following disclaimer.
17   *
18   * 2. Redistributions in binary form must reproduce the above copyright
19   *    notice, this list of conditions, and the disclaimer that follows 
20   *    these conditions in the documentation and/or other materials 
21   *    provided with the distribution.
22   *
23   * 3. The name "Jaxen" must not be used to endorse or promote products
24   *    derived from this software without prior written permission.  For
25   *    written permission, please contact license@jaxen.org.
26   * 
27   * 4. Products derived from this software may not be called "Jaxen", nor
28   *    may "Jaxen" appear in their name, without prior written permission
29   *    from the Jaxen Project Management (pm@jaxen.org).
30   * 
31   * In addition, we request (but do not require) that you include in the 
32   * end-user documentation provided with the redistribution and/or in the 
33   * software itself an acknowledgement equivalent to the following:
34   *     "This product includes software developed by the
35   *      Jaxen Project (http://www.jaxen.org/)."
36   * Alternatively, the acknowledgment may be graphical using the logos 
37   * available at http://www.jaxen.org/
38   *
39   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42   * DISCLAIMED.  IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT
43   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50   * SUCH DAMAGE.
51   *
52   * ====================================================================
53   * This software consists of voluntary contributions made by many 
54   * individuals on behalf of the Jaxen Project and was originally 
55   * created by bob mcwhirter <bob@werken.com> and 
56   * James Strachan <jstrachan@apache.org>.  For more information on the 
57   * Jaxen Project, please see <http://www.jaxen.org/>.
58   * 
59   * $Id: StringLengthFunction.java,v 1.9 2005/06/26 16:07:22 elharo Exp $
60   */
61  
62  
63  package org.jaxen.function;
64  
65  import java.util.List;
66  
67  import org.jaxen.Context;
68  import org.jaxen.Function;
69  import org.jaxen.FunctionCallException;
70  import org.jaxen.Navigator;
71  
72  /***
73   * <p><b>4.2</b> <code><i>number</i> string-length(<i>string</i>)</code></p> 
74   * 
75   * <p>
76   * The <b>string-length</b> function returns the number of <strong>Unicode characters</strong>
77   * in its argument. This is <strong>not</strong> necessarily 
78   * the same as the number of <strong>Java chars</strong>
79   * in the corresponding Java string. In particular, if the Java <code>String</code>
80   * contains surrogate pairs each such pair will be counted as only one character
81   * by this function. If the argument is omitted, 
82   * it returns the length of the string-value of the context node.
83   * </p>
84   * 
85   * @author bob mcwhirter (bob @ werken.com)
86   * @see <a href="http://www.w3.org/TR/xpath#function-string-length" target="_top">Section 
87   *      4.2 of the XPath Specification</a>
88   */
89  public class StringLengthFunction implements Function
90  {
91  
92      
93      /***
94       * Create a new <code>StringLengthFunction</code> object.
95       */
96      public StringLengthFunction() {}
97      
98      
99      /***
100      * <p>
101      * Returns the number of Unicode characters in the string-value of the argument.
102      * </p>
103      * 
104      * @param context the context at the point in the
105      *         expression when the function is called
106      * @param args a list containing the item whose string-value is to be counted.
107      *     If empty, the length of the context node's string-value is returned.
108      * 
109      * @return a <code>Double</code> giving the number of Unicode characters
110      * 
111      * @throws FunctionCallException if args has more than one item
112      */
113     public Object call(Context context,
114                        List args) throws FunctionCallException
115     {
116         if (args.size() == 0)
117         {
118             return evaluate( context.getNodeSet(),
119                              context.getNavigator() );
120         } 
121         else if (args.size() == 1)
122         {
123             return evaluate( args.get(0),
124                              context.getNavigator() );
125         }
126 
127         throw new FunctionCallException( "string-length() requires one argument." );
128     }
129 
130     /***
131      * <p>
132      * Returns the number of Unicode characters in the string-value of 
133      * an object.
134      * </p>
135      * 
136      * @param obj the object whose string-value is counted
137      * @param nav used to calculate the string-values of the first two arguments
138      * 
139      * @return a <code>Double</code> giving the number of Unicode characters
140      * 
141      * @throws FunctionCallException if the string contains mismatched surrogates
142      */
143     public static Double evaluate(Object obj, Navigator nav) throws FunctionCallException
144     {
145         
146         // could/should I push the mismnatching checks into StringFunction.evaluate()????
147         String str = StringFunction.evaluate( obj, nav );
148         // String.length() counts UTF-16 code points; not Unicode characters
149         char[] data = str.toCharArray();
150         int length = 0;
151         for (int i = 0; i < data.length; i++) {
152             char c = data[i];
153             length++;
154             // if this is a high surrogate; assume the next character is
155             // is a low surrogate and skip it
156             if (c >= 0xD800) {
157                 try {
158                     char low = data[i+1];
159                     if (low < 0xDC00 || low > 0xDFFF) {
160                         throw new FunctionCallException("Bad surrogate pair in string " + str);
161                     }
162                     i++; // increment past low surrogate
163                 }
164                 catch (ArrayIndexOutOfBoundsException ex) {
165                     throw new FunctionCallException("Bad surrogate pair in string " + str);
166                 }
167             }
168         }
169         return new Double(length);
170     }
171     
172 }