/*
 * $Header$
 * $Revision$
 * $Date$
 *
 * ====================================================================
 *
 * Copyright 2000-2002 bob mcwhirter & James Strachan.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   * Neither the name of the Jaxen Project nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Jaxen Project and was originally
 * created by bob mcwhirter <bob@werken.com> and
 * James Strachan <jstrachan@apache.org>.  For more information on the
 * Jaxen Project, please see <http://www.jaxen.org/>.
 *
 * $Id$
 */


package org.jaxen.function;

import java.util.List;

import org.jaxen.Context;
import org.jaxen.Function;
import org.jaxen.FunctionCallException;
import org.jaxen.Navigator;

/**
 * <p><b>4.2</b> <code><i>number</i> string-length(<i>string</i>)</code></p>
 *
 * <p>
 * The <b>string-length</b> function returns the number of <strong>Unicode characters</strong>
 * in its argument. This is <strong>not</strong> necessarily
 * the same as the number of <strong>Java chars</strong>
 * in the corresponding Java string. In particular, if the Java <code>String</code>
 * contains surrogate pairs, each such pair will be counted as only one character
 * by this function. If the argument is omitted,
 * it returns the length of the string-value of the context node.
 * </p>
 *
 * @author bob mcwhirter (bob @ werken.com)
 * @see <a href="https://www.w3.org/TR/xpath#function-string-length" target="_top">Section
 *      4.2 of the XPath Specification</a>
 */
public class StringLengthFunction implements Function
{


    /**
     * Create a new <code>StringLengthFunction</code> object.
     */
    public StringLengthFunction() {}


    /**
     * <p>
     * Returns the number of Unicode characters in the string-value of the argument.
     * </p>
     *
     * @param context the context at the point in the
     *         expression when the function is called
     * @param args a list containing the item whose string-value is to be counted.
     *         If empty, the length of the context node's string-value is returned.
     *
     * @return a <code>Double</code> giving the number of Unicode characters
     *
     * @throws FunctionCallException if args has more than one item, or if
     *         the string-value contains mismatched surrogates
     */
    public Object call(Context context,
                       List args) throws FunctionCallException
    {
        if (args.size() == 0)
        {
            // No argument: measure the string-value of the context node-set.
            return evaluate( context.getNodeSet(),
                             context.getNavigator() );
        }
        else if (args.size() == 1)
        {
            return evaluate( args.get(0),
                             context.getNavigator() );
        }

        throw new FunctionCallException( "string-length() requires one argument." );
    }

    /**
     * <p>
     * Returns the number of Unicode characters in the string-value of
     * an object. A well-formed surrogate pair (a high surrogate
     * immediately followed by a low surrogate) counts as one character.
     * </p>
     *
     * @param obj the object whose string-value is counted
     * @param nav used to calculate the string-value of <code>obj</code>
     *
     * @return a <code>Double</code> giving the number of Unicode characters
     *
     * @throws FunctionCallException if the string contains mismatched surrogates:
     *         a high surrogate that is not followed by a low surrogate, or a
     *         low surrogate that is not preceded by a high surrogate
     */
    public static Double evaluate(Object obj, Navigator nav) throws FunctionCallException
    {
        String str = StringFunction.evaluate( obj, nav );
        // String.length() counts UTF-16 code units, not Unicode characters,
        // so each surrogate pair has to be collapsed into a single count here.
        char[] data = str.toCharArray();
        int length = 0;
        for (int i = 0; i < data.length; i++) {
            char c = data[i];
            length++;
            if (c >= 0xD800 && c <= 0xDFFF) {
                // Only a high surrogate (0xD800-0xDBFF) may begin a pair, and
                // it must be directly followed by a low surrogate
                // (0xDC00-0xDFFF). A low surrogate reaching this point has no
                // preceding high surrogate and is therefore unpaired.
                boolean wellFormedPair = c <= 0xDBFF
                                      && i + 1 < data.length
                                      && data[i+1] >= 0xDC00
                                      && data[i+1] <= 0xDFFF;
                if (!wellFormedPair) {
                    throw new FunctionCallException("Bad surrogate pair in string " + str);
                }
                i++; // increment past low surrogate
            }
        }
        return new Double(length);
    }

}