View Javadoc

1   /*
2    * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/SubstringFunction.java,v 1.14 2005/06/26 16:07:23 elharo Exp $
3    * $Revision: 1.14 $
4    * $Date: 2005/06/26 16:07:23 $
5    *
6    * ====================================================================
7    *
8    * Copyright (C) 2000-2002 bob mcwhirter & James Strachan.
9    * All rights reserved.
10   *
11   * Redistribution and use in source and binary forms, with or without
12   * modification, are permitted provided that the following conditions
13   * are met:
14   *
15   * 1. Redistributions of source code must retain the above copyright
16   *    notice, this list of conditions, and the following disclaimer.
17   *
18   * 2. Redistributions in binary form must reproduce the above copyright
19   *    notice, this list of conditions, and the disclaimer that follows
20   *    these conditions in the documentation and/or other materials
21   *    provided with the distribution.
22   *
23   * 3. The name "Jaxen" must not be used to endorse or promote products
24   *    derived from this software without prior written permission.  For
25   *    written permission, please contact license@jaxen.org.
26   *
27   * 4. Products derived from this software may not be called "Jaxen", nor
28   *    may "Jaxen" appear in their name, without prior written permission
29   *    from the Jaxen Project Management (pm@jaxen.org).
30   *
31   * In addition, we request (but do not require) that you include in the
32   * end-user documentation provided with the redistribution and/or in the
33   * software itself an acknowledgement equivalent to the following:
34   *     "This product includes software developed by the
35   *      Jaxen Project (http://www.jaxen.org/)."
36   * Alternatively, the acknowledgment may be graphical using the logos
37   * available at http://www.jaxen.org/
38   *
39   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42   * DISCLAIMED.  IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT
43   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50   * SUCH DAMAGE.
51   *
52   * ====================================================================
53   * This software consists of voluntary contributions made by many
54   * individuals on behalf of the Jaxen Project and was originally
55   * created by bob mcwhirter <bob@werken.com> and
56   * James Strachan <jstrachan@apache.org>.  For more information on the
57   * Jaxen Project, please see <http://www.jaxen.org/>.
58   *
59   */
60  package org.jaxen.function;
61  
62  import java.util.List;
63  
64  import org.jaxen.Context;
65  import org.jaxen.Function;
66  import org.jaxen.FunctionCallException;
67  import org.jaxen.Navigator;
68  /***
69   * <p>
70   * <b>4.2</b>
71   * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code>
72   * </p>
73   * 
74   * <blockquote cite="http://www.w3.org/TR/xpath">
75   * <p>The <b>substring</b> function returns the
76   * substring of the first argument starting at the position specified in
77   * the second argument with length specified in the third argument. For
78   * example,
79   * 
80   * <code>substring("12345",2,3)</code> returns <code>"234"</code>.
81   * If the third argument is not specified, it returns the substring
82   * starting at the position specified in the second argument and
83   * continuing to the end of the string. For example,
84   * <code>substring("12345",2)</code> returns <code>"2345"</code>.
85   * </p>
86   * 
87   * <p>
88   * More precisely, each character in the string (see <a
89   * href="http://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a
90   * numeric position: the position of the first character is 1, the
91   * position of the second character is 2 and so on.
92   * </p>
93   * 
94   * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in
95   * which the <code>String.substring</code> method treats the position
96   * of the first character as 0.</blockquote>
97   * 
98   * <p>
99   * The returned substring contains those characters for which the
100  * position of the character is greater than or equal to the rounded
101  * value of the second argument and, if the third argument is specified,
102  * less than the sum of the rounded value of the second argument and the
103  * rounded value of the third argument; the comparisons and addition
104  * used for the above follow the standard IEEE 754 rules; rounding is
105  * done as if by a call to the <b><a href="#function-round">round</a></b>
106  * function. The following examples illustrate various unusual cases:
107  * </p>
108  * 
109  * <ul>
110  * 
111  * <li>
112  * <p>
113  * <code>substring("12345", 1.5, 2.6)</code> returns
114  * <code>"234"</code>
115  * </p>
116  * </li>
117  * 
118  * <li>
119  * <p>
120  * <code>substring("12345", 0, 3)</code> returns <code>"12"</code>
121  * 
122  * </p>
123  * </li>
124  * 
125  * <li>
126  * <p>
127  * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code>
128  * </p>
129  * </li>
130  * 
131  * <li>
132  * <p>
133  * <code>substring("12345", 1, 0 div 0)</code> returns
134  * 
135  * <code>""</code>
136  * </p>
137  * </li>
138  * 
139  * <li>
140  * <p>
141  * <code>substring("12345", -42, 1 div 0)</code> returns
142  * <code>"12345"</code>
143  * </p>
144  * </li>
145  * 
146  * <li>
147  * <p>
148  * 
149  * <code>substring("12345", -1 div 0, 1 div 0)</code> returns
150  * <code>""</code></p></li></ul></blockquote>
151  * 
152  * @author bob mcwhirter (bob @ werken.com)
153  * 
154  * @see <a href="http://www.w3.org/TR/xpath#function-substring"
155  *      target="_top">Section 4.2 of the XPath Specification</a>
156  */
157 public class SubstringFunction implements Function
158 {
159 
160     /***
161      * Create a new <code>SubstringFunction</code> object.
162      */
163     public SubstringFunction() {}
164 
165     
166     /*** Returns a substring of an XPath string-value by character index.
167      *
168      * @param context the context at the point in the
169      *         expression when the function is called
170      * @param args a list that contains two or three items
171      * 
172      * @return a <code>String</code> containing the specifed character subsequence of 
173      *     the original string or the string-value of the context node
174      * 
175      * @throws FunctionCallException if <code>args</code> has more than three
176      *     or less than two items
177      */
178     public Object call(Context context,
179                        List args) throws FunctionCallException
180     {
181         final int argc = args.size();
182         if (argc < 2 || argc > 3){
183             throw new FunctionCallException( "substring() requires two or three arguments." );
184         }
185 
186         final Navigator nav = context.getNavigator();
187 
188         final String str = StringFunction.evaluate(args.get(0), nav );
189         // The spec doesn't really address this case
190         if (str == null) {
191             return "";
192         }
193 
194         final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue();
195 
196         if (stringLength == 0) {
197             return "";
198         }
199 
200         Double d1 = NumberFunction.evaluate(args.get(1), nav);
201 
202         if (d1.isNaN()){
203             return "";
204         }
205         // Round the value and subtract 1 as Java strings are zero based
206         int start = RoundFunction.evaluate(d1, nav).intValue() - 1;
207 
208         int substringLength = stringLength;
209         if (argc == 3){
210             Double d2 = NumberFunction.evaluate(args.get(2), nav);
211 
212             if (!d2.isNaN()){
213                 substringLength = RoundFunction.evaluate(d2, nav ).intValue();
214             }
215             else {
216                 substringLength = 0;
217             }
218         }
219         
220         if (substringLength < 0) return "";
221 
222         int end = start + substringLength;
223         if (argc == 2) end = stringLength;
224             
225         // negative start is treated as 0
226         if ( start < 0){
227             start = 0;
228         }
229         else if (start > stringLength){
230             return "";
231         }
232 
233         if (end > stringLength){
234             end = stringLength;
235         }
236         else if (end < start) return "";
237         
238         if (stringLength == str.length()) {
239             // easy case; no surrogate pairs
240             return str.substring(start, end);
241         }
242         else {
243             return unicodeSubstring(str, start, end);
244         }
245         
246     }
247 
248     private static String unicodeSubstring(String s, int start, int end) {
249 
250         StringBuffer result = new StringBuffer(s.length());
251         for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) {
252             char c = s.charAt(jChar);
253             if (uChar >= start) result.append(c);
254             if (c >= 0xD800) { // get the low surrogate
255                 // ???? we could check here that this is indeed a low surroagte
256                 // we could also catch StringIndexOutOfBoundsException
257                 jChar++;
258                 if (uChar >= start) result.append(s.charAt(jChar));
259             }
260         }
261         return result.toString();
262     }
263 }