001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import java.io.UnsupportedEncodingException;
021    
022    import org.apache.commons.codec.CharEncoding;
023    
024    /**
025     * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a
026     * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
027     * 
028     * @see CharEncoding
029     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
030     * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
031     * @version $Id: StringUtils.java 950460 2010-06-02 09:43:02Z sebb $
032     * @since 1.4
033     */
034    public class StringUtils {
035    
036        /**
037         * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
038         * byte array.
039         * 
040         * @param string
041         *            the String to encode, may be <code>null</code>
042         * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
043         * @throws IllegalStateException
044         *             Thrown when the charset is missing, which should be never according the the Java specification.
045         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
046         * @see #getBytesUnchecked(String, String)
047         */
048        public static byte[] getBytesIso8859_1(String string) {
049            return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
050        }
051    
052        /**
053         * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
054         * array.
055         * 
056         * @param string
057         *            the String to encode, may be <code>null</code>
058         * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
059         * @throws IllegalStateException
060         *             Thrown when the charset is missing, which should be never according the the Java specification.
061         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
062         * @see #getBytesUnchecked(String, String)
063         */
064        public static byte[] getBytesUsAscii(String string) {
065            return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
066        }
067    
068        /**
069         * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
070         * array.
071         * 
072         * @param string
073         *            the String to encode, may be <code>null</code>
074         * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
075         * @throws IllegalStateException
076         *             Thrown when the charset is missing, which should be never according the the Java specification.
077         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
078         * @see #getBytesUnchecked(String, String)
079         */
080        public static byte[] getBytesUtf16(String string) {
081            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
082        }
083    
084        /**
085         * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
086         * array.
087         * 
088         * @param string
089         *            the String to encode, may be <code>null</code>
090         * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
091         * @throws IllegalStateException
092         *             Thrown when the charset is missing, which should be never according the the Java specification.
093         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
094         * @see #getBytesUnchecked(String, String)
095         */
096        public static byte[] getBytesUtf16Be(String string) {
097            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
098        }
099    
100        /**
101         * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
102         * array.
103         * 
104         * @param string
105         *            the String to encode, may be <code>null</code>
106         * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
107         * @throws IllegalStateException
108         *             Thrown when the charset is missing, which should be never according the the Java specification.
109         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
110         * @see #getBytesUnchecked(String, String)
111         */
112        public static byte[] getBytesUtf16Le(String string) {
113            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
114        }
115    
116        /**
117         * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
118         * array.
119         * 
120         * @param string
121         *            the String to encode, may be <code>null</code>
122         * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
123         * @throws IllegalStateException
124         *             Thrown when the charset is missing, which should be never according the the Java specification.
125         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126         * @see #getBytesUnchecked(String, String)
127         */
128        public static byte[] getBytesUtf8(String string) {
129            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
130        }
131    
132        /**
133         * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
134         * array.
135         * <p>
136         * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
137         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
138         * </p>
139         * 
140         * @param string
141         *            the String to encode, may be <code>null</code>
142         * @param charsetName
143         *            The name of a required {@link java.nio.charset.Charset}
144         * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
145         * @throws IllegalStateException
146         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
147         *             required charset name.
148         * @see CharEncoding
149         * @see String#getBytes(String)
150         */
151        public static byte[] getBytesUnchecked(String string, String charsetName) {
152            if (string == null) {
153                return null;
154            }
155            try {
156                return string.getBytes(charsetName);
157            } catch (UnsupportedEncodingException e) {
158                throw StringUtils.newIllegalStateException(charsetName, e);
159            }
160        }
161    
162        private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
163            return new IllegalStateException(charsetName + ": " + e);
164        }
165    
166        /**
167         * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
168         * <p>
169         * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
170         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
171         * </p>
172         * 
173         * @param bytes
174         *            The bytes to be decoded into characters, may be <code>null</code>
175         * @param charsetName
176         *            The name of a required {@link java.nio.charset.Charset}
177         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
178         *         or <code>null</code> if the input byte arrray was <code>null</code>.
179         * @throws IllegalStateException
180         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
181         *             required charset name.
182         * @see CharEncoding
183         * @see String#String(byte[], String)
184         */
185        public static String newString(byte[] bytes, String charsetName) {
186            if (bytes == null) {
187                return null;
188            }
189            try {
190                return new String(bytes, charsetName);
191            } catch (UnsupportedEncodingException e) {
192                throw StringUtils.newIllegalStateException(charsetName, e);
193            }
194        }
195    
196        /**
197         * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
198         * 
199         * @param bytes
200         *            The bytes to be decoded into characters, may be <code>null</code>
201         * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset,
202         *         or <code>null</code> if the input byte array was <code>null</code>.
203         * @throws IllegalStateException
204         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
205         *             charset is required.
206         */
207        public static String newStringIso8859_1(byte[] bytes) {
208            return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
209        }
210    
211        /**
212         * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
213         * 
214         * @param bytes
215         *            The bytes to be decoded into characters
216         * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
217         *         or <code>null</code> if the input byte array was <code>null</code>.
218         * @throws IllegalStateException
219         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
220         *             charset is required.
221         */
222        public static String newStringUsAscii(byte[] bytes) {
223            return StringUtils.newString(bytes, CharEncoding.US_ASCII);
224        }
225    
226        /**
227         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
228         * 
229         * @param bytes
230         *            The bytes to be decoded into characters
231         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
232         *         or <code>null</code> if the input byte array was <code>null</code>.
233         * @throws IllegalStateException
234         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
235         *             charset is required.
236         */
237        public static String newStringUtf16(byte[] bytes) {
238            return StringUtils.newString(bytes, CharEncoding.UTF_16);
239        }
240    
241        /**
242         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
243         * 
244         * @param bytes
245         *            The bytes to be decoded into characters
246         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
247         *         or <code>null</code> if the input byte array was <code>null</code>.
248         * @throws IllegalStateException
249         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
250         *             charset is required.
251         */
252        public static String newStringUtf16Be(byte[] bytes) {
253            return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
254        }
255    
256        /**
257         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
258         * 
259         * @param bytes
260         *            The bytes to be decoded into characters
261         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
262         *         or <code>null</code> if the input byte array was <code>null</code>.
263         * @throws IllegalStateException
264         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
265         *             charset is required.
266         */
267        public static String newStringUtf16Le(byte[] bytes) {
268            return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
269        }
270    
271        /**
272         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
273         * 
274         * @param bytes
275         *            The bytes to be decoded into characters
276         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
277         *         or <code>null</code> if the input byte array was <code>null</code>.
278         * @throws IllegalStateException
279         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
280         *             charset is required.
281         */
282        public static String newStringUtf8(byte[] bytes) {
283            return StringUtils.newString(bytes, CharEncoding.UTF_8);
284        }
285    
286    }