/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.io.UnsupportedEncodingException;

import org.apache.commons.codec.CharEncoding;

/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>.
 * <p>
 * Every method is static and <code>null</code>-tolerant for its data argument: a <code>null</code> String or byte
 * array yields a <code>null</code> result instead of an exception.
 * </p>
 *
 * @see CharEncoding
 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
 * @version $Id: StringUtils.java 950460 2010-06-02 09:43:02Z sebb $
 * @since 1.4
 */
public class StringUtils {

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when the charset is missing, which should be never according to the Java specification.
     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesIso8859_1(String string) {
        return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when the charset is missing, which should be never according to the Java specification.
     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUsAscii(String string) {
        return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when the charset is missing, which should be never according to the Java specification.
     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16(String string) {
        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when the charset is missing, which should be never according to the Java specification.
     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Be(String string) {
        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when the charset is missing, which should be never according to the Java specification.
     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Le(String string) {
        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when the charset is missing, which should be never according to the Java specification.
     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf8(String string) {
        return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
     * array.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is available as the cause.
     * @see CharEncoding
     * @see String#getBytes(String)
     */
    public static byte[] getBytesUnchecked(String string, String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Builds the unchecked exception used to report an unsupported charset name.
     * <p>
     * The message is <code>charsetName + ": " + e</code>. The caught exception is additionally attached as the cause
     * so that its stack trace is not lost.
     * </p>
     *
     * @param charsetName
     *            the charset name that was rejected
     * @param e
     *            the exception raised for that charset name
     * @return a new {@link IllegalStateException} whose cause is <code>e</code>
     */
    private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
        final IllegalStateException wrapped = new IllegalStateException(charsetName + ": " + e);
        // initCause (rather than a cause-taking constructor) keeps the code compatible with old JREs.
        wrapped.initCause(e);
        return wrapped;
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is available as the cause.
     * @see CharEncoding
     * @see String#String(byte[], String)
     */
    public static String newString(byte[] bytes, String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
     *             charset is required.
     */
    public static String newStringIso8859_1(byte[] bytes) {
        return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
     *             charset is required.
     */
    public static String newStringUsAscii(byte[] bytes) {
        return StringUtils.newString(bytes, CharEncoding.US_ASCII);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
     *             charset is required.
     */
    public static String newStringUtf16(byte[] bytes) {
        return StringUtils.newString(bytes, CharEncoding.UTF_16);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
     *             charset is required.
     */
    public static String newStringUtf16Be(byte[] bytes) {
        return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
     *             charset is required.
     */
    public static String newStringUtf16Le(byte[] bytes) {
        return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
     *             charset is required.
     */
    public static String newStringUtf8(byte[] bytes) {
        return StringUtils.newString(bytes, CharEncoding.UTF_8);
    }

}