001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.binary; 019 020 import org.apache.commons.codec.BinaryDecoder; 021 import org.apache.commons.codec.BinaryEncoder; 022 import org.apache.commons.codec.DecoderException; 023 import org.apache.commons.codec.EncoderException; 024 025 /** 026 * Abstract superclass for Base-N encoders and decoders. 027 * 028 * <p> 029 * This class is not thread-safe. 030 * Each thread should use its own instance. 031 * </p> 032 */ 033 public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { 034 035 /** 036 * MIME chunk size per RFC 2045 section 6.8. 037 * 038 * <p> 039 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 040 * equal signs. 041 * </p> 042 * 043 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 044 */ 045 public static final int MIME_CHUNK_SIZE = 76; 046 047 /** 048 * PEM chunk size per RFC 1421 section 4.3.2.4. 049 * 050 * <p> 051 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 052 * equal signs. 053 * </p> 054 * 055 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> 056 */ 057 public static final int PEM_CHUNK_SIZE = 64; 058 059 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 060 061 /** 062 * Defines the default buffer size - currently {@value} 063 * - must be large enough for at least one encoded block+separator 064 */ 065 private static final int DEFAULT_BUFFER_SIZE = 8192; 066 067 /** Mask used to extract 8 bits, used in decoding bytes */ 068 protected static final int MASK_8BITS = 0xff; 069 070 /** 071 * Byte used to pad output. 072 */ 073 protected static final byte PAD_DEFAULT = '='; // Allow static access to default 074 075 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later 076 077 /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */ 078 private final int unencodedBlockSize; 079 080 /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */ 081 private final int encodedBlockSize; 082 083 /** 084 * Chunksize for encoding. Not used when decoding. 085 * A value of zero or less implies no chunking of the encoded data. 086 * Rounded down to nearest multiple of encodedBlockSize. 087 */ 088 protected final int lineLength; 089 090 /** 091 * Size of chunk separator. Not used unless {@link #lineLength} > 0. 092 */ 093 private final int chunkSeparatorLength; 094 095 /** 096 * Buffer for streaming. 097 */ 098 protected byte[] buffer; 099 100 /** 101 * Position where next character should be written in the buffer. 102 */ 103 protected int pos; 104 105 /** 106 * Position where next character should be read from the buffer. 107 */ 108 private int readPos; 109 110 /** 111 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, 112 * and must be thrown away. 113 */ 114 protected boolean eof; 115 116 /** 117 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to 118 * make sure each encoded line never goes beyond lineLength (if lineLength > 0). 119 */ 120 protected int currentLinePos; 121 122 /** 123 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. 124 * This variable helps track that. 125 */ 126 protected int modulus; 127 128 /** 129 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize} 130 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled. 131 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 132 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 133 * @param lineLength if > 0, use chunking with a length <code>lineLength</code> 134 * @param chunkSeparatorLength the chunk separator length, if relevant 135 */ 136 protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength){ 137 this.unencodedBlockSize = unencodedBlockSize; 138 this.encodedBlockSize = encodedBlockSize; 139 this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; 140 this.chunkSeparatorLength = chunkSeparatorLength; 141 } 142 143 /** 144 * Returns true if this object has buffered data for reading. 145 * 146 * @return true if there is data still available for reading. 147 */ 148 boolean hasData() { // package protected for access from I/O streams 149 return this.buffer != null; 150 } 151 152 /** 153 * Returns the amount of buffered data available for reading. 154 * 155 * @return The amount of buffered data available for reading. 156 */ 157 int available() { // package protected for access from I/O streams 158 return buffer != null ? pos - readPos : 0; 159 } 160 161 /** 162 * Get the default buffer size. Can be overridden. 163 * 164 * @return {@link #DEFAULT_BUFFER_SIZE} 165 */ 166 protected int getDefaultBufferSize() { 167 return DEFAULT_BUFFER_SIZE; 168 } 169 170 /** Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. */ 171 private void resizeBuffer() { 172 if (buffer == null) { 173 buffer = new byte[getDefaultBufferSize()]; 174 pos = 0; 175 readPos = 0; 176 } else { 177 byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; 178 System.arraycopy(buffer, 0, b, 0, buffer.length); 179 buffer = b; 180 } 181 } 182 183 /** 184 * Ensure that the buffer has room for <code>size</code> bytes 185 * 186 * @param size minimum spare space required 187 */ 188 protected void ensureBufferSize(int size){ 189 if ((buffer == null) || (buffer.length < pos + size)){ 190 resizeBuffer(); 191 } 192 } 193 194 /** 195 * Extracts buffered data into the provided byte[] array, starting at position bPos, 196 * up to a maximum of bAvail bytes. Returns how many bytes were actually extracted. 197 * 198 * @param b 199 * byte[] array to extract the buffered data into. 200 * @param bPos 201 * position in byte[] array to start extraction at. 202 * @param bAvail 203 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 204 * @return The number of bytes successfully extracted into the provided byte[] array. 205 */ 206 int readResults(byte[] b, int bPos, int bAvail) { // package protected for access from I/O streams 207 if (buffer != null) { 208 int len = Math.min(available(), bAvail); 209 System.arraycopy(buffer, readPos, b, bPos, len); 210 readPos += len; 211 if (readPos >= pos) { 212 buffer = null; // so hasData() will return false, and this method can return -1 213 } 214 return len; 215 } 216 return eof ? -1 : 0; 217 } 218 219 /** 220 * Checks if a byte value is whitespace or not. 221 * Whitespace is taken to mean: space, tab, CR, LF 222 * @param byteToCheck 223 * the byte to check 224 * @return true if byte is whitespace, false otherwise 225 */ 226 protected static boolean isWhiteSpace(byte byteToCheck) { 227 switch (byteToCheck) { 228 case ' ' : 229 case '\n' : 230 case '\r' : 231 case '\t' : 232 return true; 233 default : 234 return false; 235 } 236 } 237 238 /** 239 * Resets this object to its initial newly constructed state. 240 */ 241 private void reset() { 242 buffer = null; 243 pos = 0; 244 readPos = 0; 245 currentLinePos = 0; 246 modulus = 0; 247 eof = false; 248 } 249 250 /** 251 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the 252 * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 253 * 254 * @param pObject 255 * Object to encode 256 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. 257 * @throws EncoderException 258 * if the parameter supplied is not of type byte[] 259 */ 260 public Object encode(Object pObject) throws EncoderException { 261 if (!(pObject instanceof byte[])) { 262 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); 263 } 264 return encode((byte[]) pObject); 265 } 266 267 /** 268 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. 269 * 270 * @param pArray 271 * a byte array containing binary data 272 * @return A String containing only Base-N character data 273 */ 274 public String encodeToString(byte[] pArray) { 275 return StringUtils.newStringUtf8(encode(pArray)); 276 } 277 278 /** 279 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the 280 * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. 281 * 282 * @param pObject 283 * Object to decode 284 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String supplied. 285 * @throws DecoderException 286 * if the parameter supplied is not of type byte[] 287 */ 288 public Object decode(Object pObject) throws DecoderException { 289 if (pObject instanceof byte[]) { 290 return decode((byte[]) pObject); 291 } else if (pObject instanceof String) { 292 return decode((String) pObject); 293 } else { 294 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); 295 } 296 } 297 298 /** 299 * Decodes a String containing characters in the Base-N alphabet. 300 * 301 * @param pArray 302 * A String containing Base-N character data 303 * @return a byte array containing binary data 304 */ 305 public byte[] decode(String pArray) { 306 return decode(StringUtils.getBytesUtf8(pArray)); 307 } 308 309 /** 310 * Decodes a byte[] containing characters in the Base-N alphabet. 311 * 312 * @param pArray 313 * A byte array containing Base-N character data 314 * @return a byte array containing binary data 315 */ 316 public byte[] decode(byte[] pArray) { 317 reset(); 318 if (pArray == null || pArray.length == 0) { 319 return pArray; 320 } 321 decode(pArray, 0, pArray.length); 322 decode(pArray, 0, -1); // Notify decoder of EOF. 323 byte[] result = new byte[pos]; 324 readResults(result, 0, result.length); 325 return result; 326 } 327 328 /** 329 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. 330 * 331 * @param pArray 332 * a byte array containing binary data 333 * @return A byte array containing only the basen alphabetic character data 334 */ 335 public byte[] encode(byte[] pArray) { 336 reset(); 337 if (pArray == null || pArray.length == 0) { 338 return pArray; 339 } 340 encode(pArray, 0, pArray.length); 341 encode(pArray, 0, -1); // Notify encoder of EOF. 342 byte[] buf = new byte[pos - readPos]; 343 readResults(buf, 0, buf.length); 344 return buf; 345 } 346 347 /** 348 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. 349 * Uses UTF8 encoding. 350 * 351 * @param pArray a byte array containing binary data 352 * @return String containing only character data in the appropriate alphabet. 353 */ 354 public String encodeAsString(byte[] pArray){ 355 return StringUtils.newStringUtf8(encode(pArray)); 356 } 357 358 abstract void encode(byte[] pArray, int i, int length); // package protected for access from I/O streams 359 360 abstract void decode(byte[] pArray, int i, int length); // package protected for access from I/O streams 361 362 /** 363 * Returns whether or not the <code>octet</code> is in the current alphabet. 364 * Does not allow whitespace or pad. 365 * 366 * @param value The value to test 367 * 368 * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise. 369 */ 370 protected abstract boolean isInAlphabet(byte value); 371 372 /** 373 * Tests a given byte array to see if it contains only valid characters within the alphabet. 374 * The method optionally treats whitespace and pad as valid. 375 * 376 * @param arrayOctet byte array to test 377 * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed 378 * 379 * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty; 380 * <code>false</code>, otherwise 381 */ 382 public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) { 383 for (int i = 0; i < arrayOctet.length; i++) { 384 if (!isInAlphabet(arrayOctet[i]) && 385 (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) { 386 return false; 387 } 388 } 389 return true; 390 } 391 392 /** 393 * Tests a given String to see if it contains only valid characters within the alphabet. 394 * The method treats whitespace and PAD as valid. 395 * 396 * @param basen String to test 397 * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if 398 * the String is empty; <code>false</code>, otherwise 399 * @see #isInAlphabet(byte[], boolean) 400 */ 401 public boolean isInAlphabet(String basen) { 402 return isInAlphabet(StringUtils.getBytesUtf8(basen), true); 403 } 404 405 /** 406 * Tests a given byte array to see if it contains any characters within the alphabet or PAD. 407 * 408 * Intended for use in checking line-ending arrays 409 * 410 * @param arrayOctet 411 * byte array to test 412 * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise 413 */ 414 protected boolean containsAlphabetOrPad(byte[] arrayOctet) { 415 if (arrayOctet == null) { 416 return false; 417 } 418 for (int i = 0; i < arrayOctet.length; i++) { 419 if (PAD == arrayOctet[i] || isInAlphabet(arrayOctet[i])) { 420 return true; 421 } 422 } 423 return false; 424 } 425 426 /** 427 * Calculates the amount of space needed to encode the supplied array. 428 * 429 * @param pArray byte[] array which will later be encoded 430 * 431 * @return amount of space needed to encoded the supplied array. 432 * Returns a long since a max-len array will require > Integer.MAX_VALUE 433 */ 434 public long getEncodedLength(byte[] pArray) { 435 // Calculate non-chunked size - rounded up to allow for padding 436 // cast to long is needed to avoid possibility of overflow 437 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; 438 if (lineLength > 0) { // We're using chunking 439 // Round up to nearest multiple 440 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; 441 } 442 return len; 443 } 444 }