001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.binary; 019 020 /** 021 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>. 022 * 023 * <p> 024 * The class can be parameterized in the following manner with various constructors: 025 * <ul> 026 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li> 027 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of 028 * 8 in the encoded data. 029 * <li>Line separator: Default is CRLF ("\r\n")</li> 030 * </ul> 031 * </p> 032 * <p> 033 * This class operates directly on byte streams, and not character streams. 034 * </p> 035 * <p> 036 * This class is not thread-safe. Each thread should use its own instance. 037 * </p> 038 * 039 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> 040 * 041 * @since 1.5 042 * @version $Revision: 1080712 $ 043 */ 044 public class Base32 extends BaseNCodec { 045 046 /** 047 * BASE32 characters are 5 bits in length. 048 * They are formed by taking a block of five octets to form a 40-bit string, 049 * which is converted into eight BASE32 characters. 050 */ 051 private static final int BITS_PER_ENCODED_BYTE = 5; 052 private static final int BYTES_PER_ENCODED_BLOCK = 8; 053 private static final int BYTES_PER_UNENCODED_BLOCK = 5; 054 055 /** 056 * Chunk separator per RFC 2045 section 2.1. 057 * 058 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 059 */ 060 private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; 061 062 /** 063 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in 064 * Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the Base32 065 * alphabet but fall within the bounds of the array are translated to -1. 066 * 067 */ 068 private static final byte[] DECODE_TABLE = { 069 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 070 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 071 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 072 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f 073 -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 074 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-N 075 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a O-Z 076 }; 077 078 /** 079 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" 080 * equivalents as specified in Table 3 of RFC 2045. 081 */ 082 private static final byte[] ENCODE_TABLE = { 083 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 084 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 085 '2', '3', '4', '5', '6', '7', 086 }; 087 088 /** 089 * This array is a lookup table that translates Unicode characters drawn from the "Base32 |Hex Alphabet" (as specified in 090 * Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the Base32 Hex 091 * alphabet but fall within the bounds of the array are translated to -1. 092 * 093 */ 094 private static final byte[] HEX_DECODE_TABLE = { 095 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 096 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 097 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 098 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f 099 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 100 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-N 101 25, 26, 27, 28, 29, 30, 31, 32, // 50-57 O-V 102 }; 103 104 /** 105 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" 106 * equivalents as specified in Table 3 of RFC 2045. 107 */ 108 private static final byte[] HEX_ENCODE_TABLE = { 109 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 110 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 111 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 112 }; 113 114 /** Mask used to extract 5 bits, used when encoding Base32 bytes */ 115 private static final int MASK_5BITS = 0x1f; 116 117 // The static final fields above are used for the original static byte[] methods on Base32. 118 // The private member fields below are used with the new streaming approach, which requires 119 // some state be preserved between calls of encode() and decode(). 120 121 /** 122 * Place holder for the bytes we're dealing with for our based logic. 123 * Bitwise operations store and extract the encoding or decoding from this variable. 124 */ 125 private long bitWorkArea; 126 127 /** 128 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 129 * <code>decodeSize = {@link BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code> 130 */ 131 private final int decodeSize; 132 133 /** 134 * Decode table to use. 135 */ 136 private final byte[] decodeTable; 137 138 /** 139 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 140 * <code>encodeSize = {@link BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code> 141 */ 142 private final int encodeSize; 143 144 /** 145 * Encode table to use. 146 */ 147 private final byte[] encodeTable; 148 149 /** 150 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 151 */ 152 private final byte[] lineSeparator; 153 154 /** 155 * Creates a Base32 codec used for decoding and encoding. 156 * <p> 157 * When encoding the line length is 0 (no chunking). 158 * </p> 159 * 160 */ 161 public Base32() { 162 this(false); 163 } 164 165 /** 166 * Creates a Base32 codec used for decoding and encoding. 167 * <p> 168 * When encoding the line length is 0 (no chunking). 169 * </p> 170 * @param useHex if <code>true</code> then use Base32 Hex alphabet 171 */ 172 public Base32(boolean useHex) { 173 this(0, null, useHex); 174 } 175 176 /** 177 * Creates a Base32 codec used for decoding and encoding. 178 * <p> 179 * When encoding the line length is given in the constructor, the line separator is CRLF. 180 * </p> 181 * 182 * @param lineLength 183 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 8). 184 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 185 */ 186 public Base32(int lineLength) { 187 this(lineLength, CHUNK_SEPARATOR); 188 } 189 190 /** 191 * Creates a Base32 codec used for decoding and encoding. 192 * <p> 193 * When encoding the line length and line separator are given in the constructor. 194 * </p> 195 * <p> 196 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 197 * </p> 198 * 199 * @param lineLength 200 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 8). 201 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 202 * @param lineSeparator 203 * Each line of encoded data will end with this sequence of bytes. 204 * @throws IllegalArgumentException 205 * The provided lineSeparator included some Base32 characters. That's not going to work! 206 */ 207 public Base32(int lineLength, byte[] lineSeparator) { 208 this(lineLength, lineSeparator, false); 209 } 210 211 /** 212 * Creates a Base32 / Base32 Hex codec used for decoding and encoding. 213 * <p> 214 * When encoding the line length and line separator are given in the constructor. 215 * </p> 216 * <p> 217 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 218 * </p> 219 * 220 * @param lineLength 221 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 8). 222 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 223 * @param lineSeparator 224 * Each line of encoded data will end with this sequence of bytes. 225 * @param useHex if <code>true</code>, then use Base32 Hex alphabet, otherwise use Base32 alphabet 226 * @throws IllegalArgumentException 227 * The provided lineSeparator included some Base32 characters. That's not going to work! 228 * Or the lineLength > 0 and lineSeparator is null. 229 */ 230 public Base32(int lineLength, byte[] lineSeparator, boolean useHex) { 231 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 232 lineLength, 233 lineSeparator == null ? 0 : lineSeparator.length); 234 if (useHex){ 235 this.encodeTable = HEX_ENCODE_TABLE; 236 this.decodeTable = HEX_DECODE_TABLE; 237 } else { 238 this.encodeTable = ENCODE_TABLE; 239 this.decodeTable = DECODE_TABLE; 240 } 241 if (lineLength > 0) { 242 if (lineSeparator == null) { 243 throw new IllegalArgumentException("lineLength "+lineLength+" > 0, but lineSeparator is null"); 244 } 245 // Must be done after initializing the tables 246 if (containsAlphabetOrPad(lineSeparator)) { 247 String sep = StringUtils.newStringUtf8(lineSeparator); 248 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]"); 249 } 250 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; 251 this.lineSeparator = new byte[lineSeparator.length]; 252 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); 253 } else { 254 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 255 this.lineSeparator = null; 256 } 257 this.decodeSize = this.encodeSize - 1; 258 } 259 260 /** 261 * <p> 262 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once 263 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" 264 * call is not necessary when decoding, but it doesn't hurt, either. 265 * </p> 266 * <p> 267 * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are 268 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, 269 * garbage-out philosophy: it will not check the provided data for validity. 270 * </p> 271 * 272 * @param in 273 * byte[] array of ascii data to Base32 decode. 274 * @param inPos 275 * Position to start reading data from. 276 * @param inAvail 277 * Amount of bytes available from input for encoding. 278 * 279 * Output is written to {@link #buffer} as 8-bit octets, using {@link pos} as the buffer position 280 */ 281 void decode(byte[] in, int inPos, int inAvail) { // package protected for access from I/O streams 282 if (eof) { 283 return; 284 } 285 if (inAvail < 0) { 286 eof = true; 287 } 288 for (int i = 0; i < inAvail; i++) { 289 byte b = in[inPos++]; 290 if (b == PAD) { 291 // We're done. 292 eof = true; 293 break; 294 } else { 295 ensureBufferSize(decodeSize); 296 if (b >= 0 && b < this.decodeTable.length) { 297 int result = this.decodeTable[b]; 298 if (result >= 0) { 299 modulus = (modulus+1) % BYTES_PER_ENCODED_BLOCK; 300 bitWorkArea = (bitWorkArea << BITS_PER_ENCODED_BYTE) + result; // collect decoded bytes 301 if (modulus == 0) { // we can output the 5 bytes 302 buffer[pos++] = (byte) ((bitWorkArea >> 32) & MASK_8BITS); 303 buffer[pos++] = (byte) ((bitWorkArea >> 24) & MASK_8BITS); 304 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 305 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 306 buffer[pos++] = (byte) (bitWorkArea & MASK_8BITS); 307 } 308 } 309 } 310 } 311 } 312 313 // Two forms of EOF as far as Base32 decoder is concerned: actual 314 // EOF (-1) and first time '=' character is encountered in stream. 315 // This approach makes the '=' padding characters completely optional. 316 if (eof && modulus >= 2) { // if modulus < 2, nothing to do 317 ensureBufferSize(decodeSize); 318 319 // we ignore partial bytes, i.e. only multiples of 8 count 320 switch (modulus) { 321 case 2 : // 10 bits, drop 2 and output one byte 322 buffer[pos++] = (byte) ((bitWorkArea >> 2) & MASK_8BITS); 323 break; 324 case 3 : // 15 bits, drop 7 and output 1 byte 325 buffer[pos++] = (byte) ((bitWorkArea >> 7) & MASK_8BITS); 326 break; 327 case 4 : // 20 bits = 2*8 + 4 328 bitWorkArea = bitWorkArea >> 4; // drop 4 bits 329 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 330 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 331 break; 332 case 5 : // 25bits = 3*8 + 1 333 bitWorkArea = bitWorkArea >> 1; 334 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 335 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 336 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 337 break; 338 case 6 : // 30bits = 3*8 + 6 339 bitWorkArea = bitWorkArea >> 6; 340 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 341 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 342 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 343 break; 344 case 7 : // 35 = 4*8 +3 345 bitWorkArea = bitWorkArea >> 3; 346 buffer[pos++] = (byte) ((bitWorkArea >> 24) & MASK_8BITS); 347 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 348 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 349 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 350 break; 351 } 352 } 353 } 354 355 /** 356 * <p> 357 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with 358 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last 359 * remaining bytes (if not multiple of 5). 360 * </p> 361 * 362 * @param in 363 * byte[] array of binary data to Base32 encode. 364 * @param inPos 365 * Position to start reading data from. 366 * @param inAvail 367 * Amount of bytes available from input for encoding. 368 */ 369 void encode(byte[] in, int inPos, int inAvail) { // package protected for access from I/O streams 370 if (eof) { 371 return; 372 } 373 // inAvail < 0 is how we're informed of EOF in the underlying data we're 374 // encoding. 375 if (inAvail < 0) { 376 eof = true; 377 if (0 == modulus && lineLength == 0) { 378 return; // no leftovers to process and not using chunking 379 } 380 ensureBufferSize(encodeSize); 381 int savedPos = pos; 382 switch (modulus) { // % 5 383 case 1 : // Only 1 octet; take top 5 bits then remainder 384 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3 385 buffer[pos++] = encodeTable[(int)(bitWorkArea << 2) & MASK_5BITS]; // 5-3=2 386 buffer[pos++] = PAD; 387 buffer[pos++] = PAD; 388 buffer[pos++] = PAD; 389 buffer[pos++] = PAD; 390 buffer[pos++] = PAD; 391 buffer[pos++] = PAD; 392 break; 393 394 case 2 : // 2 octets = 16 bits to use 395 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11 396 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6 397 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1 398 buffer[pos++] = encodeTable[(int)(bitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4 399 buffer[pos++] = PAD; 400 buffer[pos++] = PAD; 401 buffer[pos++] = PAD; 402 buffer[pos++] = PAD; 403 break; 404 case 3 : // 3 octets = 24 bits to use 405 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19 406 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14 407 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9 408 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4 409 buffer[pos++] = encodeTable[(int)(bitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1 410 buffer[pos++] = PAD; 411 buffer[pos++] = PAD; 412 buffer[pos++] = PAD; 413 break; 414 case 4 : // 4 octets = 32 bits to use 415 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27 416 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22 417 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17 418 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12 419 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7 420 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2 421 buffer[pos++] = encodeTable[(int)(bitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3 422 buffer[pos++] = PAD; 423 break; 424 } 425 currentLinePos += pos - savedPos; // keep track of current line position 426 // if currentPos == 0 we are at the start of a line, so don't add CRLF 427 if (lineLength > 0 && currentLinePos > 0){ // add chunk separator if required 428 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 429 pos += lineSeparator.length; 430 } 431 } else { 432 for (int i = 0; i < inAvail; i++) { 433 ensureBufferSize(encodeSize); 434 modulus = (modulus+1) % BYTES_PER_UNENCODED_BLOCK; 435 int b = in[inPos++]; 436 if (b < 0) { 437 b += 256; 438 } 439 bitWorkArea = (bitWorkArea << 8) + b; // BITS_PER_BYTE 440 if (0 == modulus) { // we have enough bytes to create our output 441 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 35) & MASK_5BITS]; 442 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 30) & MASK_5BITS]; 443 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 25) & MASK_5BITS]; 444 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 20) & MASK_5BITS]; 445 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 15) & MASK_5BITS]; 446 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 10) & MASK_5BITS]; 447 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 5) & MASK_5BITS]; 448 buffer[pos++] = encodeTable[(int)bitWorkArea & MASK_5BITS]; 449 currentLinePos += BYTES_PER_ENCODED_BLOCK; 450 if (lineLength > 0 && lineLength <= currentLinePos) { 451 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 452 pos += lineSeparator.length; 453 currentLinePos = 0; 454 } 455 } 456 } 457 } 458 } 459 460 /** 461 * Returns whether or not the <code>octet</code> is in the Base32 alphabet. 462 * 463 * @param octet 464 * The value to test 465 * @return <code>true</code> if the value is defined in the the Base32 alphabet <code>false</code> otherwise. 466 */ 467 public boolean isInAlphabet(byte octet) { 468 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 469 } 470 }