001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import org.apache.commons.codec.BinaryDecoder;
021    import org.apache.commons.codec.BinaryEncoder;
022    import org.apache.commons.codec.DecoderException;
023    import org.apache.commons.codec.EncoderException;
024    
025    /**
026     * Abstract superclass for Base-N encoders and decoders.
027     *
028     * <p>
029     * This class is not thread-safe.
030     * Each thread should use its own instance.
031     * </p>
032     */
033    public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
034    
    /**
     * MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    /** Factor by which the length of an existing buffer is multiplied when it has to grow. */
    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Byte used to pad output.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default
    
    /** Pad byte used by this instance; currently always {@link #PAD_DEFAULT}. */
    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding. 
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;
    
    /**
     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0. 
     */
    private final int chunkSeparatorLength;

    /**
     * Buffer for streaming. It is <code>null</code> until space is first requested, and is set back to
     * <code>null</code> once all buffered data has been read or the codec is reset.
     */
    protected byte[] buffer;

    /**
     * Position where next character should be written in the buffer.
     */
    protected int pos;

    /**
     * Position where next character should be read from the buffer.
     */
    private int readPos;

    /**
     * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
     * and must be thrown away.
     */
    protected boolean eof;

    /**
     * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to
     * make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
     */
    protected int currentLinePos;

    /**
     * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding.
     * This variable helps track that.
     */
    protected int modulus;
127    
128        /**
129         * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
130         * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
131         * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
132         * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
133         * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
134         * @param chunkSeparatorLength the chunk separator length, if relevant
135         */
136        protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength){
137            this.unencodedBlockSize = unencodedBlockSize;
138            this.encodedBlockSize = encodedBlockSize;
139            this.lineLength = (lineLength > 0  && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
140            this.chunkSeparatorLength = chunkSeparatorLength;
141        }
142    
143        /**
144         * Returns true if this object has buffered data for reading.
145         *
146         * @return true if there is data still available for reading.
147         */
148        boolean hasData() {  // package protected for access from I/O streams
149            return this.buffer != null;
150        }
151    
152        /**
153         * Returns the amount of buffered data available for reading.
154         *
155         * @return The amount of buffered data available for reading.
156         */
157        int available() {  // package protected for access from I/O streams
158            return buffer != null ? pos - readPos : 0;
159        }
160    
161        /**
162         * Get the default buffer size. Can be overridden.
163         *
164         * @return {@link #DEFAULT_BUFFER_SIZE}
165         */
166        protected int getDefaultBufferSize() {
167            return DEFAULT_BUFFER_SIZE;
168        }
169    
170        /** Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. */
171        private void resizeBuffer() {
172            if (buffer == null) {
173                buffer = new byte[getDefaultBufferSize()];
174                pos = 0;
175                readPos = 0;
176            } else {
177                byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
178                System.arraycopy(buffer, 0, b, 0, buffer.length);
179                buffer = b;
180            }
181        }
182    
183        /**
184         * Ensure that the buffer has room for <code>size</code> bytes
185         *
186         * @param size minimum spare space required
187         */
188        protected void ensureBufferSize(int size){
189            if ((buffer == null) || (buffer.length < pos + size)){
190                resizeBuffer();
191            }
192        }
193    
194        /**
195         * Extracts buffered data into the provided byte[] array, starting at position bPos, 
196         * up to a maximum of bAvail bytes. Returns how many bytes were actually extracted.
197         *
198         * @param b
199         *            byte[] array to extract the buffered data into.
200         * @param bPos
201         *            position in byte[] array to start extraction at.
202         * @param bAvail
203         *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
204         * @return The number of bytes successfully extracted into the provided byte[] array.
205         */
206        int readResults(byte[] b, int bPos, int bAvail) {  // package protected for access from I/O streams
207            if (buffer != null) {
208                int len = Math.min(available(), bAvail);
209                System.arraycopy(buffer, readPos, b, bPos, len);
210                readPos += len;
211                if (readPos >= pos) {
212                    buffer = null; // so hasData() will return false, and this method can return -1
213                }
214                return len;
215            }
216            return eof ? -1 : 0;
217        }
218    
219        /**
220         * Checks if a byte value is whitespace or not.
221         * Whitespace is taken to mean: space, tab, CR, LF
222         * @param byteToCheck
223         *            the byte to check
224         * @return true if byte is whitespace, false otherwise
225         */
226        protected static boolean isWhiteSpace(byte byteToCheck) {
227            switch (byteToCheck) {
228                case ' ' :
229                case '\n' :
230                case '\r' :
231                case '\t' :
232                    return true;
233                default :
234                    return false;
235            }
236        }
237    
238        /**
239         * Resets this object to its initial newly constructed state.
240         */
241        private void reset() {
242            buffer = null;
243            pos = 0;
244            readPos = 0;
245            currentLinePos = 0;
246            modulus = 0;
247            eof = false;
248        }
249    
250        /**
251         * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the
252         * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
253         *
254         * @param pObject
255         *            Object to encode
256         * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
257         * @throws EncoderException
258         *             if the parameter supplied is not of type byte[]
259         */
260        public Object encode(Object pObject) throws EncoderException {
261            if (!(pObject instanceof byte[])) {
262                throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
263            }
264            return encode((byte[]) pObject);
265        }
266    
267        /**
268         * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
269         *
270         * @param pArray
271         *            a byte array containing binary data
272         * @return A String containing only Base-N character data
273         */
274        public String encodeToString(byte[] pArray) {
275            return StringUtils.newStringUtf8(encode(pArray));
276        }
277    
278        /**
279         * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the
280         * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
281         *
282         * @param pObject
283         *            Object to decode
284         * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String supplied.
285         * @throws DecoderException
286         *             if the parameter supplied is not of type byte[]
287         */
288        public Object decode(Object pObject) throws DecoderException {        
289            if (pObject instanceof byte[]) {
290                return decode((byte[]) pObject);
291            } else if (pObject instanceof String) {
292                return decode((String) pObject);
293            } else {
294                throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
295            }
296        }
297    
298        /**
299         * Decodes a String containing characters in the Base-N alphabet.
300         *
301         * @param pArray
302         *            A String containing Base-N character data
303         * @return a byte array containing binary data
304         */
305        public byte[] decode(String pArray) {
306            return decode(StringUtils.getBytesUtf8(pArray));
307        }
308    
309        /**
310         * Decodes a byte[] containing characters in the Base-N alphabet.
311         * 
312         * @param pArray
313         *            A byte array containing Base-N character data
314         * @return a byte array containing binary data
315         */
316        public byte[] decode(byte[] pArray) {
317            reset();
318            if (pArray == null || pArray.length == 0) {
319                return pArray;
320            }
321            decode(pArray, 0, pArray.length);
322            decode(pArray, 0, -1); // Notify decoder of EOF.
323            byte[] result = new byte[pos];
324            readResults(result, 0, result.length);
325            return result;
326        }
327    
328        /**
329         * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
330         *
331         * @param pArray
332         *            a byte array containing binary data
333         * @return A byte array containing only the basen alphabetic character data
334         */
335        public byte[] encode(byte[] pArray) {
336            reset();        
337            if (pArray == null || pArray.length == 0) {
338                return pArray;
339            }
340            encode(pArray, 0, pArray.length);
341            encode(pArray, 0, -1); // Notify encoder of EOF.
342            byte[] buf = new byte[pos - readPos];
343            readResults(buf, 0, buf.length);
344            return buf;
345        }
346        
347        /**
348         * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
349         * Uses UTF8 encoding.
350         *
351         * @param pArray a byte array containing binary data
352         * @return String containing only character data in the appropriate alphabet.
353        */
354        public String encodeAsString(byte[] pArray){
355            return StringUtils.newStringUtf8(encode(pArray));
356        }
357    
    /**
     * Encodes a portion of the given array; the concrete algorithm is supplied by subclasses.
     *
     * <p>
     * {@link #encode(byte[])} calls this first with <code>(pArray, 0, pArray.length)</code> and then once
     * more with a length of <code>-1</code> to notify the encoder of EOF. It then collects the output via
     * {@link #readResults(byte[], int, int)}, so implementations are presumably expected to write their
     * output to {@link #buffer} - confirm against the subclasses.
     * </p>
     *
     * @param pArray the array holding the data to encode
     * @param i presumably the offset in <code>pArray</code> at which to start (callers in this class pass 0)
     * @param length the number of bytes to process; a negative value signals EOF
     */
    abstract void encode(byte[] pArray, int i, int length);  // package protected for access from I/O streams
359    
    /**
     * Decodes a portion of the given array; the concrete algorithm is supplied by subclasses.
     *
     * <p>
     * {@link #decode(byte[])} calls this first with <code>(pArray, 0, pArray.length)</code> and then once
     * more with a length of <code>-1</code> to notify the decoder of EOF. It then collects the output via
     * {@link #readResults(byte[], int, int)}, so implementations are presumably expected to write their
     * output to {@link #buffer} - confirm against the subclasses.
     * </p>
     *
     * @param pArray the array holding the data to decode
     * @param i presumably the offset in <code>pArray</code> at which to start (callers in this class pass 0)
     * @param length the number of bytes to process; a negative value signals EOF
     */
    abstract void decode(byte[] pArray, int i, int length); // package protected for access from I/O streams
361        
    /**
     * Returns whether or not the given <code>value</code> is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     *
     * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);
371        
372        /**
373         * Tests a given byte array to see if it contains only valid characters within the alphabet.
374         * The method optionally treats whitespace and pad as valid.
375         *
376         * @param arrayOctet byte array to test
377         * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
378         *
379         * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
380         *         <code>false</code>, otherwise
381         */    
382        public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
383            for (int i = 0; i < arrayOctet.length; i++) {
384                if (!isInAlphabet(arrayOctet[i]) &&
385                        (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
386                    return false;
387                }
388            }
389            return true;
390        }
391    
392        /**
393         * Tests a given String to see if it contains only valid characters within the alphabet. 
394         * The method treats whitespace and PAD as valid.
395         *
396         * @param basen String to test
397         * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
398         *         the String is empty; <code>false</code>, otherwise
399         * @see #isInAlphabet(byte[], boolean)
400         */
401        public boolean isInAlphabet(String basen) {
402            return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
403        }
404    
405        /**
406         * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
407         *
408         * Intended for use in checking line-ending arrays
409         *
410         * @param arrayOctet
411         *            byte array to test
412         * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
413         */
414        protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
415            if (arrayOctet == null) {
416                return false;
417            }
418            for (int i = 0; i < arrayOctet.length; i++) {
419                if (PAD == arrayOctet[i] || isInAlphabet(arrayOctet[i])) {
420                    return true;
421                }
422            }
423            return false;
424        }
425    
426        /**
427         * Calculates the amount of space needed to encode the supplied array.
428         *
429         * @param pArray byte[] array which will later be encoded
430         *
431         * @return amount of space needed to encoded the supplied array.  
432         * Returns a long since a max-len array will require > Integer.MAX_VALUE
433         */
434        public long getEncodedLength(byte[] pArray) {
435            // Calculate non-chunked size - rounded up to allow for padding
436            // cast to long is needed to avoid possibility of overflow
437            long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
438            if (lineLength > 0) { // We're using chunking
439                // Round up to nearest multiple
440                len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
441            }
442            return len;
443        }
444    }