001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.UncheckedIOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import java.nio.charset.Charset;
028import java.nio.charset.StandardCharsets;
029import java.util.ArrayList;
030import java.util.Collections;
031import java.util.HashMap;
032import java.util.List;
033import java.util.Map;
034
035import org.apache.commons.compress.archivers.zip.ZipEncoding;
036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
037import org.apache.commons.compress.utils.CharsetNames;
038import org.apache.commons.compress.utils.IOUtils;
039
040/**
041 * This class provides static utility methods to work with byte streams.
042 *
043 * @Immutable
044 */
045// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
046public class TarUtils {
047
048    private static final int BYTE_MASK = 255;
049
050    static final ZipEncoding DEFAULT_ENCODING =
051        ZipEncodingHelper.getZipEncoding(null);
052
053    /**
054     * Encapsulates the algorithms used up to Commons Compress 1.3 as
055     * ZipEncoding.
056     */
057    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
058            @Override
059            public boolean canEncode(final String name) { return true; }
060
061            @Override
062            public String decode(final byte[] buffer) {
063                final int length = buffer.length;
064                final StringBuilder result = new StringBuilder(length);
065
066                for (final byte b : buffer) {
067                    if (b == 0) { // Trailing null
068                        break;
069                    }
070                    result.append((char) (b & 0xFF)); // Allow for sign-extension
071                }
072
073                return result.toString();
074            }
075
076            @Override
077            public ByteBuffer encode(final String name) {
078                final int length = name.length();
079                final byte[] buf = new byte[length];
080
081                // copy until end of input or output is reached.
082                for (int i = 0; i < length; ++i) {
083                    buf[i] = (byte) name.charAt(i);
084                }
085                return ByteBuffer.wrap(buf);
086            }
087        };
088
089    /**
090     * Compute the checksum of a tar entry header.
091     *
092     * @param buf The tar entry's header buffer.
093     * @return The computed checksum.
094     */
095    public static long computeCheckSum(final byte[] buf) {
096        long sum = 0;
097
098        for (final byte element : buf) {
099            sum += BYTE_MASK & element;
100        }
101
102        return sum;
103    }
104
105    // Helper method to generate the exception message
106    private static String exceptionMessage(final byte[] buffer, final int offset,
107            final int length, final int current, final byte currentByte) {
108        // default charset is good enough for an exception message,
109        //
110        // the alternative was to modify parseOctal and
111        // parseOctalOrBinary to receive the ZipEncoding of the
112        // archive (deprecating the existing public methods, of
113        // course) and dealing with the fact that ZipEncoding#decode
114        // can throw an IOException which parseOctal* doesn't declare
115        String string = new String(buffer, offset, length, Charset.defaultCharset());
116
117        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
118        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
119    }
120
121    private static void formatBigIntegerBinary(final long value, final byte[] buf,
122                                               final int offset,
123                                               final int length,
124                                               final boolean negative) {
125        final BigInteger val = BigInteger.valueOf(value);
126        final byte[] b = val.toByteArray();
127        final int len = b.length;
128        if (len > length - 1) {
129            throw new IllegalArgumentException("Value " + value +
130                " is too large for " + length + " byte field.");
131        }
132        final int off = offset + length - len;
133        System.arraycopy(b, 0, buf, off, len);
134        final byte fill = (byte) (negative ? 0xff : 0);
135        for (int i = offset + 1; i < off; i++) {
136            buf[i] = fill;
137        }
138    }
139
140    /**
141     * Writes an octal value into a buffer.
142     *
143     * Uses {@link #formatUnsignedOctalString} to format
144     * the value as an octal string with leading zeros.
145     * The converted number is followed by NUL and then space.
146     *
147     * @param value The value to convert
148     * @param buf The destination buffer
149     * @param offset The starting offset into the buffer.
150     * @param length The size of the buffer.
151     * @return The updated value of offset, i.e. offset+length
152     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
153     */
154    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
155
156        int idx=length-2; // for NUL and space
157        formatUnsignedOctalString(value, buf, offset, idx);
158
159        buf[offset + idx++]   = 0; // Trailing null
160        buf[offset + idx]     = (byte) ' '; // Trailing space
161
162        return offset + length;
163    }
164
165    private static void formatLongBinary(final long value, final byte[] buf,
166                                         final int offset, final int length,
167                                         final boolean negative) {
168        final int bits = (length - 1) * 8;
169        final long max = 1L << bits;
170        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
171        if (val < 0 || val >= max) {
172            throw new IllegalArgumentException("Value " + value +
173                " is too large for " + length + " byte field.");
174        }
175        if (negative) {
176            val ^= max - 1;
177            val++;
178            val |= 0xffL << bits;
179        }
180        for (int i = offset + length - 1; i >= offset; i--) {
181            buf[i] = (byte) val;
182            val >>= 8;
183        }
184    }
185
186    /**
187     * Write an octal long integer into a buffer.
188     *
189     * Uses {@link #formatUnsignedOctalString} to format
190     * the value as an octal string with leading zeros.
191     * The converted number is followed by a space.
192     *
193     * @param value The value to write as octal
     * @param buf The destination buffer.
195     * @param offset The starting offset into the buffer.
196     * @param length The length of the buffer
197     * @return The updated offset
198     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
199     */
200    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
201
202        final int idx=length-1; // For space
203
204        formatUnsignedOctalString(value, buf, offset, idx);
205        buf[offset + idx] = (byte) ' '; // Trailing space
206
207        return offset + length;
208    }
209
210    /**
211     * Write a long integer into a buffer as an octal string if this
212     * will fit, or as a binary number otherwise.
213     *
214     * Uses {@link #formatUnsignedOctalString} to format
215     * the value as an octal string with leading zeros.
216     * The converted number is followed by a space.
217     *
218     * @param value The value to write into the buffer.
219     * @param buf The destination buffer.
220     * @param offset The starting offset into the buffer.
221     * @param length The length of the buffer.
222     * @return The updated offset.
223     * @throws IllegalArgumentException if the value (and trailer)
224     * will not fit in the buffer.
225     * @since 1.4
226     */
227    public static int formatLongOctalOrBinaryBytes(
228        final long value, final byte[] buf, final int offset, final int length) {
229
230        // Check whether we are dealing with UID/GID or SIZE field
231        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
232
233        final boolean negative = value < 0;
234        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
235            return formatLongOctalBytes(value, buf, offset, length);
236        }
237
238        if (length < 9) {
239            formatLongBinary(value, buf, offset, length, negative);
240        } else {
241            formatBigIntegerBinary(value, buf, offset, length, negative);
242        }
243
244        buf[offset] = (byte) (negative ? 0xff : 0x80);
245        return offset + length;
246    }
247
248    /**
249     * Copy a name into a buffer.
250     * Copies characters from the name into the buffer
251     * starting at the specified offset.
252     * If the buffer is longer than the name, the buffer
253     * is filled with trailing NULs.
254     * If the name is longer than the buffer,
255     * the output is truncated.
256     *
257     * @param name The header name from which to copy the characters.
258     * @param buf The buffer where the name is to be stored.
259     * @param offset The starting offset into the buffer
260     * @param length The maximum number of header bytes to copy.
261     * @return The updated offset, i.e. offset + length
262     */
263    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
264        try {
265            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
266        } catch (final IOException ex) { // NOSONAR
267            try {
268                return formatNameBytes(name, buf, offset, length,
269                                       FALLBACK_ENCODING);
270            } catch (final IOException ex2) {
271                // impossible
272                throw new UncheckedIOException(ex2); //NOSONAR
273            }
274        }
275    }
276
277    /**
278     * Copy a name into a buffer.
279     * Copies characters from the name into the buffer
280     * starting at the specified offset.
281     * If the buffer is longer than the name, the buffer
282     * is filled with trailing NULs.
283     * If the name is longer than the buffer,
284     * the output is truncated.
285     *
286     * @param name The header name from which to copy the characters.
287     * @param buf The buffer where the name is to be stored.
288     * @param offset The starting offset into the buffer
289     * @param length The maximum number of header bytes to copy.
290     * @param encoding name of the encoding to use for file names
291     * @since 1.4
292     * @return The updated offset, i.e. offset + length
293     * @throws IOException on error
294     */
295    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
296                                      final int length,
297                                      final ZipEncoding encoding)
298        throws IOException {
299        int len = name.length();
300        ByteBuffer b = encoding.encode(name);
301        while (b.limit() > length && len > 0) {
302            b = encoding.encode(name.substring(0, --len));
303        }
304        final int limit = b.limit() - b.position();
305        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
306
307        // Pad any remaining output bytes with NUL
308        for (int i = limit; i < length; ++i) {
309            buf[offset + i] = 0;
310        }
311
312        return offset + length;
313    }
314
315    /**
316     * Write an octal integer into a buffer.
317     *
318     * Uses {@link #formatUnsignedOctalString} to format
319     * the value as an octal string with leading zeros.
320     * The converted number is followed by space and NUL
321     *
322     * @param value The value to write
323     * @param buf The buffer to receive the output
324     * @param offset The starting offset into the buffer
325     * @param length The size of the output buffer
326     * @return The updated offset, i.e. offset+length
327     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
328     */
329    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
330
331        int idx=length-2; // For space and trailing null
332        formatUnsignedOctalString(value, buf, offset, idx);
333
334        buf[offset + idx++] = (byte) ' '; // Trailing space
335        buf[offset + idx]   = 0; // Trailing null
336
337        return offset + length;
338    }
339
340    /**
341     * Fill buffer with unsigned octal number, padded with leading zeroes.
342     *
343     * @param value number to convert to octal - treated as unsigned
344     * @param buffer destination buffer
345     * @param offset starting offset in buffer
346     * @param length length of buffer to fill
347     * @throws IllegalArgumentException if the value will not fit in the buffer
348     */
349    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
350            final int offset, final int length) {
351        int remaining = length;
352        remaining--;
353        if (value == 0) {
354            buffer[offset + remaining--] = (byte) '0';
355        } else {
356            long val = value;
357            for (; remaining >= 0 && val != 0; --remaining) {
358                // CheckStyle:MagicNumber OFF
359                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
360                val = val >>> 3;
361                // CheckStyle:MagicNumber ON
362            }
363            if (val != 0){
364                throw new IllegalArgumentException
365                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
366            }
367        }
368
369        for (; remaining >= 0; --remaining) { // leading zeros
370            buffer[offset + remaining] = (byte) '0';
371        }
372    }
373
374    private static long parseBinaryBigInteger(final byte[] buffer,
375                                              final int offset,
376                                              final int length,
377                                              final boolean negative) {
378        final byte[] remainder = new byte[length - 1];
379        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
380        BigInteger val = new BigInteger(remainder);
381        if (negative) {
382            // 2's complement
383            val = val.add(BigInteger.valueOf(-1)).not();
384        }
385        if (val.bitLength() > 63) {
386            throw new IllegalArgumentException("At offset " + offset + ", "
387                                               + length + " byte binary number"
388                                               + " exceeds maximum signed long"
389                                               + " value");
390        }
391        return negative ? -val.longValue() : val.longValue();
392    }
393
394    private static long parseBinaryLong(final byte[] buffer, final int offset,
395                                        final int length,
396                                        final boolean negative) {
397        if (length >= 9) {
398            throw new IllegalArgumentException("At offset " + offset + ", "
399                                               + length + " byte binary number"
400                                               + " exceeds maximum signed long"
401                                               + " value");
402        }
403        long val = 0;
404        for (int i = 1; i < length; i++) {
405            val = (val << 8) + (buffer[offset + i] & 0xff);
406        }
407        if (negative) {
408            // 2's complement
409            val--;
410            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
411        }
412        return negative ? -val : val;
413    }
414
415    /**
     * Parse a boolean byte from a buffer.
     * Only the single byte at {@code offset} is examined; the result is
     * {@code true} if and only if that byte has the value 1.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @return The boolean value of the byte.
     * @throws ArrayIndexOutOfBoundsException if the offset lies outside the buffer.
424     */
425    public static boolean parseBoolean(final byte[] buffer, final int offset) {
426        return buffer[offset] == 1;
427    }
428
429    /**
430     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
431     * GNU.sparse.map
432     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
433     *
434     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
435     * @return unmodifiable list of sparse headers parsed from sparse map
436     * @throws IOException Corrupted TAR archive.
437     * @since 1.21
438     */
439    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap)
440        throws IOException {
441        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
442        final String[] sparseHeaderStrings = sparseMap.split(",");
443        if (sparseHeaderStrings.length % 2 == 1) {
444            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
445        }
446
447        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
448            long sparseOffset;
449            try {
450                sparseOffset = Long.parseLong(sparseHeaderStrings[i]);
451            } catch (final NumberFormatException ex) {
452                throw new IOException("Corrupted TAR archive."
453                    + " Sparse struct offset contains a non-numeric value");
454            }
455            if (sparseOffset < 0) {
456                throw new IOException("Corrupted TAR archive."
457                    + " Sparse struct offset contains negative value");
458            }
459            long sparseNumbytes;
460            try {
461                sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]);
462            } catch (final NumberFormatException ex) {
463                throw new IOException("Corrupted TAR archive."
464                    + " Sparse struct numbytes contains a non-numeric value");
465            }
466            if (sparseNumbytes < 0) {
467                throw new IOException("Corrupted TAR archive."
468                    + " Sparse struct numbytes contains negative value");
469            }
470            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
471        }
472
473        return Collections.unmodifiableList(sparseHeaders);
474    }
475
476    /**
477     * Parse an entry name from a buffer.
478     * Parsing stops when a NUL is found
479     * or the buffer length is reached.
480     *
481     * @param buffer The buffer from which to parse.
482     * @param offset The offset into the buffer from which to parse.
483     * @param length The maximum number of bytes to parse.
484     * @return The entry name.
485     */
486    public static String parseName(final byte[] buffer, final int offset, final int length) {
487        try {
488            return parseName(buffer, offset, length, DEFAULT_ENCODING);
489        } catch (final IOException ex) { // NOSONAR
490            try {
491                return parseName(buffer, offset, length, FALLBACK_ENCODING);
492            } catch (final IOException ex2) {
493                // impossible
494                throw new UncheckedIOException(ex2); //NOSONAR
495            }
496        }
497    }
498
499    /**
500     * Parse an entry name from a buffer.
501     * Parsing stops when a NUL is found
502     * or the buffer length is reached.
503     *
504     * @param buffer The buffer from which to parse.
505     * @param offset The offset into the buffer from which to parse.
506     * @param length The maximum number of bytes to parse.
507     * @param encoding name of the encoding to use for file names
508     * @since 1.4
509     * @return The entry name.
510     * @throws IOException on error
511     */
512    public static String parseName(final byte[] buffer, final int offset,
513                                   final int length,
514                                   final ZipEncoding encoding)
515        throws IOException {
516
517        int len = 0;
518        for (int i = offset; len < length && buffer[i] != 0; i++) {
519            len++;
520        }
521        if (len > 0) {
522            final byte[] b = new byte[len];
523            System.arraycopy(buffer, offset, b, 0, len);
524            return encoding.decode(b);
525        }
526        return "";
527    }
528
529    /**
530     * Parse an octal string from a buffer.
531     *
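     * <p>Leading spaces are ignored and all trailing spaces and NULs
     * are trimmed. The ustar and POSIX tar specs require a trailing
     * space or NUL, but this method does not insist on one because
     * some implementations use the extra digit for big values.</p>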
535     *
536     * <p>The input buffer is allowed to contain all NULs,
537     * in which case the method returns 0L
538     * (this allows for missing fields).</p>
539     *
540     * <p>To work-around some tar implementations that insert a
541     * leading NUL this method returns 0 if it detects a leading NUL
542     * since Commons Compress 1.4.</p>
543     *
544     * @param buffer The buffer from which to parse.
545     * @param offset The offset into the buffer from which to parse.
546     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
547     * @return The long value of the octal string.
     * @throws IllegalArgumentException if length is less than 2 or if an invalid byte is detected.
549     */
550    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
551        long result = 0;
552        int end = offset + length;
553        int start = offset;
554
555        if (length < 2) {
556            throw new IllegalArgumentException("Length " + length + " must be at least 2");
557        }
558
559        if (buffer[start] == 0) {
560            return 0L;
561        }
562
563        // Skip leading spaces
564        while (start < end) {
565            if (buffer[start] != ' ') {
566                break;
567            }
568            start++;
569        }
570
571        // Trim all trailing NULs and spaces.
572        // The ustar and POSIX tar specs require a trailing NUL or
573        // space but some implementations use the extra digit for big
574        // sizes/uids/gids ...
575        byte trailer = buffer[end - 1];
576        while (start < end && (trailer == 0 || trailer == ' ')) {
577            end--;
578            trailer = buffer[end - 1];
579        }
580
581        for (; start < end; start++) {
582            final byte currentByte = buffer[start];
583            // CheckStyle:MagicNumber OFF
584            if (currentByte < '0' || currentByte > '7') {
585                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
586            }
587            result = (result << 3) + (currentByte - '0'); // convert from ASCII
588            // CheckStyle:MagicNumber ON
589        }
590
591        return result;
592    }
593
594    /**
595     * Compute the value contained in a byte buffer.  If the most
596     * significant bit of the first byte in the buffer is set, this
597     * bit is ignored and the rest of the buffer is interpreted as a
598     * binary number.  Otherwise, the buffer is interpreted as an
599     * octal number as per the parseOctal function above.
600     *
601     * @param buffer The buffer from which to parse.
602     * @param offset The offset into the buffer from which to parse.
603     * @param length The maximum number of bytes to parse.
604     * @return The long value of the octal or binary string.
605     * @throws IllegalArgumentException if the trailing space/NUL is
606     * missing or an invalid byte is detected in an octal number, or
607     * if a binary number would exceed the size of a signed long
608     * 64-bit integer.
609     * @since 1.4
610     */
611    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
612                                          final int length) {
613
614        if ((buffer[offset] & 0x80) == 0) {
615            return parseOctal(buffer, offset, length);
616        }
617        final boolean negative = buffer[offset] == (byte) 0xff;
618        if (length < 9) {
619            return parseBinaryLong(buffer, offset, length, negative);
620        }
621        return parseBinaryBigInteger(buffer, offset, length, negative);
622    }
623
624    /**
625     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
626     * GNU.sparse.map
627     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
628     *
     * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RuntimeException
     * (an {@link UncheckedIOException}). You should use {@link #parseFromPAX01SparseHeaders} directly instead.</p>
631     *
632     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
633     * @return sparse headers parsed from sparse map
634     * @deprecated use #parseFromPAX01SparseHeaders instead
635     */
636    @Deprecated
637    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
638        try {
639            return parseFromPAX01SparseHeaders(sparseMap);
640        } catch (final IOException ex) {
641            throw new UncheckedIOException(ex.getMessage(), ex);
642        }
643    }
644
645    /**
646     * For PAX Format 1.X:
647     * The sparse map itself is stored in the file data block, preceding the actual file data.
648     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
649     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
650     * giving the offset and size of the data block it describes.
651     * @param inputStream parsing source.
     * @param recordSize The size of the TAR header
653     * @return sparse headers
654     * @throws IOException if an I/O error occurs.
655     */
656    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
657        // for 1.X PAX Headers
658        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
659        long bytesRead = 0;
660
661        long[] readResult = readLineOfNumberForPax1X(inputStream);
662        long sparseHeadersCount = readResult[0];
663        if (sparseHeadersCount < 0) {
664            // overflow while reading number?
665            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
666        }
667        bytesRead += readResult[1];
668        while (sparseHeadersCount-- > 0) {
669            readResult = readLineOfNumberForPax1X(inputStream);
670            final long sparseOffset = readResult[0];
671            if (sparseOffset < 0) {
672                throw new IOException("Corrupted TAR archive."
673                    + " Sparse header block offset contains negative value");
674            }
675            bytesRead += readResult[1];
676
677            readResult = readLineOfNumberForPax1X(inputStream);
678            final long sparseNumbytes = readResult[0];
679            if (sparseNumbytes < 0) {
680                throw new IOException("Corrupted TAR archive."
681                    + " Sparse header block numbytes contains negative value");
682            }
683            bytesRead += readResult[1];
684            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
685        }
686
687        // skip the rest of this record data
688        final long bytesToSkip = recordSize - bytesRead % recordSize;
689        IOUtils.skip(inputStream, bytesToSkip);
690        return sparseHeaders;
691    }
692
693    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
696     *
697     * GNU.sparse.size=size
698     * GNU.sparse.numblocks=numblocks
699     * repeat numblocks times
700     *   GNU.sparse.offset=offset
701     *   GNU.sparse.numbytes=numbytes
702     * end repeat
703     *
704     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
705     *
706     * GNU.sparse.map
707     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
708     *
709     * @param inputStream input stream to read keys and values
710     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
711     *                      the sparse headers need to be stored in an array, not a map
712     * @param globalPaxHeaders global PAX headers of the tar archive
713     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
714     * @throws IOException if an I/O error occurs.
715     * @deprecated use the four-arg version instead
716     */
717    @Deprecated
718    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders)
719            throws IOException {
720        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
721    }
722
723    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
726     *
727     * GNU.sparse.size=size
728     * GNU.sparse.numblocks=numblocks
729     * repeat numblocks times
730     *   GNU.sparse.offset=offset
731     *   GNU.sparse.numbytes=numbytes
732     * end repeat
733     *
734     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
735     *
736     * GNU.sparse.map
737     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
738     *
739     * @param inputStream input stream to read keys and values
740     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
741     *                      the sparse headers need to be stored in an array, not a map
742     * @param globalPaxHeaders global PAX headers of the tar archive
743     * @param headerSize total size of the PAX header, will be ignored if negative
744     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
745     * @throws IOException if an I/O error occurs.
746     * @since 1.21
747     */
748    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream,
749            final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders,
750            final long headerSize) throws IOException {
751        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
752        Long offset = null;
753        // Format is "length keyword=value\n";
754        int totalRead = 0;
755        while(true) { // get length
756            int ch;
757            int len = 0;
758            int read = 0;
759            while((ch = inputStream.read()) != -1) {
760                read++;
761                totalRead++;
762                if (ch == '\n') { // blank line in header
763                    break;
764                }
765                if (ch == ' '){ // End of length string
766                    // Get keyword
767                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
768                    while((ch = inputStream.read()) != -1) {
769                        read++;
770                        totalRead++;
771                        if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) {
772                            break;
773                        }
774                        if (ch == '='){ // end of keyword
775                            final String keyword = coll.toString(CharsetNames.UTF_8);
776                            // Get rest of entry
777                            final int restLen = len - read;
778                            if (restLen <= 1) { // only NL
779                                headers.remove(keyword);
780                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
781                                throw new IOException("Paxheader value size " + restLen
782                                    + " exceeds size of header record");
783                            } else {
784                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
785                                final int got = rest.length;
786                                if (got != restLen) {
787                                    throw new IOException("Failed to read "
788                                            + "Paxheader. Expected "
789                                            + restLen
790                                            + " bytes, read "
791                                            + got);
792                                }
793                                totalRead += restLen;
794                                // Drop trailing NL
795                                if (rest[restLen - 1] != '\n') {
796                                    throw new IOException("Failed to read Paxheader."
797                                       + "Value should end with a newline");
798                                }
799                                final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
800                                headers.put(keyword, value);
801
802                                // for 0.0 PAX Headers
803                                if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
804                                    if (offset != null) {
805                                        // previous GNU.sparse.offset header but no numBytes
806                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
807                                    }
808                                    try {
809                                        offset = Long.valueOf(value);
810                                    } catch (final NumberFormatException ex) {
811                                        throw new IOException("Failed to read Paxheader."
812                                            + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
813                                    }
814                                    if (offset < 0) {
815                                        throw new IOException("Failed to read Paxheader."
816                                            + TarGnuSparseKeys.OFFSET + " contains negative value");
817                                    }
818                                }
819
820                                // for 0.0 PAX Headers
821                                if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
822                                    if (offset == null) {
823                                        throw new IOException("Failed to read Paxheader."
824                                                + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
825                                    }
826                                    long numbytes;
827                                    try {
828                                        numbytes = Long.parseLong(value);
829                                    } catch (final NumberFormatException ex) {
830                                        throw new IOException("Failed to read Paxheader."
831                                            + TarGnuSparseKeys.NUMBYTES + " contains a non-numeric value.");
832                                    }
833                                    if (numbytes < 0) {
834                                        throw new IOException("Failed to read Paxheader."
835                                            + TarGnuSparseKeys.NUMBYTES + " contains negative value");
836                                    }
837                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
838                                    offset = null;
839                                }
840                            }
841                            break;
842                        }
843                        coll.write((byte) ch);
844                    }
845                    break; // Processed single header
846                }
847
848                // COMPRESS-530 : throw if we encounter a non-number while reading length
849                if (ch < '0' || ch > '9') {
850                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
851                }
852
853                len *= 10;
854                len += ch - '0';
855            }
856            if (ch == -1){ // EOF
857                break;
858            }
859        }
860        if (offset != null) {
861            // offset but no numBytes
862            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
863        }
864        return headers;
865    }
866
867    /**
868     * Parses the content of a PAX 1.0 sparse block.
869     * @since 1.20
870     * @param buffer The buffer from which to parse.
871     * @param offset The offset into the buffer from which to parse.
872     * @return a parsed sparse struct
873     */
874    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
875        final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
876        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
877
878        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
879    }
880
881    /**
882     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data.
883     * It consists of a series of decimal numbers delimited by newlines.
884     *
885     * @param inputStream the input stream of the tar file
886     * @return the decimal number delimited by '\n', and the bytes read from input stream
887     * @throws IOException
888     */
889    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
890        int number;
891        long result = 0;
892        long bytesRead = 0;
893
894        while ((number = inputStream.read()) != '\n') {
895            bytesRead += 1;
896            if (number == -1) {
897                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
898            }
899            if (number < '0' || number > '9') {
900                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
901            }
902            result = result * 10 + (number - '0');
903        }
904        bytesRead += 1;
905
906        return new long[]{result, bytesRead};
907    }
908
909    /**
910     * @since 1.21
911     */
912    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries)
913        throws IOException {
914        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
915        for (int i = 0; i < entries; i++) {
916            try {
917                final TarArchiveStructSparse sparseHeader =
918                    parseSparse(buffer, offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
919
920                if (sparseHeader.getOffset() < 0) {
921                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
922                }
923                if (sparseHeader.getNumbytes() < 0) {
924                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
925                }
926                sparseHeaders.add(sparseHeader);
927            } catch (final IllegalArgumentException ex) {
928                // thrown internally by parseOctalOrBinary
929                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
930            }
931        }
932        return Collections.unmodifiableList(sparseHeaders);
933    }
934
935    /**
936     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>:
937     * <blockquote>
938     * The checksum is calculated by taking the sum of the unsigned byte values
939     * of the header block with the eight checksum bytes taken to be ascii
940     * spaces (decimal value 32). It is stored as a six digit octal number with
941     * leading zeroes followed by a NUL and then a space. Various
942     * implementations do not adhere to this format. For better compatibility,
943     * ignore leading and trailing whitespace, and get the first six digits. In
944     * addition, some historic tar implementations treated bytes as signed.
945     * Implementations typically calculate the checksum both ways, and treat it
946     * as good if either the signed or unsigned sum matches the included
947     * checksum.
948     * </blockquote>
949     * <p>
950     * The return value of this method should be treated as a best-effort
951     * heuristic rather than an absolute and final truth. The checksum
952     * verification logic may well evolve over time as more special cases
953     * are encountered.
954     *
955     * @param header tar header
956     * @return whether the checksum is reasonably good
957     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
958     * @since 1.5
959     */
960    public static boolean verifyCheckSum(final byte[] header) {
961        final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
962        long unsignedSum = 0;
963        long signedSum = 0;
964
965        for (int i = 0; i < header.length; i++) {
966            byte b = header[i];
967            if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
968                b = ' ';
969            }
970            unsignedSum += 0xff & b;
971            signedSum += b;
972        }
973        return storedSum == unsignedSum || storedSum == signedSum;
974    }
975
976    /** Private constructor to prevent instantiation of this utility class. */
977    private TarUtils(){
978    }
979
980}